direct-io.hg

view tools/libxc/xc_hvm_build.c @ 12765:2dd4569e0640

[LIBXC] Add an error reporting API to the libxc library.

- An 'xc_error' struct is used to pass around error
details. Currently contains two members 'code' an enumeration of
error types, and 'message' a free text description of the specific
problem.

- The xc_get_last_error() method returns a const pointer to the
internal instance of this struct managed by libxc. By returning a
const pointer we can add extra members to the end of the struct at
any time without worrying about ABI of callers. This will let us
provide more fine-grained info if needed in the future.

- The xc_error instance is statically defined inside libxc and marked
__thread. This ensures that errors are recorded per-thread, and
that when dealing with errors we never need to call malloc - all
storage needed is statically allocated.

- The xc_clear_last_error() method resets any currently recorded
error details

- The xc_error_code_to_desc() method converts the integer error code
into a generic user facing message. eg "Invalid kernel". Together
with the 'message' field from xc_error, this provides the user
visible feedback. eg "Invalid kernel: Non PAE-kernel on PAE host."

- A callback can be registered with xc_set_error_handler to receive
notification whenever an error is recorded, rather than querying
for error details after the fact with xc_get_last_error

- If built with -DDEBUG set, a default error handler will be
registered which calls fprintf(stderr), thus maintaining current
behaviour of logging errors to stderr during developer builds.

- The python binding for libxc is updated to use xc_get_last_error
to pull out error details whenever appropriate, instead of
returning info based on 'errno'

- The xc_set_error method is private to libxc internals, and is used
for setting error details

- The ERROR and PERROR macros have been updated to call xc_set_error
automatically specifying XC_INTERNAL_ERROR as the error code. This
gives a generic error report for all current failure points

- Some uses of the ERROR macro have been replaced with explicit
calls to xc_set_error to enable finer grained error reporting. In
particular the code dealing with invalid kernel types uses this
to report about PAE/architecture/wordsize mismatches

The patch has been tested by calling xm create against a variety of
config files defining invalid kernels of various kinds. It has also
been tested with libvirt talking to xend. In both cases the error
messages were propagated all the way back up the stack.

There is only one place where I need to do further work. The suspend
& restore APIs in Xend invoke external helper programs rather than
calling libxc directly. This means that error details are essentially
lost. Since there is already code in XenD which scans STDERR from
these programs I will investigate adapting this to extract actual
error messages from these helpers.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
author kfraser@localhost.localdomain
date Thu Dec 07 11:36:26 2006 +0000 (2006-12-07)
parents 9d6bc06919e0
children 51fd5bdc2744
line source
1 /******************************************************************************
2 * xc_hvm_build.c
3 */
5 #define ELFSIZE 32
6 #include <stddef.h>
7 #include <inttypes.h>
8 #include "xg_private.h"
9 #include "xc_private.h"
10 #include "xc_elf.h"
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <zlib.h>
14 #include <xen/hvm/hvm_info_table.h>
15 #include <xen/hvm/params.h>
16 #include <xen/hvm/e820.h>
18 #define SCRATCH_PFN 0xFFFFF
20 #define HVM_LOADER_ENTR_ADDR 0x00100000
21 static int
22 parseelfimage(
23 char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
24 static int
25 loadelfimage(
26 char *elfbase, int xch, uint32_t dom, unsigned long *parray,
27 struct domain_setup_info *dsi);
29 int xc_set_hvm_param(
30 int handle, domid_t dom, int param, unsigned long value)
31 {
32 DECLARE_HYPERCALL;
33 xen_hvm_param_t arg;
34 int rc;
36 hypercall.op = __HYPERVISOR_hvm_op;
37 hypercall.arg[0] = HVMOP_set_param;
38 hypercall.arg[1] = (unsigned long)&arg;
39 arg.domid = dom;
40 arg.index = param;
41 arg.value = value;
42 if ( lock_pages(&arg, sizeof(arg)) != 0 )
43 return -1;
44 rc = do_xen_hypercall(handle, &hypercall);
45 unlock_pages(&arg, sizeof(arg));
46 return rc;
47 }
49 int xc_get_hvm_param(
50 int handle, domid_t dom, int param, unsigned long *value)
51 {
52 DECLARE_HYPERCALL;
53 xen_hvm_param_t arg;
54 int rc;
56 hypercall.op = __HYPERVISOR_hvm_op;
57 hypercall.arg[0] = HVMOP_get_param;
58 hypercall.arg[1] = (unsigned long)&arg;
59 arg.domid = dom;
60 arg.index = param;
61 if ( lock_pages(&arg, sizeof(arg)) != 0 )
62 return -1;
63 rc = do_xen_hypercall(handle, &hypercall);
64 unlock_pages(&arg, sizeof(arg));
65 *value = arg.value;
66 return rc;
67 }
69 static void build_e820map(void *e820_page, unsigned long long mem_size)
70 {
71 struct e820entry *e820entry =
72 (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
73 unsigned long long extra_mem_size = 0;
74 unsigned char nr_map = 0;
76 /*
77 * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
78 * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
79 * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
80 */
81 if ( mem_size > HVM_BELOW_4G_RAM_END )
82 {
83 extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
84 mem_size = HVM_BELOW_4G_RAM_END;
85 }
87 /* 0x0-0x9F000: Ordinary RAM. */
88 e820entry[nr_map].addr = 0x0;
89 e820entry[nr_map].size = 0x9F000;
90 e820entry[nr_map].type = E820_RAM;
91 nr_map++;
93 /*
94 * 0x9F000-0x9F800: SMBIOS tables.
95 * 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA).
96 * TODO: SMBIOS tables should be moved higher (>=0xE0000).
97 * They are unusually low in our memory map: could cause problems?
98 */
99 e820entry[nr_map].addr = 0x9F000;
100 e820entry[nr_map].size = 0x1000;
101 e820entry[nr_map].type = E820_RESERVED;
102 nr_map++;
104 /*
105 * Following regions are standard regions of the PC memory map.
106 * They are not covered by e820 regions. OSes will not use as RAM.
107 * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
108 * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
109 * TODO: hvmloader should free pages which turn out to be unused.
110 */
112 /*
113 * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
114 * We *cannot* mark as E820_ACPI, for two reasons:
115 * 1. ACPI spec. says that E820_ACPI regions below
116 * 16MB must clip INT15h 0x88 and 0xe801 queries.
117 * Our rombios doesn't do this.
118 * 2. The OS is allowed to reclaim ACPI memory after
119 * parsing the tables. But our FACS is in this
120 * region and it must not be reclaimed (it contains
121 * the ACPI global lock!).
122 * 0xF0000-0x100000: System BIOS.
123 * TODO: hvmloader should free pages which turn out to be unused.
124 */
125 e820entry[nr_map].addr = 0xE0000;
126 e820entry[nr_map].size = 0x20000;
127 e820entry[nr_map].type = E820_RESERVED;
128 nr_map++;
130 /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
131 e820entry[nr_map].addr = 0x100000;
132 e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
133 e820entry[nr_map].type = E820_RAM;
134 nr_map++;
136 if ( extra_mem_size )
137 {
138 e820entry[nr_map].addr = (1ULL << 32);
139 e820entry[nr_map].size = extra_mem_size;
140 e820entry[nr_map].type = E820_RAM;
141 nr_map++;
142 }
144 *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
145 }
147 static int setup_guest(int xc_handle,
148 uint32_t dom, int memsize,
149 char *image, unsigned long image_size,
150 vcpu_guest_context_t *ctxt)
151 {
152 xen_pfn_t *page_array = NULL;
153 unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
154 unsigned long shared_page_nr;
155 struct xen_add_to_physmap xatp;
156 struct shared_info *shared_info;
157 void *e820_page;
158 struct domain_setup_info dsi;
159 uint64_t v_end;
160 int rc;
162 memset(&dsi, 0, sizeof(struct domain_setup_info));
164 if ( (parseelfimage(image, image_size, &dsi)) != 0 )
165 goto error_out;
167 if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 )
168 {
169 PERROR("Guest OS must load to a page boundary.\n");
170 goto error_out;
171 }
173 v_end = (unsigned long long)memsize << 20;
175 IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
176 " Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
177 " TOTAL: %016"PRIx64"->%016"PRIx64"\n",
178 dsi.v_kernstart, dsi.v_kernend,
179 dsi.v_start, v_end);
180 IPRINTF(" ENTRY ADDRESS: %016"PRIx64"\n", dsi.v_kernentry);
182 if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) )
183 {
184 PERROR("Initial guest OS requires too much space: "
185 "(%lluMB is greater than %lluMB limit)\n",
186 (unsigned long long)(v_end - dsi.v_start) >> 20,
187 ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20);
188 goto error_out;
189 }
191 if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
192 {
193 PERROR("Could not allocate memory.\n");
194 goto error_out;
195 }
197 for ( i = 0; i < nr_pages; i++ )
198 page_array[i] = i;
199 for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
200 page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
202 /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
203 rc = xc_domain_memory_populate_physmap(
204 xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages,
205 0, 0, &page_array[0x00]);
206 if ( (rc == 0) && (nr_pages > 0xc0) )
207 rc = xc_domain_memory_populate_physmap(
208 xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
209 if ( rc != 0 )
210 {
211 PERROR("Could not allocate memory for HVM guest.\n");
212 goto error_out;
213 }
215 loadelfimage(image, xc_handle, dom, page_array, &dsi);
217 if ( (e820_page = xc_map_foreign_range(
218 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
219 E820_MAP_PAGE >> PAGE_SHIFT)) == NULL )
220 goto error_out;
221 memset(e820_page, 0, PAGE_SIZE);
222 build_e820map(e820_page, v_end);
223 munmap(e820_page, PAGE_SIZE);
225 /* Map and initialise shared_info page. */
226 xatp.domid = dom;
227 xatp.space = XENMAPSPACE_shared_info;
228 xatp.idx = 0;
229 xatp.gpfn = SCRATCH_PFN;
230 if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
231 ((shared_info = xc_map_foreign_range(
232 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
233 SCRATCH_PFN)) == NULL) )
234 goto error_out;
235 memset(shared_info, 0, PAGE_SIZE);
236 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
237 shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
238 memset(&shared_info->evtchn_mask[0], 0xff,
239 sizeof(shared_info->evtchn_mask));
240 munmap(shared_info, PAGE_SIZE);
242 if ( v_end > HVM_BELOW_4G_RAM_END )
243 shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
244 else
245 shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
247 /* Paranoia: clean pages. */
248 if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
249 xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
250 xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
251 goto error_out;
253 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
254 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
255 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
257 free(page_array);
259 ctxt->user_regs.eip = dsi.v_kernentry;
261 return 0;
263 error_out:
264 free(page_array);
265 return -1;
266 }
268 static int xc_hvm_build_internal(int xc_handle,
269 uint32_t domid,
270 int memsize,
271 char *image,
272 unsigned long image_size)
273 {
274 struct xen_domctl launch_domctl;
275 vcpu_guest_context_t ctxt;
276 int rc;
278 if ( (image == NULL) || (image_size == 0) )
279 {
280 ERROR("Image required");
281 goto error_out;
282 }
284 memset(&ctxt, 0, sizeof(ctxt));
286 if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
287 {
288 goto error_out;
289 }
291 if ( lock_pages(&ctxt, sizeof(ctxt) ) )
292 {
293 PERROR("%s: ctxt mlock failed", __func__);
294 goto error_out;
295 }
297 memset(&launch_domctl, 0, sizeof(launch_domctl));
298 launch_domctl.domain = (domid_t)domid;
299 launch_domctl.u.vcpucontext.vcpu = 0;
300 set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt);
301 launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
302 rc = xc_domctl(xc_handle, &launch_domctl);
304 unlock_pages(&ctxt, sizeof(ctxt));
306 return rc;
308 error_out:
309 return -1;
310 }
/*
 * A program header describes a loadable segment if it is PT_LOAD and
 * is writable or executable.
 */
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    if ( phdr->p_type != PT_LOAD )
        return 0;
    return (phdr->p_flags & (PF_W|PF_X)) != 0;
}
318 static int parseelfimage(char *elfbase,
319 unsigned long elfsize,
320 struct domain_setup_info *dsi)
321 {
322 Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
323 Elf32_Phdr *phdr;
324 Elf32_Shdr *shdr;
325 unsigned long kernstart = ~0UL, kernend=0UL;
326 char *shstrtab;
327 int h;
329 if ( !IS_ELF(*ehdr) )
330 {
331 xc_set_error(XC_INVALID_KERNEL,
332 "Kernel image does not have an ELF header.");
333 return -EINVAL;
334 }
336 if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
337 {
338 xc_set_error(XC_INVALID_KERNEL,
339 "ELF program headers extend beyond end of image.");
340 return -EINVAL;
341 }
343 if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
344 {
345 xc_set_error(XC_INVALID_KERNEL,
346 "ELF section headers extend beyond end of image.");
347 return -EINVAL;
348 }
350 /* Find the section-header strings table. */
351 if ( ehdr->e_shstrndx == SHN_UNDEF )
352 {
353 xc_set_error(XC_INVALID_KERNEL,
354 "ELF image has no section-header strings table (shstrtab).");
355 return -EINVAL;
356 }
357 shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
358 (ehdr->e_shstrndx*ehdr->e_shentsize));
359 shstrtab = elfbase + shdr->sh_offset;
361 for ( h = 0; h < ehdr->e_phnum; h++ )
362 {
363 phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
364 if ( !is_loadable_phdr(phdr) )
365 continue;
366 if ( phdr->p_paddr < kernstart )
367 kernstart = phdr->p_paddr;
368 if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
369 kernend = phdr->p_paddr + phdr->p_memsz;
370 }
372 if ( (kernstart > kernend) ||
373 (ehdr->e_entry < kernstart) ||
374 (ehdr->e_entry > kernend) )
375 {
376 xc_set_error(XC_INVALID_KERNEL,
377 "Malformed ELF image.");
378 return -EINVAL;
379 }
381 dsi->v_start = 0x00000000;
383 dsi->v_kernstart = kernstart;
384 dsi->v_kernend = kernend;
385 dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;
387 dsi->v_end = dsi->v_kernend;
389 return 0;
390 }
/*
 * Copy the loadable segments of an ELF image into a guest's physical
 * memory, one page at a time, mapping each destination page through
 * xc_map_foreign_range().  For each segment, p_filesz bytes are copied
 * from the image, then the remainder up to p_memsz is zero-filled
 * (BSS).  parray translates guest pfns to machine frames.
 *
 * Returns 0 on success, -1 if any destination page cannot be mapped.
 */
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        /* Copy the file-backed part of the segment, page by page. */
        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                xch, dom, PAGE_SIZE, PROT_WRITE,
                parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            /* Clip the chunk so it never crosses a page boundary. */
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        /* 'done' carries over: zero-fill from p_filesz up to p_memsz. */
        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                xch, dom, PAGE_SIZE, PROT_WRITE,
                parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}
443 /* xc_hvm_build
444 *
445 * Create a domain for a virtualized Linux, using files/filenames
446 *
447 */
/*
 * Create an HVM domain from an image file on disk.
 *
 * Reads (and, if necessary, decompresses) image_name via
 * xc_read_image(), then hands the buffer to the common build path.
 * Returns the build result, or -1 if the image cannot be read.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    char *image;
    unsigned long image_size;
    int sts = -1;

    if ( image_name == NULL )
        return -1;

    image = xc_read_image(image_name, &image_size);
    if ( image != NULL )
    {
        sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                    image, image_size);
        free(image);
    }

    return sts;
}
469 /* xc_hvm_build_mem
470 *
471 * Create a domain for a virtualized Linux, using buffers
472 *
473 */
/*
 * Create an HVM domain from an in-memory image buffer.
 *
 * The buffer is inflated with xc_inflate_buffer() if compressed, then
 * passed to the common build path.  Returns the build result, or -1 on
 * a missing buffer or inflate failure.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    char *img;
    unsigned long img_len;
    int sts;

    /* Validate that there is a kernel buffer. */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate ram disk buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize, img, img_len);

    /* xc_inflate_buffer may hand back the caller's buffer unchanged
     * (already-inflated input), so only free a genuinely new buffer. */
    if ( img != image_buffer )
        free(img);

    return sts;
}
512 /*
513 * Local variables:
514 * mode: C
515 * c-set-style: "BSD"
516 * c-basic-offset: 4
517 * tab-width: 4
518 * indent-tabs-mode: nil
519 * End:
520 */