direct-io.hg

view tools/libxc/xc_hvm_build.c @ 14414:cb1693873a7e

libxc: Explicitly reserve space for special pages in HVM memmap.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Mar 16 18:59:28 2007 +0000 (2007-03-16)
parents f3f5f2756d75
children 36e6f85cd572
line source
1 /******************************************************************************
2 * xc_hvm_build.c
3 */
5 #include <stddef.h>
6 #include <inttypes.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <zlib.h>
11 #include "xg_private.h"
12 #include "xc_private.h"
14 #include <xen/foreign/x86_32.h>
15 #include <xen/foreign/x86_64.h>
16 #include <xen/hvm/hvm_info_table.h>
17 #include <xen/hvm/params.h>
18 #include <xen/hvm/e820.h>
20 #include <xen/libelf.h>
22 #define SCRATCH_PFN 0xFFFFF
24 /* Need to provide the right flavour of vcpu context for Xen */
25 typedef union
26 {
27 vcpu_guest_context_x86_64_t c64;
28 vcpu_guest_context_x86_32_t c32;
29 vcpu_guest_context_t c;
30 } vcpu_guest_context_either_t;
33 int xc_set_hvm_param(
34 int handle, domid_t dom, int param, unsigned long value)
35 {
36 DECLARE_HYPERCALL;
37 xen_hvm_param_t arg;
38 int rc;
40 hypercall.op = __HYPERVISOR_hvm_op;
41 hypercall.arg[0] = HVMOP_set_param;
42 hypercall.arg[1] = (unsigned long)&arg;
43 arg.domid = dom;
44 arg.index = param;
45 arg.value = value;
46 if ( lock_pages(&arg, sizeof(arg)) != 0 )
47 return -1;
48 rc = do_xen_hypercall(handle, &hypercall);
49 unlock_pages(&arg, sizeof(arg));
50 return rc;
51 }
53 int xc_get_hvm_param(
54 int handle, domid_t dom, int param, unsigned long *value)
55 {
56 DECLARE_HYPERCALL;
57 xen_hvm_param_t arg;
58 int rc;
60 hypercall.op = __HYPERVISOR_hvm_op;
61 hypercall.arg[0] = HVMOP_get_param;
62 hypercall.arg[1] = (unsigned long)&arg;
63 arg.domid = dom;
64 arg.index = param;
65 if ( lock_pages(&arg, sizeof(arg)) != 0 )
66 return -1;
67 rc = do_xen_hypercall(handle, &hypercall);
68 unlock_pages(&arg, sizeof(arg));
69 *value = arg.value;
70 return rc;
71 }
73 static void build_e820map(void *e820_page, unsigned long long mem_size)
74 {
75 struct e820entry *e820entry =
76 (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
77 unsigned long long extra_mem_size = 0;
78 unsigned char nr_map = 0;
80 /*
81 * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
82 * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
83 * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
84 */
85 if ( mem_size > HVM_BELOW_4G_RAM_END )
86 {
87 extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
88 mem_size = HVM_BELOW_4G_RAM_END;
89 }
91 /* 0x0-0x9F000: Ordinary RAM. */
92 e820entry[nr_map].addr = 0x0;
93 e820entry[nr_map].size = 0x9F000;
94 e820entry[nr_map].type = E820_RAM;
95 nr_map++;
97 /*
98 * 0x9F000-0x9F800: SMBIOS tables.
99 * 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA).
100 * TODO: SMBIOS tables should be moved higher (>=0xE0000).
101 * They are unusually low in our memory map: could cause problems?
102 */
103 e820entry[nr_map].addr = 0x9F000;
104 e820entry[nr_map].size = 0x1000;
105 e820entry[nr_map].type = E820_RESERVED;
106 nr_map++;
108 /*
109 * Following regions are standard regions of the PC memory map.
110 * They are not covered by e820 regions. OSes will not use as RAM.
111 * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
112 * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
113 * TODO: hvmloader should free pages which turn out to be unused.
114 */
116 /*
117 * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
118 * We *cannot* mark as E820_ACPI, for two reasons:
119 * 1. ACPI spec. says that E820_ACPI regions below
120 * 16MB must clip INT15h 0x88 and 0xe801 queries.
121 * Our rombios doesn't do this.
122 * 2. The OS is allowed to reclaim ACPI memory after
123 * parsing the tables. But our FACS is in this
124 * region and it must not be reclaimed (it contains
125 * the ACPI global lock!).
126 * 0xF0000-0x100000: System BIOS.
127 * TODO: hvmloader should free pages which turn out to be unused.
128 */
129 e820entry[nr_map].addr = 0xE0000;
130 e820entry[nr_map].size = 0x20000;
131 e820entry[nr_map].type = E820_RESERVED;
132 nr_map++;
134 /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
135 e820entry[nr_map].addr = 0x100000;
136 e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
137 e820entry[nr_map].type = E820_RAM;
138 nr_map++;
140 /* Explicitly reserve space for special pages (ioreq and xenstore). */
141 e820entry[nr_map].addr = mem_size - PAGE_SIZE * 3;
142 e820entry[nr_map].size = PAGE_SIZE * 3;
143 e820entry[nr_map].type = E820_RESERVED;
144 nr_map++;
146 if ( extra_mem_size )
147 {
148 e820entry[nr_map].addr = (1ULL << 32);
149 e820entry[nr_map].size = extra_mem_size;
150 e820entry[nr_map].type = E820_RAM;
151 nr_map++;
152 }
154 *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
155 }
157 static int
158 loadelfimage(struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
159 {
160 privcmd_mmap_entry_t *entries = NULL;
161 int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
162 int i, rc = -1;
164 /* map hvmloader address space */
165 entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
166 if (NULL == entries)
167 goto err;
168 elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
169 MAP_SHARED, xch, 0);
170 if (MAP_FAILED == elf->dest)
171 goto err;
173 for (i = 0; i < pages; i++)
174 {
175 entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
176 entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
177 entries[i].npages = 1;
178 }
179 rc = xc_map_foreign_ranges(xch, dom, entries, pages);
180 if (rc < 0)
181 goto err;
183 /* load hvmloader */
184 elf_load_binary(elf);
185 rc = 0;
187 err:
188 /* cleanup */
189 if (elf->dest) {
190 munmap(elf->dest, pages << PAGE_SHIFT);
191 elf->dest = NULL;
192 }
193 if (entries)
194 free(entries);
196 return rc;
197 }
199 static int setup_guest(int xc_handle,
200 uint32_t dom, int memsize,
201 char *image, unsigned long image_size,
202 vcpu_guest_context_either_t *ctxt)
203 {
204 xen_pfn_t *page_array = NULL;
205 unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
206 unsigned long shared_page_nr;
207 struct xen_add_to_physmap xatp;
208 struct shared_info *shared_info;
209 void *e820_page;
210 struct elf_binary elf;
211 uint64_t v_start, v_end;
212 int rc;
213 xen_capabilities_info_t caps;
215 if (0 != elf_init(&elf, image, image_size))
216 goto error_out;
217 elf_parse_binary(&elf);
218 v_start = 0;
219 v_end = (unsigned long long)memsize << 20;
221 if (xc_version(xc_handle, XENVER_capabilities, &caps) != 0)
222 {
223 PERROR("Could not get Xen capabilities\n");
224 goto error_out;
225 }
227 if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
228 {
229 PERROR("Guest OS must load to a page boundary.\n");
230 goto error_out;
231 }
233 IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
234 " Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
235 " TOTAL: %016"PRIx64"->%016"PRIx64"\n"
236 " ENTRY ADDRESS: %016"PRIx64"\n",
237 elf.pstart, elf.pend,
238 v_start, v_end,
239 elf_uval(&elf, elf.ehdr, e_entry));
241 if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
242 {
243 PERROR("Could not allocate memory.\n");
244 goto error_out;
245 }
247 for ( i = 0; i < nr_pages; i++ )
248 page_array[i] = i;
249 for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
250 page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
252 /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
253 rc = xc_domain_memory_populate_physmap(
254 xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages,
255 0, 0, &page_array[0x00]);
256 if ( (rc == 0) && (nr_pages > 0xc0) )
257 rc = xc_domain_memory_populate_physmap(
258 xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
259 if ( rc != 0 )
260 {
261 PERROR("Could not allocate memory for HVM guest.\n");
262 goto error_out;
263 }
265 loadelfimage(&elf, xc_handle, dom, page_array);
267 if ( (e820_page = xc_map_foreign_range(
268 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
269 E820_MAP_PAGE >> PAGE_SHIFT)) == NULL )
270 goto error_out;
271 memset(e820_page, 0, PAGE_SIZE);
272 build_e820map(e820_page, v_end);
273 munmap(e820_page, PAGE_SIZE);
275 /* Map and initialise shared_info page. */
276 xatp.domid = dom;
277 xatp.space = XENMAPSPACE_shared_info;
278 xatp.idx = 0;
279 xatp.gpfn = SCRATCH_PFN;
280 if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
281 ((shared_info = xc_map_foreign_range(
282 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
283 SCRATCH_PFN)) == NULL) )
284 goto error_out;
285 memset(shared_info, 0, PAGE_SIZE);
286 /* NB. evtchn_upcall_mask is unused: leave as zero. */
287 memset(&shared_info->evtchn_mask[0], 0xff,
288 sizeof(shared_info->evtchn_mask));
289 shared_info->arch.max_pfn = page_array[nr_pages - 1];
290 munmap(shared_info, PAGE_SIZE);
292 if ( v_end > HVM_BELOW_4G_RAM_END )
293 shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
294 else
295 shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
297 /* Paranoia: clean pages. */
298 if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
299 xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
300 xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
301 goto error_out;
303 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
304 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
305 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
307 free(page_array);
309 /* Set [er]ip in the way that's right for Xen */
310 if ( strstr(caps, "x86_64") )
311 {
312 ctxt->c64.user_regs.rip = elf_uval(&elf, elf.ehdr, e_entry);
313 ctxt->c64.flags = VGCF_online;
314 }
315 else
316 {
317 ctxt->c32.user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry);
318 ctxt->c32.flags = VGCF_online;
319 }
321 return 0;
323 error_out:
324 free(page_array);
325 return -1;
326 }
328 static int xc_hvm_build_internal(int xc_handle,
329 uint32_t domid,
330 int memsize,
331 char *image,
332 unsigned long image_size)
333 {
334 struct xen_domctl launch_domctl;
335 vcpu_guest_context_either_t ctxt;
336 int rc;
338 if ( (image == NULL) || (image_size == 0) )
339 {
340 ERROR("Image required");
341 goto error_out;
342 }
344 memset(&ctxt, 0, sizeof(ctxt));
346 if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
347 {
348 goto error_out;
349 }
351 if ( lock_pages(&ctxt, sizeof(ctxt) ) )
352 {
353 PERROR("%s: ctxt mlock failed", __func__);
354 goto error_out;
355 }
357 memset(&launch_domctl, 0, sizeof(launch_domctl));
358 launch_domctl.domain = (domid_t)domid;
359 launch_domctl.u.vcpucontext.vcpu = 0;
360 set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt.c);
361 launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
362 rc = xc_domctl(xc_handle, &launch_domctl);
364 unlock_pages(&ctxt, sizeof(ctxt));
366 return rc;
368 error_out:
369 return -1;
370 }
/* A program header is loadable iff it is PT_LOAD and writable or executable. */
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    if ( phdr->p_type != PT_LOAD )
        return 0;
    return ( (phdr->p_flags & (PF_W|PF_X)) != 0 );
}
378 /* xc_hvm_build
379 *
380 * Create a domain for a virtualized Linux, using files/filenames
381 *
382 */
/*
 * xc_hvm_build
 *
 * Create a domain for a virtualized Linux, loading hvmloader from a file.
 * Returns the build result, or -1 if the image cannot be read.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    unsigned long image_size;
    char *image;
    int ret;

    if ( image_name == NULL )
        return -1;

    image = xc_read_image(image_name, &image_size);
    if ( image == NULL )
        return -1;

    ret = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);

    free(image);

    return ret;
}
404 /* xc_hvm_build_mem
405 *
406 * Create a domain for a virtualized Linux, using buffers
407 *
408 */
/*
 * xc_hvm_build_mem
 *
 * Create a domain for a virtualized Linux, using an in-memory (possibly
 * compressed) hvmloader image. Returns the build result, or -1 on error.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    unsigned long img_len;
    char *img;
    int ret;

    /* Validate that there is a kernel buffer. */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate ram disk buffer");
        return -1;
    }

    ret = xc_hvm_build_internal(xc_handle, domid, memsize, img, img_len);

    /*
     * xc_inflate_buffer may return the original buffer pointer (for
     * already-inflated input), in which case it must not be freed.
     */
    if ( img != image_buffer )
        free(img);

    return ret;
}
447 /*
448 * Local variables:
449 * mode: C
450 * c-set-style: "BSD"
451 * c-basic-offset: 4
452 * tab-width: 4
453 * indent-tabs-mode: nil
454 * End:
455 */