direct-io.hg

tools/libxc/xc_hvm_build.c @ 14099:aa1be6f5150e

x86 hvm domain builder, restore: set shared_info.arch.max_pfn for
dump-core to know the area to dump

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author   Keir Fraser <keir@xensource.com>
date     Sat Feb 24 14:19:05 2007 +0000
parents  8f75c7910b9c
children 1966270d89d7

/******************************************************************************
 * xc_hvm_build.c
 */

#include <stddef.h>
#include <inttypes.h>
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#include "xg_private.h"
#include "xc_private.h"

#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/params.h>
#include <xen/hvm/e820.h>

#include <xen/libelf.h>
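
/*
 * Scratch guest pfn at which the shared_info page is temporarily mapped
 * so that the toolstack can initialise it (see setup_guest() below).
 */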
#define SCRATCH_PFN 0xFFFFF
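
/*
 * Set the value of an HVM parameter for domain @dom via the
 * HVMOP_set_param hypercall. The argument structure is locked in
 * memory for the duration of the call so the hypervisor can safely
 * access it. Returns 0 on success, -1 on failure.
 */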
int xc_set_hvm_param(
    int handle, domid_t dom, int param, unsigned long value)
{
    DECLARE_HYPERCALL;
    xen_hvm_param_t arg;
    int rc;

    hypercall.op     = __HYPERVISOR_hvm_op;
    hypercall.arg[0] = HVMOP_set_param;
    hypercall.arg[1] = (unsigned long)&arg;
    arg.domid = dom;
    arg.index = param;
    arg.value = value;
    if ( lock_pages(&arg, sizeof(arg)) != 0 )
        return -1;
    rc = do_xen_hypercall(handle, &hypercall);
    unlock_pages(&arg, sizeof(arg));
    return rc;
}
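
/*
 * Retrieve the value of an HVM parameter for domain @dom via the
 * HVMOP_get_param hypercall, storing the result in *value.
 * Returns 0 on success, -1 on failure.
 */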
int xc_get_hvm_param(
    int handle, domid_t dom, int param, unsigned long *value)
{
    DECLARE_HYPERCALL;
    xen_hvm_param_t arg;
    int rc;

    hypercall.op     = __HYPERVISOR_hvm_op;
    hypercall.arg[0] = HVMOP_get_param;
    hypercall.arg[1] = (unsigned long)&arg;
    arg.domid = dom;
    arg.index = param;
    if ( lock_pages(&arg, sizeof(arg)) != 0 )
        return -1;
    rc = do_xen_hypercall(handle, &hypercall);
    unlock_pages(&arg, sizeof(arg));
    *value = arg.value;
    return rc;
}
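
/*
 * Construct the guest's e820 memory map in the page mapped at e820_page.
 * As a worked example (assuming the usual value of HVM_BELOW_4G_RAM_END,
 * 0xF0000000, i.e. 3840MB): a 4096MB guest has mem_size == 0x100000000,
 * so extra_mem_size == 0x10000000 (256MB), and the final map gains a RAM
 * entry starting at 4GB that covers the remainder.
 */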
static void build_e820map(void *e820_page, unsigned long long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned long long extra_mem_size = 0;
    unsigned char nr_map = 0;

    /*
     * The physical address space from HVM_BELOW_4G_RAM_END to 4GB is
     * reserved for PCI device MMIO. If the guest has more than
     * HVM_BELOW_4G_RAM_END of RAM, the excess is relocated above 4GB.
     */
    if ( mem_size > HVM_BELOW_4G_RAM_END )
    {
        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
        mem_size = HVM_BELOW_4G_RAM_END;
    }

    /* 0x0-0x9F000: Ordinary RAM. */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F000;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /*
     * 0x9F000-0x9F800: SMBIOS tables.
     * 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA).
     * TODO: SMBIOS tables should be moved higher (>=0xE0000).
     *       They are unusually low in our memory map: could cause problems?
     */
    e820entry[nr_map].addr = 0x9F000;
    e820entry[nr_map].size = 0x1000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    /*
     * The following regions are standard regions of the PC memory map.
     * They are not covered by e820 entries, so OSes will not use them as RAM.
     * 0xA0000-0xC0000: VGA memory-mapped I/O.
     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
     * TODO: hvmloader should free pages which turn out to be unused.
     */

    /*
     * 0xE0000-0xF0000: PC-specific area. We place ACPI tables here.
     *                  We *cannot* mark it as E820_ACPI, for two reasons:
     *                  1. The ACPI spec. says that E820_ACPI regions below
     *                     16MB must clip INT15h 0x88 and 0xe801 queries.
     *                     Our rombios doesn't do this.
     *                  2. The OS is allowed to reclaim ACPI memory after
     *                     parsing the tables. But our FACS is in this
     *                     region and it must not be reclaimed (it contains
     *                     the ACPI global lock!).
     * 0xF0000-0x100000: System BIOS.
     * TODO: hvmloader should free pages which turn out to be unused.
     */
    e820entry[nr_map].addr = 0xE0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    if ( extra_mem_size )
    {
        e820entry[nr_map].addr = (1ULL << 32);
        e820entry[nr_map].size = extra_mem_size;
        e820entry[nr_map].type = E820_RAM;
        nr_map++;
    }

    *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
}
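
/*
 * Copy the hvmloader ELF image into the guest: map the guest-physical
 * pages that will hold it (listed in parray) into our own address space
 * via the privcmd interface, let libelf copy the loadable segments in,
 * then tear the mapping down again.
 */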
static int
loadelfimage(struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
{
    privcmd_mmap_entry_t *entries = NULL;
    int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
    int i, rc = -1;

    /* Map the hvmloader address space. */
    entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
    if (NULL == entries)
        goto err;
    elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
                     MAP_SHARED, xch, 0);
    if (MAP_FAILED == elf->dest)
    {
        /* Reset to NULL so the cleanup path does not munmap(MAP_FAILED). */
        elf->dest = NULL;
        goto err;
    }

    for (i = 0; i < pages; i++)
    {
        entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
        entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
        entries[i].npages = 1;
    }
    rc = xc_map_foreign_ranges(xch, dom, entries, pages);
    if (rc < 0)
        goto err;

    /* Load hvmloader. */
    elf_load_binary(elf);
    rc = 0;

 err:
    /* Cleanup. */
    if (elf->dest) {
        munmap(elf->dest, pages << PAGE_SHIFT);
        elf->dest = NULL;
    }
    if (entries)
        free(entries);

    return rc;
}
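
/*
 * Build the guest address space: parse the hvmloader image, construct
 * the physmap (skipping the VGA and PCI-MMIO holes), populate it with
 * memory, load hvmloader, write the e820 map, initialise shared_info,
 * and reserve/register the ioreq, bufioreq and xenstore special pages.
 */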
static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       vcpu_guest_context_t *ctxt)
{
    xen_pfn_t *page_array = NULL;
    unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
    unsigned long shared_page_nr;
    struct xen_add_to_physmap xatp;
    struct shared_info *shared_info;
    void *e820_page;
    struct elf_binary elf;
    uint64_t v_start, v_end;
    int rc;

    if (0 != elf_init(&elf, image, image_size))
        goto error_out;
    elf_parse_binary(&elf);
    v_start = 0;
    v_end = (unsigned long long)memsize << 20;

    if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
            "  Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
            "  TOTAL:             %016"PRIx64"->%016"PRIx64"\n"
            "  ENTRY ADDRESS:     %016"PRIx64"\n",
            elf.pstart, elf.pend,
            v_start, v_end,
            elf_uval(&elf, elf.ehdr, e_entry));

    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
    {
        PERROR("Could not allocate memory.\n");
        goto error_out;
    }
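
    /*
     * Build an identity physmap, then shift every pfn at or above
     * HVM_BELOW_4G_RAM_END up by the size of the PCI-MMIO hole. For
     * example, assuming the usual HVM_BELOW_4G_RAM_END of 0xF0000000,
     * guest page 0xF0000 is placed at pfn 0x100000, i.e. just above 4GB.
     */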
    for ( i = 0; i < nr_pages; i++ )
        page_array[i] = i;
    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;

    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
    rc = xc_domain_memory_populate_physmap(
        xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages,
        0, 0, &page_array[0x00]);
    if ( (rc == 0) && (nr_pages > 0xc0) )
        rc = xc_domain_memory_populate_physmap(
            xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
    if ( rc != 0 )
    {
        PERROR("Could not allocate memory for HVM guest.\n");
        goto error_out;
    }

    loadelfimage(&elf, xc_handle, dom, page_array);

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              E820_MAP_PAGE >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    build_e820map(e820_page, v_end);
    munmap(e820_page, PAGE_SIZE);

    /* Map and initialise shared_info page. */
    xatp.domid = dom;
    xatp.space = XENMAPSPACE_shared_info;
    xatp.idx   = 0;
    xatp.gpfn  = SCRATCH_PFN;
    if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
         ((shared_info = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
             SCRATCH_PFN)) == NULL) )
        goto error_out;
    memset(shared_info, 0, PAGE_SIZE);
    /* NB. evtchn_upcall_mask is unused: leave as zero. */
    memset(&shared_info->evtchn_mask[0], 0xff,
           sizeof(shared_info->evtchn_mask));
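    /*
     * Record the highest populated guest pfn in arch.max_pfn so that
     * dump-core knows how large an area it must dump (the purpose of
     * this changeset).
     */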
    shared_info->arch.max_pfn = page_array[nr_pages - 1];
    munmap(shared_info, PAGE_SIZE);

    if ( v_end > HVM_BELOW_4G_RAM_END )
        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
    else
        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
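
    /*
     * The three pages ending at shared_page_nr are the ioreq, xenstore
     * and bufioreq pages: exactly the three pages removed from the low
     * RAM entry in build_e820map() above, just below the top of
     * below-4GB RAM.
     */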
    /* Paranoia: clean pages. */
    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
        goto error_out;

    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);

    free(page_array);

    ctxt->user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry);

    return 0;

 error_out:
    free(page_array);
    return -1;
}
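
/*
 * Common back end for xc_hvm_build() and xc_hvm_build_mem(): run
 * setup_guest() on the (already uncompressed) image, then install the
 * resulting initial vcpu context with a XEN_DOMCTL_setvcpucontext
 * domctl. The context must be locked in memory across the hypercall.
 */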
static int xc_hvm_build_internal(int xc_handle,
                                 uint32_t domid,
                                 int memsize,
                                 char *image,
                                 unsigned long image_size)
{
    struct xen_domctl launch_domctl;
    vcpu_guest_context_t ctxt;
    int rc;

    if ( (image == NULL) || (image_size == 0) )
    {
        ERROR("Image required");
        goto error_out;
    }

    memset(&ctxt, 0, sizeof(ctxt));

    if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
    {
        goto error_out;
    }

    if ( lock_pages(&ctxt, sizeof(ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        goto error_out;
    }

    memset(&launch_domctl, 0, sizeof(launch_domctl));
    launch_domctl.domain = (domid_t)domid;
    launch_domctl.u.vcpucontext.vcpu = 0;
    set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt);
    launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
    rc = xc_domctl(xc_handle, &launch_domctl);

    unlock_pages(&ctxt, sizeof(ctxt));

    return rc;

 error_out:
    return -1;
}
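
/*
 * Return nonzero if the ELF program header describes a PT_LOAD segment
 * that is writable or executable. (Apparently a leftover: it is not
 * referenced elsewhere in this file.)
 */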
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

/* xc_hvm_build
 *
 * Create an HVM guest domain, reading the loader image from a file.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    char *image;
    int sts;
    unsigned long image_size;

    if ( (image_name == NULL) ||
         ((image = xc_read_image(image_name, &image_size)) == NULL) )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);

    free(image);

    return sts;
}
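
/*
 * Typical usage (a hedged sketch; the hvmloader path is installation-
 * dependent):
 *
 *   int xc = xc_interface_open();
 *   if ( xc >= 0 )
 *       xc_hvm_build(xc, domid, 256, "/usr/lib/xen/boot/hvmloader");
 *
 * memsize is in megabytes, so this builds a 256MB guest.
 */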

/* xc_hvm_build_mem
 *
 * Create an HVM guest domain from an in-memory image buffer.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer. */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if (img == NULL)
    {
        ERROR("unable to inflate kernel image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already-inflated buffers), so exercise some care in freeing. */

    if ( (img != NULL) && (img != image_buffer) )
        free(img);

    return sts;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */