ia64/xen-unstable: tools/libxc/xc_hvm_build.c @ 16029:772674585a1a

hvm: Avoid the need for an ugly setcpucontext() in the HVM domain builder
by pre-setting vcpu0 to runnable inside Xen and having the builder insert
a JMP instruction at address 0x0 to reach the hvmloader entry point.

Signed-off-by: Keir Fraser <keir@xensource.com>
author    Keir Fraser <keir@xensource.com>
date      Mon Oct 01 15:12:05 2007 +0100
parents   7eeddd787d2f
children  be960a84e492

/******************************************************************************
 * xc_hvm_build.c
 */

#include <stddef.h>
#include <inttypes.h>
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#include "xg_private.h"
#include "xc_private.h"

#include <xen/foreign/x86_32.h>
#include <xen/foreign/x86_64.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/params.h>
#include "xc_e820.h"

#include <xen/libelf.h>
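
/*
 * SCRATCH_PFN is a guest-physical frame just below 4GB, used only
 * transiently below: the shared_info page is temporarily mapped at this
 * location so the builder can initialise it, then the mapping is dropped.
 */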
#define SCRATCH_PFN 0xFFFFF

static void build_e820map(void *e820_page, unsigned long long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + HVM_E820_OFFSET);
    unsigned long long extra_mem_size = 0;
    unsigned char nr_map = 0;

    /*
     * The physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
     * for PCI device MMIO. If the guest has more than HVM_BELOW_4G_RAM_END
     * of RAM, the memory beyond HVM_BELOW_4G_RAM_END is relocated above 4G.
     */
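    /*
     * Worked example (assuming HVM_BELOW_4G_RAM_END is 0xF0000000, i.e. a
     * 256MB MMIO hole): a guest configured with 4096MB of RAM ends up with
     * 3840MB below 4G and the remaining 256MB of RAM starting at the 4G
     * boundary.
     */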
    if ( mem_size > HVM_BELOW_4G_RAM_END )
    {
        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
        mem_size = HVM_BELOW_4G_RAM_END;
    }

    /* 0x0-0x9FC00: Ordinary RAM. */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9FC00;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
    e820entry[nr_map].addr = 0x9FC00;
    e820entry[nr_map].size = 0x400;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;
    /*
     * The following regions are standard parts of the PC memory map and are
     * not covered by e820 entries; OSes will not treat them as RAM.
     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
     * TODO: hvmloader should free pages which turn out to be unused.
     */
    /*
     * 0xE0000-0xF0000: PC-specific area. We place ACPI tables here.
     * We *cannot* mark it as E820_ACPI, for two reasons:
     *  1. The ACPI spec. says that E820_ACPI regions below
     *     16MB must clip INT15h 0x88 and 0xe801 queries.
     *     Our rombios doesn't do this.
     *  2. The OS is allowed to reclaim ACPI memory after
     *     parsing the tables. But our FACS is in this
     *     region and it must not be reclaimed (it contains
     *     the ACPI global lock!).
     * 0xF0000-0x100000: System BIOS.
     * TODO: hvmloader should free pages which turn out to be unused.
     */
    e820entry[nr_map].addr = 0xE0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Explicitly reserve space for special pages (ioreq and xenstore). */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE * 3;
    e820entry[nr_map].size = PAGE_SIZE * 3;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    if ( extra_mem_size )
    {
        e820entry[nr_map].addr = (1ULL << 32);
        e820entry[nr_map].size = extra_mem_size;
        e820entry[nr_map].type = E820_RAM;
        nr_map++;
    }

    *(((unsigned char *)e820_page) + HVM_E820_NR_OFFSET) = nr_map;
}
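
/*
 * loadelfimage() maps the guest-physical frames that will hold the ELF
 * image into the builder's own address space through the privcmd mmap
 * interface, lets libelf copy the image into place, and then tears the
 * mapping down again.
 */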
static int loadelfimage(
    struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
{
    privcmd_mmap_entry_t *entries = NULL;
    int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
    int i, rc = -1;

    /* Map address space for initial elf image. */
    entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
    if ( entries == NULL )
        goto err;
    elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
                     MAP_SHARED, xch, 0);
    if ( elf->dest == MAP_FAILED )
    {
        elf->dest = NULL; /* don't munmap(MAP_FAILED) in the error path */
        goto err;
    }

    for ( i = 0; i < pages; i++ )
    {
        entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
        entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
        entries[i].npages = 1;
    }

    rc = xc_map_foreign_ranges(xch, dom, entries, pages);
    if ( rc < 0 )
        goto err;

    /* Load the initial elf image. */
    elf_load_binary(elf);
    rc = 0;

 err:
    if ( elf->dest )
    {
        munmap(elf->dest, pages << PAGE_SHIFT);
        elf->dest = NULL;
    }

    if ( entries )
        free(entries);

    return rc;
}
static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size)
{
    xen_pfn_t *page_array = NULL;
    unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
    unsigned long shared_page_nr, entry_eip;
    struct xen_add_to_physmap xatp;
    struct shared_info *shared_info;
    void *e820_page;
    struct elf_binary elf;
    uint64_t v_start, v_end;
    int rc;
    xen_capabilities_info_t caps;

    /* An HVM guest must be initialised with at least 2MB memory. */
    if ( memsize < 2 )
        goto error_out;

    if ( elf_init(&elf, image, image_size) != 0 )
        goto error_out;
    elf_parse_binary(&elf);
    v_start = 0;
    v_end = (unsigned long long)memsize << 20;

    if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 )
    {
        PERROR("Could not get Xen capabilities\n");
        goto error_out;
    }

    if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
            "  Loader:        %016"PRIx64"->%016"PRIx64"\n"
            "  TOTAL:         %016"PRIx64"->%016"PRIx64"\n"
            "  ENTRY ADDRESS: %016"PRIx64"\n",
            elf.pstart, elf.pend,
            v_start, v_end,
            elf_uval(&elf, elf.ehdr, e_entry));

    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
    {
        PERROR("Could not allocate memory.\n");
        goto error_out;
    }
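
    /*
     * Build the guest-physical frame list: PFNs are identity-mapped, except
     * that frames at or above HVM_BELOW_4G_RAM_END are shifted up by the
     * size of the below-4G MMIO hole, matching the e820 map built above.
     */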
    for ( i = 0; i < nr_pages; i++ )
        page_array[i] = i;
    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;

    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
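    /* (PFNs 0x00-0x9f first, then 0xc0 upwards; 0xa0-0xbf stay unpopulated.) */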
    rc = xc_domain_memory_populate_physmap(
        xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
    if ( rc == 0 )
        rc = xc_domain_memory_populate_physmap(
            xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
    if ( rc != 0 )
    {
        PERROR("Could not allocate memory for HVM guest.\n");
        goto error_out;
    }

    if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
        goto error_out;

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              HVM_E820_PAGE >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    build_e820map(e820_page, v_end);
    munmap(e820_page, PAGE_SIZE);

    /* Map and initialise shared_info page. */
    xatp.domid = dom;
    xatp.space = XENMAPSPACE_shared_info;
    xatp.idx   = 0;
    xatp.gpfn  = SCRATCH_PFN;
    if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
         ((shared_info = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
               SCRATCH_PFN)) == NULL) )
        goto error_out;
    memset(shared_info, 0, PAGE_SIZE);
    /* NB. evtchn_upcall_mask is unused: leave as zero. */
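    /* Start with all event channels masked. */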
    memset(&shared_info->evtchn_mask[0], 0xff,
           sizeof(shared_info->evtchn_mask));
    munmap(shared_info, PAGE_SIZE);

    if ( v_end > HVM_BELOW_4G_RAM_END )
        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
    else
        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
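
    /*
     * The top three RAM pages below 4G become the special pages: ioreq at
     * shared_page_nr, xenstore at shared_page_nr-1, buffered ioreq at
     * shared_page_nr-2 (matching the reservation made in build_e820map()).
     */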
    /* Paranoia: clean pages. */
    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
        goto error_out;

    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);

    /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
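    /*
     * 0xE9 is the opcode for JMP rel32; the 32-bit displacement is relative
     * to the end of the 5-byte instruction, hence entry_eip - 5 for an
     * instruction placed at address 0x0.
     */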
    entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
    if ( entry_eip != 0 )
    {
        char *page0 = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, 0);
        if ( page0 == NULL )
            goto error_out;
        page0[0] = 0xe9;
        *(uint32_t *)&page0[1] = entry_eip - 5;
        munmap(page0, PAGE_SIZE);
    }

    free(page_array);
    return 0;

 error_out:
    free(page_array);
    return -1;
}
static int xc_hvm_build_internal(int xc_handle,
                                 uint32_t domid,
                                 int memsize,
                                 char *image,
                                 unsigned long image_size)
{
    if ( (image == NULL) || (image_size == 0) )
    {
        ERROR("Image required");
        return -1;
    }

    return setup_guest(xc_handle, domid, memsize, image, image_size);
}
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}
/* xc_hvm_build:
 * Create a domain for an HVM guest, using files/filenames.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    char *image;
    int sts;
    unsigned long image_size;

    if ( (image_name == NULL) ||
         ((image = xc_read_image(image_name, &image_size)) == NULL) )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);

    free(image);

    return sts;
}
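
/*
 * Typical usage (a rough sketch, not taken from this tree): the caller
 * creates the domain and sets its memory reservation first, then hands the
 * hvmloader image to the builder, roughly:
 *
 *     int xc = xc_interface_open();
 *     rc = xc_hvm_build(xc, domid, memsize_mb,
 *                       "/usr/lib/xen/boot/hvmloader");
 *
 * The loader path and the surrounding setup are illustrative assumptions;
 * memsize is in megabytes, as used by setup_guest() above.
 */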
/* xc_hvm_build_mem:
 * Create a domain for an HVM guest, using an in-memory image buffer.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer. */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }
    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate kernel image buffer");
        return -1;
    }
    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already-inflated buffers), so take care when freeing. */
    if ( (img != NULL) && (img != image_buffer) )
        free(img);

    return sts;
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */