direct-io.hg

annotate tools/libxc/xc_hvm_build.c @ 14099:aa1be6f5150e

x86 hvm domain builder, restore: set shared_info.arch.max_pfn for
dump-core to know the area to dump

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Keir Fraser <keir@xensource.com>
date Sat Feb 24 14:19:05 2007 +0000 (2007-02-24)
parents 8f75c7910b9c
children 1966270d89d7
rev   line source
kaf24@8708 1 /******************************************************************************
kaf24@8708 2 * xc_hvm_build.c
kaf24@8708 3 */
kaf24@8708 4
kaf24@8708 5 #include <stddef.h>
kaf24@10580 6 #include <inttypes.h>
kaf24@8708 7 #include <stdlib.h>
kaf24@8708 8 #include <unistd.h>
kaf24@8708 9 #include <zlib.h>
ack@13620 10
ack@13620 11 #include "xg_private.h"
ack@13620 12 #include "xc_private.h"
ack@13620 13
kaf24@8708 14 #include <xen/hvm/hvm_info_table.h>
kfraser@10911 15 #include <xen/hvm/params.h>
kaf24@11071 16 #include <xen/hvm/e820.h>
kaf24@8708 17
ack@13620 18 #include <xen/libelf.h>
kfraser@12569 19
ack@13620 20 #define SCRATCH_PFN 0xFFFFF
kaf24@8708 21
kfraser@12569 22 int xc_set_hvm_param(
kfraser@12569 23 int handle, domid_t dom, int param, unsigned long value)
kfraser@10911 24 {
kfraser@10911 25 DECLARE_HYPERCALL;
kfraser@10911 26 xen_hvm_param_t arg;
kfraser@10911 27 int rc;
kfraser@10911 28
kfraser@10911 29 hypercall.op = __HYPERVISOR_hvm_op;
kfraser@10911 30 hypercall.arg[0] = HVMOP_set_param;
kfraser@10911 31 hypercall.arg[1] = (unsigned long)&arg;
kfraser@10911 32 arg.domid = dom;
kfraser@10911 33 arg.index = param;
kfraser@10911 34 arg.value = value;
kfraser@11857 35 if ( lock_pages(&arg, sizeof(arg)) != 0 )
kfraser@12569 36 return -1;
kfraser@10911 37 rc = do_xen_hypercall(handle, &hypercall);
kfraser@11857 38 unlock_pages(&arg, sizeof(arg));
kfraser@12569 39 return rc;
kfraser@12569 40 }
kfraser@12569 41
kfraser@12569 42 int xc_get_hvm_param(
kfraser@12569 43 int handle, domid_t dom, int param, unsigned long *value)
kfraser@12569 44 {
kfraser@12569 45 DECLARE_HYPERCALL;
kfraser@12569 46 xen_hvm_param_t arg;
kfraser@12569 47 int rc;
kfraser@12569 48
kfraser@12569 49 hypercall.op = __HYPERVISOR_hvm_op;
kfraser@12569 50 hypercall.arg[0] = HVMOP_get_param;
kfraser@12569 51 hypercall.arg[1] = (unsigned long)&arg;
kfraser@12569 52 arg.domid = dom;
kfraser@12569 53 arg.index = param;
kfraser@12569 54 if ( lock_pages(&arg, sizeof(arg)) != 0 )
kfraser@12569 55 return -1;
kfraser@12569 56 rc = do_xen_hypercall(handle, &hypercall);
kfraser@12569 57 unlock_pages(&arg, sizeof(arg));
kfraser@12569 58 *value = arg.value;
kfraser@12569 59 return rc;
kfraser@10911 60 }
kfraser@10911 61
kfraser@10752 62 static void build_e820map(void *e820_page, unsigned long long mem_size)
kaf24@8708 63 {
kaf24@8708 64 struct e820entry *e820entry =
kaf24@8708 65 (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
Christian@11186 66 unsigned long long extra_mem_size = 0;
kaf24@8708 67 unsigned char nr_map = 0;
kaf24@8708 68
Christian@11186 69 /*
kfraser@12212 70 * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
Christian@11186 71 * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
Christian@11186 72 * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
Christian@11186 73 */
kfraser@12212 74 if ( mem_size > HVM_BELOW_4G_RAM_END )
kfraser@12212 75 {
Christian@11186 76 extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
Christian@11186 77 mem_size = HVM_BELOW_4G_RAM_END;
Christian@11186 78 }
Christian@11186 79
kfraser@12327 80 /* 0x0-0x9F000: Ordinary RAM. */
kaf24@8708 81 e820entry[nr_map].addr = 0x0;
kfraser@11093 82 e820entry[nr_map].size = 0x9F000;
kaf24@8708 83 e820entry[nr_map].type = E820_RAM;
kaf24@8708 84 nr_map++;
kaf24@8708 85
kfraser@12327 86 /*
kfraser@12327 87 * 0x9F000-0x9F800: SMBIOS tables.
kfraser@12327 88 * 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA).
kfraser@12327 89 * TODO: SMBIOS tables should be moved higher (>=0xE0000).
kfraser@12327 90 * They are unusually low in our memory map: could cause problems?
kfraser@12327 91 */
kfraser@11093 92 e820entry[nr_map].addr = 0x9F000;
kfraser@11093 93 e820entry[nr_map].size = 0x1000;
kaf24@8708 94 e820entry[nr_map].type = E820_RESERVED;
kaf24@8708 95 nr_map++;
kaf24@8708 96
kfraser@12327 97 /*
kfraser@12327 98 * Following regions are standard regions of the PC memory map.
kfraser@12327 99 * They are not covered by e820 regions. OSes will not use as RAM.
kfraser@12327 100 * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
kfraser@12327 101 * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
kfraser@12327 102 * TODO: hvmloader should free pages which turn out to be unused.
kfraser@12327 103 */
kfraser@11810 104
kfraser@12327 105 /*
kfraser@12327 106 * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
kfraser@12327 107 * We *cannot* mark as E820_ACPI, for two reasons:
kfraser@12327 108 * 1. ACPI spec. says that E820_ACPI regions below
kfraser@12327 109 * 16MB must clip INT15h 0x88 and 0xe801 queries.
kfraser@12327 110 * Our rombios doesn't do this.
kfraser@12327 111 * 2. The OS is allowed to reclaim ACPI memory after
kfraser@12327 112 * parsing the tables. But our FACS is in this
kfraser@12327 113 * region and it must not be reclaimed (it contains
kfraser@12327 114 * the ACPI global lock!).
kfraser@12327 115 * 0xF0000-0x100000: System BIOS.
kfraser@12327 116 * TODO: hvmloader should free pages which turn out to be unused.
kfraser@12327 117 */
kfraser@12327 118 e820entry[nr_map].addr = 0xE0000;
kfraser@12327 119 e820entry[nr_map].size = 0x20000;
kaf24@8708 120 e820entry[nr_map].type = E820_RESERVED;
kaf24@8708 121 nr_map++;
kaf24@8708 122
kfraser@12212 123 /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
kaf24@8708 124 e820entry[nr_map].addr = 0x100000;
kfraser@12212 125 e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
kaf24@8708 126 e820entry[nr_map].type = E820_RAM;
kaf24@8708 127 nr_map++;
kaf24@8708 128
kfraser@12212 129 if ( extra_mem_size )
kfraser@12212 130 {
Christian@11186 131 e820entry[nr_map].addr = (1ULL << 32);
Christian@11186 132 e820entry[nr_map].size = extra_mem_size;
Christian@11186 133 e820entry[nr_map].type = E820_RAM;
Christian@11186 134 nr_map++;
Christian@11186 135 }
Christian@11186 136
kfraser@10752 137 *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
kaf24@8708 138 }
kaf24@8708 139
ack@13620 140 static int
ack@13620 141 loadelfimage(struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
ack@13620 142 {
ack@13620 143 privcmd_mmap_entry_t *entries = NULL;
ack@13620 144 int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
ack@13620 145 int i, rc = -1;
ack@13620 146
ack@13620 147 /* map hvmloader address space */
ack@13620 148 entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
ack@13620 149 if (NULL == entries)
ack@13620 150 goto err;
ack@13620 151 elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
ack@13620 152 MAP_SHARED, xch, 0);
ack@13620 153 if (MAP_FAILED == elf->dest)
ack@13620 154 goto err;
ack@13620 155
ack@13620 156 for (i = 0; i < pages; i++)
ack@13620 157 {
ack@13620 158 entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
ack@13620 159 entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
ack@13620 160 entries[i].npages = 1;
ack@13620 161 }
ack@13620 162 rc = xc_map_foreign_ranges(xch, dom, entries, pages);
ack@13620 163 if (rc < 0)
ack@13620 164 goto err;
ack@13620 165
ack@13620 166 /* load hvmloader */
ack@13620 167 elf_load_binary(elf);
ack@13620 168 rc = 0;
ack@13620 169
ack@13620 170 err:
ack@13620 171 /* cleanup */
ack@13620 172 if (elf->dest) {
ack@13620 173 munmap(elf->dest, pages << PAGE_SHIFT);
ack@13620 174 elf->dest = NULL;
ack@13620 175 }
ack@13620 176 if (entries)
ack@13620 177 free(entries);
ack@13620 178
ack@13620 179 return rc;
ack@13620 180 }
ack@13620 181
kaf24@8708 182 static int setup_guest(int xc_handle,
kaf24@8708 183 uint32_t dom, int memsize,
kaf24@8708 184 char *image, unsigned long image_size,
kfraser@12569 185 vcpu_guest_context_t *ctxt)
kaf24@8708 186 {
kaf24@10276 187 xen_pfn_t *page_array = NULL;
kfraser@12199 188 unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
kfraser@12199 189 unsigned long shared_page_nr;
kfraser@12569 190 struct xen_add_to_physmap xatp;
kfraser@12569 191 struct shared_info *shared_info;
kaf24@8708 192 void *e820_page;
ack@13620 193 struct elf_binary elf;
ack@13620 194 uint64_t v_start, v_end;
kfraser@12212 195 int rc;
kaf24@8708 196
ack@13620 197 if (0 != elf_init(&elf, image, image_size))
ack@13620 198 goto error_out;
ack@13620 199 elf_parse_binary(&elf);
ack@13620 200 v_start = 0;
ack@13620 201 v_end = (unsigned long long)memsize << 20;
kaf24@8708 202
ack@13620 203 if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
kaf24@8708 204 {
kaf24@8708 205 PERROR("Guest OS must load to a page boundary.\n");
kaf24@8708 206 goto error_out;
kaf24@8708 207 }
kaf24@8708 208
ack@13620 209 IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
ack@13620 210 " Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
ack@13620 211 " TOTAL: %016"PRIx64"->%016"PRIx64"\n"
ack@13620 212 " ENTRY ADDRESS: %016"PRIx64"\n",
ack@13620 213 elf.pstart, elf.pend,
ack@13620 214 v_start, v_end,
ack@13620 215 elf_uval(&elf, elf.ehdr, e_entry));
kaf24@8708 216
kaf24@10276 217 if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
kaf24@8708 218 {
kaf24@9325 219 PERROR("Could not allocate memory.\n");
kaf24@8708 220 goto error_out;
kaf24@8708 221 }
kaf24@8708 222
kfraser@12199 223 for ( i = 0; i < nr_pages; i++ )
kfraser@12199 224 page_array[i] = i;
kfraser@12199 225 for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
kfraser@12199 226 page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
kfraser@12199 227
kfraser@12212 228 /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
kfraser@12212 229 rc = xc_domain_memory_populate_physmap(
kfraser@12212 230 xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages,
kfraser@12212 231 0, 0, &page_array[0x00]);
kfraser@12212 232 if ( (rc == 0) && (nr_pages > 0xc0) )
kfraser@12212 233 rc = xc_domain_memory_populate_physmap(
kfraser@12212 234 xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
kfraser@12212 235 if ( rc != 0 )
kaf24@8708 236 {
kfraser@12199 237 PERROR("Could not allocate memory for HVM guest.\n");
kaf24@8708 238 goto error_out;
kaf24@8708 239 }
kaf24@8708 240
ack@13620 241 loadelfimage(&elf, xc_handle, dom, page_array);
kaf24@8708 242
kaf24@8708 243 if ( (e820_page = xc_map_foreign_range(
kaf24@9325 244 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
kfraser@12569 245 E820_MAP_PAGE >> PAGE_SHIFT)) == NULL )
kaf24@8708 246 goto error_out;
kaf24@8708 247 memset(e820_page, 0, PAGE_SIZE);
kfraser@10752 248 build_e820map(e820_page, v_end);
kaf24@8708 249 munmap(e820_page, PAGE_SIZE);
kaf24@8708 250
kfraser@12569 251 /* Map and initialise shared_info page. */
kfraser@12569 252 xatp.domid = dom;
kfraser@12569 253 xatp.space = XENMAPSPACE_shared_info;
kfraser@12569 254 xatp.idx = 0;
kfraser@12569 255 xatp.gpfn = SCRATCH_PFN;
kfraser@12569 256 if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
kfraser@12569 257 ((shared_info = xc_map_foreign_range(
kfraser@12569 258 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
kfraser@12569 259 SCRATCH_PFN)) == NULL) )
kaf24@8708 260 goto error_out;
kaf24@11069 261 memset(shared_info, 0, PAGE_SIZE);
kfraser@13316 262 /* NB. evtchn_upcall_mask is unused: leave as zero. */
kfraser@12212 263 memset(&shared_info->evtchn_mask[0], 0xff,
kfraser@12212 264 sizeof(shared_info->evtchn_mask));
keir@14099 265 shared_info->arch.max_pfn = page_array[nr_pages - 1];
kaf24@8708 266 munmap(shared_info, PAGE_SIZE);
kaf24@8708 267
Christian@11186 268 if ( v_end > HVM_BELOW_4G_RAM_END )
Christian@11186 269 shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
Christian@11186 270 else
Christian@11186 271 shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
Christian@11186 272
kfraser@12212 273 /* Paranoia: clean pages. */
kfraser@12569 274 if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
kfraser@12569 275 xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
kfraser@12569 276 xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
kaf24@8708 277 goto error_out;
kaf24@8708 278
kfraser@12212 279 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
kfraser@12212 280 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
kfraser@12212 281 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
kaf24@8708 282
kaf24@8708 283 free(page_array);
kaf24@8708 284
ack@13620 285 ctxt->user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry);
kaf24@8708 286
kaf24@8708 287 return 0;
kaf24@8708 288
kaf24@8708 289 error_out:
kaf24@8708 290 free(page_array);
kaf24@8708 291 return -1;
kaf24@8708 292 }
kaf24@8708 293
kaf24@9180 294 static int xc_hvm_build_internal(int xc_handle,
kaf24@9180 295 uint32_t domid,
kaf24@9180 296 int memsize,
kaf24@9180 297 char *image,
kfraser@12569 298 unsigned long image_size)
kaf24@8708 299 {
kfraser@12569 300 struct xen_domctl launch_domctl;
kfraser@12248 301 vcpu_guest_context_t ctxt;
kfraser@12248 302 int rc;
kaf24@8708 303
kaf24@9180 304 if ( (image == NULL) || (image_size == 0) )
kaf24@9180 305 {
kaf24@9180 306 ERROR("Image required");
kaf24@9180 307 goto error_out;
kaf24@9180 308 }
kaf24@9180 309
kfraser@12248 310 memset(&ctxt, 0, sizeof(ctxt));
tdeegan@11151 311
kfraser@12569 312 if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
kaf24@8708 313 {
kaf24@8708 314 goto error_out;
kaf24@8708 315 }
kaf24@8708 316
kfraser@12248 317 if ( lock_pages(&ctxt, sizeof(ctxt) ) )
kaf24@8708 318 {
kfraser@12248 319 PERROR("%s: ctxt mlock failed", __func__);
kfraser@12248 320 goto error_out;
kaf24@8708 321 }
kaf24@8708 322
kfraser@11257 323 memset(&launch_domctl, 0, sizeof(launch_domctl));
kfraser@11257 324 launch_domctl.domain = (domid_t)domid;
kfraser@11257 325 launch_domctl.u.vcpucontext.vcpu = 0;
kfraser@12248 326 set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt);
kfraser@11257 327 launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
kfraser@11257 328 rc = xc_domctl(xc_handle, &launch_domctl);
kaf24@8708 329
kfraser@12248 330 unlock_pages(&ctxt, sizeof(ctxt));
kfraser@12248 331
kaf24@8708 332 return rc;
kaf24@8708 333
kaf24@8708 334 error_out:
kaf24@8708 335 return -1;
kaf24@8708 336 }
kaf24@8708 337
kaf24@8708 338 static inline int is_loadable_phdr(Elf32_Phdr *phdr)
kaf24@8708 339 {
kaf24@8708 340 return ((phdr->p_type == PT_LOAD) &&
kaf24@8708 341 ((phdr->p_flags & (PF_W|PF_X)) != 0));
kaf24@8708 342 }
kaf24@8708 343
/* xc_hvm_build
 *
 * Build an HVM domain from an image file: the file named by 'image_name'
 * is read with xc_read_image(), passed to xc_hvm_build_internal() and
 * freed again afterwards.
 *
 * Returns -1 if 'image_name' is NULL or the image cannot be read,
 * otherwise the result of xc_hvm_build_internal().
 */

int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    unsigned long image_size;
    char *image;
    int sts;

    if ( image_name == NULL )
        return -1;

    image = xc_read_image(image_name, &image_size);
    if ( image == NULL )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);

    free(image);

    return sts;
}
kaf24@9180 369
/* xc_hvm_build_mem
 *
 * Build an HVM domain from an in-memory image: 'image_buffer' (of
 * 'image_size' bytes) is run through xc_inflate_buffer() and the result is
 * passed to xc_hvm_build_internal().
 *
 * Returns -1 if the buffer is missing or cannot be inflated, otherwise the
 * result of xc_hvm_build_internal().
 */

int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer */

    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        /* This is the loader image, not a ram disk: say so. */
        ERROR("unable to inflate image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already inflated buffers), so exercise some care in freeing.
       img is known to be non-NULL here. */

    if ( img != image_buffer )
        free(img);

    return sts;
}
kaf24@9180 412
kaf24@8708 413 /*
kaf24@8708 414 * Local variables:
kaf24@8708 415 * mode: C
kaf24@8708 416 * c-set-style: "BSD"
kaf24@8708 417 * c-basic-offset: 4
kaf24@8708 418 * tab-width: 4
kaf24@8708 419 * indent-tabs-mode: nil
kaf24@8708 420 * End:
kaf24@8708 421 */