direct-io.hg

view tools/libxc/xc_linux_restore.c @ 10276:b3d901ba705d

Represent PFNs with their own type, rather than 'unsigned long'.
('long' changes size and alignment between 32- and 64-bit ABIs.)

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>

author   kaf24@firebug.cl.cam.ac.uk
date     Tue Jun 06 09:48:17 2006 +0100
parents  8aca850f66ad
children f8af7041bf5b

line source
/******************************************************************************
 * xc_linux_restore.c
 *
 * Restore the state of a Linux session.
 *
 * Copyright (c) 2003, K A Fraser.
 */

#include <stdlib.h>
#include <unistd.h>

#include "xg_private.h"
#include "xg_save_restore.h"

/* max MFN of the whole machine */
static unsigned long max_mfn;

/* virtual starting address of the hypervisor */
static unsigned long hvirt_start;

/* #levels of page tables used by the current guest */
static unsigned int pt_levels;

/* total number of pages used by the current guest */
static unsigned long max_pfn;

/* Live mapping of the table mapping each PFN to its current MFN. */
static xen_pfn_t *live_p2m = NULL;

/* A table mapping each PFN to its new MFN. */
static xen_pfn_t *p2m = NULL;
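
/*
 * Read exactly 'count' bytes from 'fd' into 'buf', retrying on EINTR
 * and short reads. Returns 1 on success and 0 on failure (note: this is
 * not the usual read(2) return convention).
 */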
static ssize_t
read_exact(int fd, void *buf, size_t count)
{
    size_t r = 0;
    ssize_t s;
    unsigned char *b = buf;

    while (r < count) {
        s = read(fd, &b[r], count - r);
        if ((s == -1) && (errno == EINTR))
            continue;
        if (s <= 0)
            break;
        r += s;
    }

    return (r == count) ? 1 : 0;
}

/*
** In the state file (or during transfer), all page-table pages are
** converted into a 'canonical' form where references to actual mfns
** are replaced with references to the corresponding pfns.
** This function inverts that operation, replacing the pfn values with
** the (now known) appropriate mfn values.
*/
int uncanonicalize_pagetable(unsigned long type, void *page)
{
    int i, pte_last;
    unsigned long pfn;
    uint64_t pte;

    pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);

    /* Now iterate through the page table, uncanonicalizing each PTE */
    for (i = 0; i < pte_last; i++) {

        if (pt_levels == 2)
            pte = ((uint32_t *)page)[i];
        else
            pte = ((uint64_t *)page)[i];

        if (pte & _PAGE_PRESENT) {

            pfn = (pte >> PAGE_SHIFT) & 0xffffffff;

            if (pfn >= max_pfn) {
                /* This "page table page" is probably not one; bail. */
                ERR("Frame number in type %lu page table is out of range: "
                    "i=%d pfn=0x%lx max_pfn=%lu",
                    type >> 28, i, pfn, max_pfn);
                return 0;
            }

            pte &= 0xffffff0000000fffULL;
            pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;

            if (pt_levels == 2)
                ((uint32_t *)page)[i] = (uint32_t)pte;
            else
                ((uint64_t *)page)[i] = (uint64_t)pte;
        }
    }

    return 1;
}
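
/*
 * For reference, a minimal sketch of the save-side transform that
 * uncanonicalize_pagetable() inverts. This is an illustration only
 * (hence not compiled): the real code lives in xc_linux_save.c, and
 * 'mfn_to_pfn' stands in for whatever live M2P lookup the saver uses.
 */
#if 0
static void canonicalize_pte(uint64_t *pte)
{
    if (*pte & _PAGE_PRESENT) {
        unsigned long mfn = (*pte >> PAGE_SHIFT) & 0xffffffff;
        *pte &= 0xffffff0000000fffULL;                   /* keep flag bits */
        *pte |= (uint64_t)mfn_to_pfn(mfn) << PAGE_SHIFT; /* mfn -> pfn    */
    }
}
#endif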

int xc_linux_restore(int xc_handle, int io_fd,
                     uint32_t dom, unsigned long nr_pfns,
                     unsigned int store_evtchn, unsigned long *store_mfn,
                     unsigned int console_evtchn, unsigned long *console_mfn)
{
    DECLARE_DOM0_OP;
    int rc = 1, i, n, pae_extended_cr3 = 0;
    unsigned long mfn, pfn;
    unsigned int prev_pc, this_pc;
    int verify = 0;
    int nraces = 0;

    /* The new domain's shared-info frame number. */
    unsigned long shared_info_frame;
    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
    shared_info_t *shared_info = (shared_info_t *)shared_info_page;

    /* A copy of the CPU context of the guest. */
    vcpu_guest_context_t ctxt;

    /* A table containing the type of each PFN (/not/ MFN!). */
    unsigned long *pfn_type = NULL;

    /* A table of MFNs to map in the current region */
    xen_pfn_t *region_mfn = NULL;

    /* Types of the pfns in the current region */
    unsigned long region_pfn_type[MAX_BATCH_SIZE];

    /* A temporary mapping, and a copy, of one frame of guest memory. */
    unsigned long *page = NULL;

    /* A copy of the pfn-to-mfn table frame list. */
    xen_pfn_t *p2m_frame_list = NULL;

    /* A temporary mapping of the guest's start_info page. */
    start_info_t *start_info;

    char *region_base;

    xc_mmu_t *mmu = NULL;

    /* used by debug verify code */
    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];

    struct mmuext_op pin[MAX_PIN_BATCH];
    unsigned int nr_pins;

    max_pfn = nr_pfns;

    DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn);

    if (!get_platform_info(xc_handle, dom,
                           &max_mfn, &hvirt_start, &pt_levels)) {
        ERR("Unable to get platform info.");
        return 1;
    }

    if (mlock(&ctxt, sizeof(ctxt))) {
        /* needed for build dom0 op, but might as well do early */
        ERR("Unable to mlock ctxt");
        return 1;
    }

    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
        ERR("Couldn't allocate p2m_frame_list array");
        goto out;
    }

    /* Read first entry of P2M list, or extended-info signature (~0UL). */
    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
        ERR("read extended-info signature failed");
        goto out;
    }
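
    /*
     * Extended-info wire layout, as parsed below (a reading of this
     * code, not an authoritative format spec):
     *
     *   sizeof(long)      : ~0UL signature (in place of 1st p2m frame)
     *   4 bytes           : tot_bytes, total size of all chunks
     *   per chunk:
     *     4 bytes         : signature, e.g. "vcpu" (not NUL-terminated)
     *     4 bytes         : chunk_bytes, payload size of this chunk
     *     chunk_bytes     : payload (a vcpu_guest_context_t for "vcpu")
     */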
    if (p2m_frame_list[0] == ~0UL) {

        uint32_t tot_bytes;

        /* Next 4 bytes: total size of following extended info. */
        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
            ERR("read extended-info size failed");
            goto out;
        }

        while (tot_bytes) {

            uint32_t chunk_bytes;
            char     chunk_sig[4];

            /* 4-character chunk signature + 4-byte remaining chunk size. */
            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
                ERR("read extended-info chunk signature failed");
                goto out;
            }
            tot_bytes -= 8;

            /* VCPU context structure? */
            if (!strncmp(chunk_sig, "vcpu", 4)) {
                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
                    ERR("read extended-info vcpu context failed");
                    goto out;
                }
                tot_bytes   -= sizeof(struct vcpu_guest_context);
                chunk_bytes -= sizeof(struct vcpu_guest_context);

                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
                    pae_extended_cr3 = 1;
            }

            /* Any remaining bytes of this chunk: read and discard. */
            while (chunk_bytes) {
                unsigned long sz = chunk_bytes;
                if (sz > P2M_FL_SIZE)
                    sz = P2M_FL_SIZE;
                if (!read_exact(io_fd, p2m_frame_list, sz)) {
                    ERR("read-and-discard extended-info chunk bytes failed");
                    goto out;
                }
                chunk_bytes -= sz;
                tot_bytes   -= sz;
            }
        }

        /* Now read the real first entry of P2M list. */
        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
            ERR("read first entry of p2m_frame_list failed");
            goto out;
        }
    }

    /* First entry is already read into p2m_frame_list; read the rest. */
    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
        ERR("read p2m_frame_list failed");
        goto out;
    }
    /* We want zeroed memory so use calloc rather than malloc. */
    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
    pfn_type   = calloc(max_pfn, sizeof(unsigned long));
    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));

    if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
        ERR("memory alloc failed");
        errno = ENOMEM;
        goto out;
    }

    if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
        ERR("Could not mlock region_mfn");
        goto out;
    }

    /* Get the domain's shared-info frame. */
    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)dom;
    if (xc_dom0_op(xc_handle, &op) < 0) {
        ERR("Could not get information on new domain");
        goto out;
    }
    shared_info_frame = op.u.getdomaininfo.shared_info_frame;

    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
        errno = ENOMEM;
        goto out;
    }

    if (xc_domain_memory_increase_reservation(
            xc_handle, dom, max_pfn, 0, 0, NULL) != 0) {
        ERR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn));
        errno = ENOMEM;
        goto out;
    }

    DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn));

    /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
    if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) {
        ERR("Did not read correct number of frame numbers for new dom");
        goto out;
    }

    if (!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
        ERR("Could not initialise for MMU updates");
        goto out;
    }

    DPRINTF("Reloading memory pages: 0%%\n");

    /*
     * Now simply read each saved frame into its new machine frame.
     * We uncanonicalise page tables as we go.
     */
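
    /*
     * Each batch on the wire, as read by the loop below:
     *   4 bytes                   : batch size j (-1 => enter verify mode,
     *                               0 => end of page data)
     *   j * sizeof(unsigned long) : pfn-and-type words (pfn | type bits)
     *   then one PAGE_SIZE page of contents per non-XTAB entry, in order
     */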
    prev_pc = 0;

    n = 0;
    while (1) {

        int j;

        this_pc = (n * 100) / max_pfn;
        if ( (this_pc - prev_pc) >= 5 )
        {
            PPRINTF("\b\b\b\b%3d%%", this_pc);
            prev_pc = this_pc;
        }

        if (!read_exact(io_fd, &j, sizeof(int))) {
            ERR("Error when reading batch size");
            goto out;
        }

        PPRINTF("batch %d\n", j);

        if (j == -1) {
            verify = 1;
            fprintf(stderr, "Entering page verify mode\n");
            continue;
        }

        if (j == 0)
            break;  /* our work here is done */

        if (j > MAX_BATCH_SIZE) {
            ERR("Max batch size exceeded. Giving up.");
            goto out;
        }

        if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
            ERR("Error when reading region pfn types");
            goto out;
        }

        for (i = 0; i < j; i++) {

            if ((region_pfn_type[i] & LTAB_MASK) == XTAB)
                region_mfn[i] = 0; /* we know map will fail, but don't care */
            else
                region_mfn[i] = p2m[region_pfn_type[i] & ~LTAB_MASK];
        }

        if (!(region_base = xc_map_foreign_batch(
                  xc_handle, dom, PROT_WRITE, region_mfn, j))) {
            ERR("map batch failed");
            goto out;
        }

        for ( i = 0; i < j; i++ )
        {
            void *page;
            unsigned long pagetype;

            pfn      = region_pfn_type[i] & ~LTAB_MASK;
            pagetype = region_pfn_type[i] & LTAB_MASK;

            if (pagetype == XTAB)
                /* a bogus/unmapped page: skip it */
                continue;

            if (pfn >= max_pfn) {
                ERR("pfn out of range");
                goto out;
            }

            pfn_type[pfn] = pagetype;

            mfn = p2m[pfn];

            /* In verify mode, we use a copy; otherwise we work in place */
            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);

            if (!read_exact(io_fd, page, PAGE_SIZE)) {
                ERR("Error when reading page (type was %lx)", pagetype);
                goto out;
            }

            pagetype &= LTABTYPE_MASK;

            if (pagetype >= L1TAB && pagetype <= L4TAB) {

                /*
                ** A page table page - need to 'uncanonicalize' it, i.e.
                ** replace all the references to pfns with the corresponding
                ** mfns for the new domain.
                **
                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
                ** so we may need to update the p2m after the main loop.
                ** Hence we defer canonicalization of L1s until then.
                */
                if ((pt_levels != 3) ||
                    pae_extended_cr3 ||
                    (pagetype != L1TAB)) {

                    if (!uncanonicalize_pagetable(pagetype, page)) {
                        /*
                        ** Failing to uncanonicalize a page table can be ok
                        ** under live migration since the page's type may have
                        ** changed by now (and we'll get an update later).
                        */
                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
                                pagetype >> 28, pfn, mfn);
                        nraces++;
                        continue;
                    }
                }

            } else if (pagetype != NOTAB) {

                ERR("Bogus page type %lx: i=%d max_pfn=%lu",
                    pagetype, i, max_pfn);
                goto out;
            }

            if (verify) {

                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);

                if (res) {

                    int v;

                    DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
                            "actualcs=%08lx\n", pfn, pfn_type[pfn],
                            csum_page(region_base + i*PAGE_SIZE),
                            csum_page(buf));

                    for (v = 0; v < 4; v++) {

                        unsigned long *p = (unsigned long *)
                            (region_base + i*PAGE_SIZE);
                        if (buf[v] != p[v])
                            DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
                    }
                }
            }

            if (xc_add_mmu_update(xc_handle, mmu,
                                  (((unsigned long long)mfn) << PAGE_SHIFT)
                                  | MMU_MACHPHYS_UPDATE, pfn)) {
                ERR("failed machphys update mfn=%lx pfn=%lx", mfn, pfn);
                goto out;
            }
        } /* end of 'batch' for loop */

        munmap(region_base, j*PAGE_SIZE);
        n += j; /* crude stats */
    }
    DPRINTF("Received all pages (%d races)\n", nraces);

    if ((pt_levels == 3) && !pae_extended_cr3) {

        /*
        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
        ** is a little awkward and involves (a) finding all such PGDs and
        ** replacing them with 'lowmem' versions; (b) updating the p2m[]
        ** with the new info; and (c) canonicalizing all the L1s using the
        ** (potentially updated) p2m[].
        **
        ** This is relatively slow (and currently involves two passes through
        ** the pfn_type[] array), but at least seems to be correct. May wish
        ** to consider more complex approaches to optimize this later.
        */

        int j, k;

        /* First pass: find all L3TABs currently in >4G MFNs and get new MFNs */
        for (i = 0; i < max_pfn; i++) {

            if (((pfn_type[i] & LTABTYPE_MASK) == L3TAB) &&
                (p2m[i] > 0xfffffUL)) {

                unsigned long new_mfn;
                uint64_t l3ptes[4];
                uint64_t *l3tab;

                l3tab = (uint64_t *)
                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                         PROT_READ, p2m[i]);

                for (j = 0; j < 4; j++)
                    l3ptes[j] = l3tab[j];

                munmap(l3tab, PAGE_SIZE);

                if (!(new_mfn = xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
                    ERR("Couldn't get a page below 4GB :-(");
                    goto out;
                }

                p2m[i] = new_mfn;
                if (xc_add_mmu_update(xc_handle, mmu,
                                      (((unsigned long long)new_mfn)
                                       << PAGE_SHIFT) |
                                      MMU_MACHPHYS_UPDATE, i)) {
                    ERR("Couldn't m2p on PAE root pgdir");
                    goto out;
                }

                l3tab = (uint64_t *)
                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                         PROT_READ | PROT_WRITE, p2m[i]);

                for (j = 0; j < 4; j++)
                    l3tab[j] = l3ptes[j];

                munmap(l3tab, PAGE_SIZE);
            }
        }

        /* Second pass: find all L1TABs and uncanonicalize them */
        j = 0;

        for (i = 0; i < max_pfn; i++) {

            if (((pfn_type[i] & LTABTYPE_MASK) == L1TAB)) {
                region_mfn[j] = p2m[i];
                j++;
            }

            if (i == (max_pfn-1) || j == MAX_BATCH_SIZE) {

                if (!(region_base = xc_map_foreign_batch(
                          xc_handle, dom, PROT_READ | PROT_WRITE,
                          region_mfn, j))) {
                    ERR("map batch failed");
                    goto out;
                }

                for (k = 0; k < j; k++) {
                    if (!uncanonicalize_pagetable(L1TAB,
                                                  region_base + k*PAGE_SIZE)) {
                        ERR("failed uncanonicalize pt!");
                        goto out;
                    }
                }

                munmap(region_base, j*PAGE_SIZE);
                j = 0;
            }
        }
    }

    if (xc_finish_mmu_updates(xc_handle, mmu)) {
        ERR("Error doing finish_mmu_updates()");
        goto out;
    }
    /*
     * Pin page tables. Do this after writing to them as otherwise Xen
     * will barf when doing the type-checking.
     */
    nr_pins = 0;
    for (i = 0; i < max_pfn; i++) {

        if ( (pfn_type[i] & LPINTAB) == 0 )
            continue;

        switch (pfn_type[i]) {

        case (L1TAB|LPINTAB):
            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
            break;

        case (L2TAB|LPINTAB):
            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
            break;

        case (L3TAB|LPINTAB):
            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
            break;

        case (L4TAB|LPINTAB):
            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
            break;

        default:
            continue;
        }

        pin[nr_pins].arg1.mfn = p2m[i];
        nr_pins++;

        /* Flush full batches as we go. */
        if (nr_pins == MAX_PIN_BATCH) {
            if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
                ERR("Failed to pin batch of %d page tables", nr_pins);
                goto out;
            }
            nr_pins = 0;
        }
    }

    /* Flush the final partial batch, if any. */
    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
        ERR("Failed to pin batch of %d page tables", nr_pins);
        goto out;
    }
    DPRINTF("\b\b\b\b100%%\n");
    DPRINTF("Memory reloaded.\n");

    /* Get the list of PFNs that are not in the pseudo-phys map */
    {
        unsigned int count;
        unsigned long *pfntab;
        int rc;

        if (!read_exact(io_fd, &count, sizeof(count))) {
            ERR("Error when reading pfn count");
            goto out;
        }

        if (!(pfntab = malloc(sizeof(unsigned long) * count))) {
            ERR("Out of memory");
            goto out;
        }

        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
            ERR("Error when reading pfntab");
            goto out;
        }

        for (i = 0; i < count; i++) {

            unsigned long pfn = pfntab[i];

            if (pfn >= max_pfn)
                /* shouldn't happen - continue optimistically */
                continue;

            pfntab[i] = p2m[pfn];
            p2m[pfn]  = INVALID_P2M_ENTRY; /* not in pseudo-physical map */
        }

        if (count > 0) {

            struct xen_memory_reservation reservation = {
                .nr_extents   = count,
                .extent_order = 0,
                .domid        = dom
            };
            set_xen_guest_handle(reservation.extent_start, pfntab);

            if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
                                   &reservation)) != count) {
                ERR("Could not decrease reservation : %d", rc);
                goto out;
            } else
                DPRINTF("Decreased reservation by %d pages\n", count);
        }
    }
    if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) ||
        !read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
        ERR("Error when reading ctxt or shared info page");
        goto out;
    }

    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
    pfn = ctxt.user_regs.edx;
    if ((pfn >= max_pfn) || (pfn_type[pfn] != NOTAB)) {
        ERR("Suspend record frame number is bad");
        goto out;
    }
    ctxt.user_regs.edx = mfn = p2m[pfn];
    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
    start_info->nr_pages = max_pfn;
    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
    start_info->flags = 0;
    *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
    start_info->store_evtchn = store_evtchn;
    *console_mfn = start_info->console_mfn = p2m[start_info->console_mfn];
    start_info->console_evtchn = console_evtchn;
    munmap(start_info, PAGE_SIZE);

    /* Uncanonicalise each GDT frame number. */
    if (ctxt.gdt_ents > 8192) {
        ERR("GDT entry count out of range");
        goto out;
    }

    /* Each GDT frame holds 512 entries, so frame i covers entries
       [512*i, 512*i + 511]. */
    for (i = 0; (512 * i) < ctxt.gdt_ents; i++) {
        pfn = ctxt.gdt_frames[i];
        if ((pfn >= max_pfn) || (pfn_type[pfn] != NOTAB)) {
            ERR("GDT frame number is bad");
            goto out;
        }
        ctxt.gdt_frames[i] = p2m[pfn];
    }
    /* Uncanonicalise the page table base pointer. */
    pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);

    if (pfn >= max_pfn) {
        ERR("PT base is bad: pfn=%lu max_pfn=%lu", pfn, max_pfn);
        goto out;
    }

    if ( (pfn_type[pfn] & LTABTYPE_MASK) !=
         ((unsigned long)pt_levels << LTAB_SHIFT) ) {
        ERR("PT base is bad. pfn=%lu max_pfn=%lu type=%08lx expected=%08lx",
            pfn, max_pfn, pfn_type[pfn],
            (unsigned long)pt_levels << LTAB_SHIFT);
        goto out;
    }
    ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);

    /* clear any pending events and the selector */
    memset(&(shared_info->evtchn_pending[0]), 0,
           sizeof(shared_info->evtchn_pending));
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_pending_sel = 0;

    /* Copy saved contents of shared-info page. No checking needed. */
    page = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
    memcpy(page, shared_info, sizeof(shared_info_t));
    munmap(page, PAGE_SIZE);

    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
    for (i = 0; i < P2M_FL_ENTRIES; i++) {
        pfn = p2m_frame_list[i];
        if ((pfn >= max_pfn) || (pfn_type[pfn] != NOTAB)) {
            ERR("PFN-to-MFN frame number is bad");
            goto out;
        }

        p2m_frame_list[i] = p2m[pfn];
    }

    /* Copy the P2M we've constructed to the 'live' P2M */
    if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
                                          p2m_frame_list, P2M_FL_ENTRIES))) {
        ERR("Couldn't map p2m table");
        goto out;
    }

    memcpy(live_p2m, p2m, P2M_SIZE);
    munmap(live_p2m, P2M_SIZE);
    /*
     * Safety checking of saved context:
     *  1. user_regs is fine, as Xen checks that on context switch.
     *  2. fpu_ctxt is fine, as it can't hurt Xen.
     *  3. trap_ctxt needs the code selectors checked.
     *  4. ldt base must be page-aligned, no more than 8192 ents, ...
     *  5. gdt already done, and further checking is done by Xen.
     *  6. check that kernel_ss is safe.
     *  7. pt_base is already done.
     *  8. debugregs are checked by Xen.
     *  9. callback code selectors need checking.
     */
    for ( i = 0; i < 256; i++ ) {
        ctxt.trap_ctxt[i].vector = i;
        if ((ctxt.trap_ctxt[i].cs & 3) == 0)
            ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }
    if ((ctxt.kernel_ss & 3) == 0)
        ctxt.kernel_ss = FLAT_KERNEL_DS;
#if defined(__i386__)
    if ((ctxt.event_callback_cs & 3) == 0)
        ctxt.event_callback_cs = FLAT_KERNEL_CS;
    if ((ctxt.failsafe_callback_cs & 3) == 0)
        ctxt.failsafe_callback_cs = FLAT_KERNEL_CS;
#endif
    if (((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
        (ctxt.ldt_ents > 8192) ||
        (ctxt.ldt_base > hvirt_start) ||
        ((ctxt.ldt_base + ctxt.ldt_ents*8) > hvirt_start)) {
        ERR("Bad LDT base or size");
        goto out;
    }

    DPRINTF("Domain ready to be built.\n");

    op.cmd = DOM0_SETVCPUCONTEXT;
    op.u.setvcpucontext.domain = (domid_t)dom;
    op.u.setvcpucontext.vcpu   = 0;
    set_xen_guest_handle(op.u.setvcpucontext.ctxt, &ctxt);
    rc = xc_dom0_op(xc_handle, &op);

    if (rc != 0) {
        ERR("Couldn't build the domain");
        goto out;
    }

 out:
    if ( (rc != 0) && (dom != 0) )
        xc_domain_destroy(xc_handle, dom);
    free(mmu);
    free(p2m);
    free(pfn_type);
    free(region_mfn);
    free(p2m_frame_list);

    DPRINTF("Restore exit with rc=%d\n", rc);

    return rc;
}