direct-io.hg

view tools/libxc/xc_linux_restore.c @ 12765:2dd4569e0640

[LIBXC] Add an error reporting API to the libxc library.

- An 'xc_error' struct is used to pass around error
details. It currently contains two members: 'code', an enumeration
of error types, and 'message', a free-text description of the
specific problem. (A sketch of the full API follows this list.)

- The xc_get_last_error() method returns a const pointer to the
internal instance of this struct managed by libxc. By returning a
const pointer we can add extra members to the end of the struct at
any time without worrying about the ABI of callers. This will let
us provide more fine-grained info if needed in the future.

- The xc_error instance is statically defined inside libxc and marked
__thread. This ensures that errors are recorded per-thread, and
that when dealing with errors we never need to call malloc - all
storage needed is statically allocated.

- The xc_clear_last_error() method resets any currently recorded
error details.

- The xc_error_code_to_desc() method converts the integer error code
into a generic user-facing message, e.g. "Invalid kernel". Together
with the 'message' field from xc_error, this provides the user-visible
feedback, e.g. "Invalid kernel: Non PAE-kernel on PAE host."

- A callback can be registered with xc_set_error_handler to receive
notification whenever an error is recorded, rather than querying
for error details after the fact with xc_get_last_error.

- If built with -DDEBUG, a default error handler will be registered
which calls fprintf(stderr), thus maintaining the current behaviour
of logging errors to stderr during developer builds.

- The python binding for libxc is updated to use xc_get_last_error
to pull out error details whenever appropriate, instead of
returning info based on 'errno'.

- The xc_set_error method is private to libxc internals, and is used
for setting error details.

- The ERROR and PERROR macros have been updated to call xc_set_error
automatically, specifying XC_INTERNAL_ERROR as the error code. This
gives a generic error report for all current failure points.

- Some uses of the ERROR macro have been replaced with explicit
calls to xc_set_error to enable finer-grained error reporting. In
particular, the code dealing with invalid kernel types uses this
to report PAE/architecture/word-size mismatches.
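
To make the shape of the API concrete, here is a minimal sketch of
the pieces described above. The struct members, function names and
XC_INTERNAL_ERROR come from this changelog; the remaining enum
values, the message-buffer size and the handler bookkeeping are
illustrative assumptions, not necessarily the exact code:

    #include <stdarg.h>
    #include <stdio.h>

    /* Sketch only: XC_ERROR_NONE, XC_INVALID_KERNEL and
     * XC_MAX_ERROR_MSG_LEN are assumed names, not confirmed API. */
    typedef enum {
        XC_ERROR_NONE = 0,
        XC_INTERNAL_ERROR,      /* generic code used by ERROR/PERROR */
        XC_INVALID_KERNEL,
    } xc_error_code;

    #define XC_MAX_ERROR_MSG_LEN 1024

    typedef struct {
        xc_error_code code;                  /* enumeration of error types */
        char message[XC_MAX_ERROR_MSG_LEN];  /* free-text description */
        /* New members can be appended without breaking callers, since
         * callers only ever hold a const pointer to this struct. */
    } xc_error;

    typedef void (*xc_error_handler)(const xc_error *err);

    /* Statically allocated and per-thread: no malloc on error paths. */
    static __thread xc_error last_error;
    static xc_error_handler error_handler;

    const xc_error *xc_get_last_error(void)
    {
        return &last_error;
    }

    void xc_clear_last_error(void)
    {
        last_error.code = XC_ERROR_NONE;
        last_error.message[0] = '\0';
    }

    xc_error_handler xc_set_error_handler(xc_error_handler handler)
    {
        xc_error_handler old = error_handler;
        error_handler = handler;
        return old;
    }

    /* Maps an error code to a generic user-facing message. */
    const char *xc_error_code_to_desc(int code)
    {
        switch (code) {
        case XC_INTERNAL_ERROR: return "Internal error";
        case XC_INVALID_KERNEL: return "Invalid kernel";
        default:                return "Unknown error";
        }
    }

    /* Private to libxc internals; records details and fires the handler. */
    void xc_set_error(xc_error_code code, const char *fmt, ...)
    {
        va_list args;

        last_error.code = code;
        va_start(args, fmt);
        vsnprintf(last_error.message, sizeof(last_error.message), fmt, args);
        va_end(args);

        if (error_handler != NULL)
            error_handler(&last_error);
    }

    /* ERROR/PERROR now record XC_INTERNAL_ERROR automatically. */
    #define ERROR(_m, _a...) xc_set_error(XC_INTERNAL_ERROR, _m , ## _a)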

The patch has been tested by calling xm create against a variety of
config files defining invalid kernels of various kinds. It has also
been tested with libvirt talking to xend. In both cases the error
messages were propagated all the way back up the stack.
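
As an illustration of the caller side (this fragment is not part of
the patch; the surrounding variables are assumed to be set up as for
any libxc call), a failed restore can now be reported as:

    if (xc_linux_restore(xc_handle, io_fd, dom, nr_pfns,
                         store_evtchn, &store_mfn,
                         console_evtchn, &console_mfn) != 0) {
        const xc_error *err = xc_get_last_error();
        fprintf(stderr, "restore failed: %s: %s\n",
                xc_error_code_to_desc(err->code), err->message);
    }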

There is only one place where I need to do further work. The suspend
and restore APIs in xend invoke external helper programs rather than
calling libxc directly, which means that error details are essentially
lost. Since there is already code in xend which scans stderr from
these programs, I will investigate adapting it to extract actual
error messages from these helpers.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
author kfraser@localhost.localdomain
date Thu Dec 07 11:36:26 2006 +0000 (2006-12-07)
parents 2ae4e4e89d6d
children 1818b322ede9
line source
/******************************************************************************
 * xc_linux_restore.c
 *
 * Restore the state of a Linux session.
 *
 * Copyright (c) 2003, K A Fraser.
 */

#include <stdlib.h>
#include <unistd.h>

#include "xg_private.h"
#include "xg_save_restore.h"

/* max mfn of the whole machine */
static unsigned long max_mfn;

/* virtual starting address of the hypervisor */
static unsigned long hvirt_start;

/* #levels of page tables used by the current guest */
static unsigned int pt_levels;

/* total number of pages used by the current guest */
static unsigned long max_pfn;

/* Live mapping of the table mapping each PFN to its current MFN. */
static xen_pfn_t *live_p2m = NULL;

/* A table mapping each PFN to its new MFN. */
static xen_pfn_t *p2m = NULL;
static ssize_t
read_exact(int fd, void *buf, size_t count)
{
    int r = 0, s;
    unsigned char *b = buf;

    while (r < count) {
        s = read(fd, &b[r], count - r);
        if ((s == -1) && (errno == EINTR))
            continue;
        if (s <= 0) {
            break;
        }
        r += s;
    }

    return (r == count) ? 1 : 0;
}
/*
** In the state file (or during transfer), all page-table pages are
** converted into a 'canonical' form where references to actual mfns
** are replaced with references to the corresponding pfns.
** This function inverts that operation, replacing the pfn values with
** the (now known) appropriate mfn values.
*/
static int uncanonicalize_pagetable(unsigned long type, void *page)
{
    int i, pte_last;
    unsigned long pfn;
    uint64_t pte;

    pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);

    /* Now iterate through the page table, uncanonicalizing each PTE */
    for(i = 0; i < pte_last; i++) {

        if(pt_levels == 2)
            pte = ((uint32_t *)page)[i];
        else
            pte = ((uint64_t *)page)[i];

        if(pte & _PAGE_PRESENT) {

            pfn = (pte >> PAGE_SHIFT) & 0xffffffff;

            if(pfn >= max_pfn) {
                /* This "page table page" is probably not one; bail. */
                ERROR("Frame number in type %lu page table is out of range: "
                      "i=%d pfn=0x%lx max_pfn=%lu",
                      type >> 28, i, pfn, max_pfn);
                return 0;
            }

            pte &= 0xffffff0000000fffULL;
            pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;

            if(pt_levels == 2)
                ((uint32_t *)page)[i] = (uint32_t)pte;
            else
                ((uint64_t *)page)[i] = (uint64_t)pte;

        }
    }

    return 1;
}
int xc_linux_restore(int xc_handle, int io_fd,
                     uint32_t dom, unsigned long nr_pfns,
                     unsigned int store_evtchn, unsigned long *store_mfn,
                     unsigned int console_evtchn, unsigned long *console_mfn)
{
    DECLARE_DOMCTL;
    int rc = 1, i, n, pae_extended_cr3 = 0;
    unsigned long mfn, pfn;
    unsigned int prev_pc, this_pc;
    int verify = 0;
    int nraces = 0;

    /* The new domain's shared-info frame number. */
    unsigned long shared_info_frame;
    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
    shared_info_t *shared_info = (shared_info_t *)shared_info_page;

    /* A copy of the CPU context of the guest. */
    vcpu_guest_context_t ctxt;

    /* A table containing the type of each PFN (/not/ MFN!). */
    unsigned long *pfn_type = NULL;

    /* A table of MFNs to map in the current region */
    xen_pfn_t *region_mfn = NULL;

    /* Types of the pfns in the current region */
    unsigned long region_pfn_type[MAX_BATCH_SIZE];

    /* A temporary mapping, and a copy, of one frame of guest memory. */
    unsigned long *page = NULL;

    /* A copy of the pfn-to-mfn table frame list. */
    xen_pfn_t *p2m_frame_list = NULL;

    /* A temporary mapping of the guest's start_info page. */
    start_info_t *start_info;

    char *region_base;

    xc_mmu_t *mmu = NULL;

    /* used by debug verify code */
    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];

    struct mmuext_op pin[MAX_PIN_BATCH];
    unsigned int nr_pins;
    max_pfn = nr_pfns;

    DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn);

    if(!get_platform_info(xc_handle, dom,
                          &max_mfn, &hvirt_start, &pt_levels)) {
        ERROR("Unable to get platform info.");
        return 1;
    }

    if (lock_pages(&ctxt, sizeof(ctxt))) {
        /* needed for build domctl, but might as well do early */
        ERROR("Unable to lock ctxt");
        return 1;
    }

    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
        ERROR("Couldn't allocate p2m_frame_list array");
        goto out;
    }

    /* Read first entry of P2M list, or extended-info signature (~0UL). */
    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
        ERROR("read extended-info signature failed");
        goto out;
    }

    if (p2m_frame_list[0] == ~0UL) {
        uint32_t tot_bytes;

        /* Next 4 bytes: total size of following extended info. */
        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
            ERROR("read extended-info size failed");
            goto out;
        }

        while (tot_bytes) {
            uint32_t chunk_bytes;
            char chunk_sig[4];

            /* 4-character chunk signature + 4-byte remaining chunk size. */
            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
                ERROR("read extended-info chunk signature failed");
                goto out;
            }
            tot_bytes -= 8;

            /* VCPU context structure? */
            if (!strncmp(chunk_sig, "vcpu", 4)) {
                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
                    ERROR("read extended-info vcpu context failed");
                    goto out;
                }
                tot_bytes   -= sizeof(struct vcpu_guest_context);
                chunk_bytes -= sizeof(struct vcpu_guest_context);

                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
                    pae_extended_cr3 = 1;
            }

            /* Any remaining bytes of this chunk: read and discard. */
            while (chunk_bytes) {
                unsigned long sz = chunk_bytes;
                if ( sz > P2M_FL_SIZE )
                    sz = P2M_FL_SIZE;
                if (!read_exact(io_fd, p2m_frame_list, sz)) {
                    ERROR("read-and-discard extended-info chunk bytes failed");
                    goto out;
                }
                chunk_bytes -= sz;
                tot_bytes   -= sz;
            }
        }

        /* Now read the real first entry of P2M list. */
        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
            ERROR("read first entry of p2m_frame_list failed");
            goto out;
        }
    }

    /* First entry is already read into the p2m array. */
    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
        ERROR("read p2m_frame_list failed");
        goto out;
    }

    /* We want zeroed memory so use calloc rather than malloc. */
    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
    pfn_type   = calloc(max_pfn, sizeof(unsigned long));
    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));

    if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
        ERROR("memory alloc failed");
        errno = ENOMEM;
        goto out;
    }

    if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
        ERROR("Could not lock region_mfn");
        goto out;
    }

    /* Get the domain's shared-info frame. */
    domctl.cmd = XEN_DOMCTL_getdomaininfo;
    domctl.domain = (domid_t)dom;
    if (xc_domctl(xc_handle, &domctl) < 0) {
        ERROR("Could not get information on new domain");
        goto out;
    }
    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;

    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
        errno = ENOMEM;
        goto out;
    }

    for ( pfn = 0; pfn < max_pfn; pfn++ )
        p2m[pfn] = pfn;

    if (xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn,
                                          0, 0, p2m) != 0) {
        ERROR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn));
        errno = ENOMEM;
        goto out;
    }

    DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn));

    if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
        ERROR("Could not initialise for MMU updates");
        goto out;
    }

    DPRINTF("Reloading memory pages: 0%%\n");
    /*
     * Now simply read each saved frame into its new machine frame.
     * We uncanonicalise page tables as we go.
     */
    prev_pc = 0;

    n = 0;
    while (1) {

        int j;

        this_pc = (n * 100) / max_pfn;
        if ( (this_pc - prev_pc) >= 5 )
        {
            PPRINTF("\b\b\b\b%3d%%", this_pc);
            prev_pc = this_pc;
        }

        if (!read_exact(io_fd, &j, sizeof(int))) {
            ERROR("Error when reading batch size");
            goto out;
        }

        PPRINTF("batch %d\n", j);

        if (j == -1) {
            verify = 1;
            DPRINTF("Entering page verify mode\n");
            continue;
        }

        if (j == 0)
            break; /* our work here is done */

        if (j > MAX_BATCH_SIZE) {
            ERROR("Max batch size exceeded. Giving up.");
            goto out;
        }

        if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
            ERROR("Error when reading region pfn types");
            goto out;
        }

        for ( i = 0; i < j; i++ )
        {
            unsigned long pfn, pagetype;
            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;

            if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
                region_mfn[i] = 0; /* we know map will fail, but don't care */
            else
                region_mfn[i] = p2m[pfn];
        }

        region_base = xc_map_foreign_batch(
            xc_handle, dom, PROT_WRITE, region_mfn, j);
        if ( region_base == NULL )
        {
            ERROR("map batch failed");
            goto out;
        }

        for ( i = 0; i < j; i++ )
        {
            void *page;
            unsigned long pagetype;

            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;

            if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
                /* a bogus/unmapped page: skip it */
                continue;

            if ( pfn > max_pfn )
            {
                ERROR("pfn out of range");
                goto out;
            }

            pfn_type[pfn] = pagetype;

            mfn = p2m[pfn];

            /* In verify mode, we use a copy; otherwise we work in place */
            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);

            if (!read_exact(io_fd, page, PAGE_SIZE)) {
                ERROR("Error when reading page (type was %lx)", pagetype);
                goto out;
            }

            pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;

            if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
                 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
            {
                /*
                ** A page table page - need to 'uncanonicalize' it, i.e.
                ** replace all the references to pfns with the corresponding
                ** mfns for the new domain.
                **
                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
                ** so we may need to update the p2m after the main loop.
                ** Hence we defer canonicalization of L1s until then.
                */
                if ((pt_levels != 3) ||
                    pae_extended_cr3 ||
                    (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {

                    if (!uncanonicalize_pagetable(pagetype, page)) {
                        /*
                        ** Failing to uncanonicalize a page table can be ok
                        ** under live migration since the page's type may have
                        ** changed by now (and we'll get an update later).
                        */
                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
                                pagetype >> 28, pfn, mfn);
                        nraces++;
                        continue;
                    }

                }

            }
            else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
            {
                ERROR("Bogus page type %lx page table is out of range: "
                      "i=%d max_pfn=%lu", pagetype, i, max_pfn);
                goto out;

            }

            if (verify) {

                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);

                if (res) {

                    int v;

                    DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
                            "actualcs=%08lx\n", pfn, pfn_type[pfn],
                            csum_page(region_base + i*PAGE_SIZE),
                            csum_page(buf));

                    for (v = 0; v < 4; v++) {

                        unsigned long *p = (unsigned long *)
                            (region_base + i*PAGE_SIZE);
                        if (buf[v] != p[v])
                            DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
                    }
                }
            }

            if (xc_add_mmu_update(xc_handle, mmu,
                                  (((unsigned long long)mfn) << PAGE_SHIFT)
                                  | MMU_MACHPHYS_UPDATE, pfn)) {
                ERROR("failed machphys update mfn=%lx pfn=%lx", mfn, pfn);
                goto out;
            }
        } /* end of 'batch' for loop */

        munmap(region_base, j*PAGE_SIZE);
        n += j; /* crude stats */
    }
    /*
     * Ensure we flush all machphys updates before potential PAE-specific
     * reallocations below.
     */
    if (xc_finish_mmu_updates(xc_handle, mmu)) {
        ERROR("Error doing finish_mmu_updates()");
        goto out;
    }

    DPRINTF("Received all pages (%d races)\n", nraces);

    if ((pt_levels == 3) && !pae_extended_cr3) {

        /*
        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
        ** is a little awkward and involves (a) finding all such PGDs and
        ** replacing them with 'lowmem' versions; (b) updating the p2m[]
        ** with the new info; and (c) canonicalizing all the L1s using the
        ** (potentially updated) p2m[].
        **
        ** This is relatively slow (and currently involves two passes through
        ** the pfn_type[] array), but at least seems to be correct. May wish
        ** to consider more complex approaches to optimize this later.
        */
        int j, k;

        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
        for ( i = 0; i < max_pfn; i++ )
        {
            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
                  XEN_DOMCTL_PFINFO_L3TAB) &&
                 (p2m[i] > 0xfffffUL) )
            {
                unsigned long new_mfn;
                uint64_t l3ptes[4];
                uint64_t *l3tab;

                l3tab = (uint64_t *)
                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                         PROT_READ, p2m[i]);

                for(j = 0; j < 4; j++)
                    l3ptes[j] = l3tab[j];

                munmap(l3tab, PAGE_SIZE);

                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
                    ERROR("Couldn't get a page below 4GB :-(");
                    goto out;
                }

                p2m[i] = new_mfn;
                if (xc_add_mmu_update(xc_handle, mmu,
                                      (((unsigned long long)new_mfn)
                                       << PAGE_SHIFT) |
                                      MMU_MACHPHYS_UPDATE, i)) {
                    ERROR("Couldn't m2p on PAE root pgdir");
                    goto out;
                }

                l3tab = (uint64_t *)
                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                         PROT_READ | PROT_WRITE, p2m[i]);

                for(j = 0; j < 4; j++)
                    l3tab[j] = l3ptes[j];

                munmap(l3tab, PAGE_SIZE);

            }
        }

        /* Second pass: find all L1TABs and uncanonicalize them */
        j = 0;

        for ( i = 0; i < max_pfn; i++ )
        {
            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
                  XEN_DOMCTL_PFINFO_L1TAB) )
            {
                region_mfn[j] = p2m[i];
                j++;
            }

            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {

                if (!(region_base = xc_map_foreign_batch(
                          xc_handle, dom, PROT_READ | PROT_WRITE,
                          region_mfn, j))) {
                    ERROR("map batch failed");
                    goto out;
                }

                for(k = 0; k < j; k++) {
                    if(!uncanonicalize_pagetable(XEN_DOMCTL_PFINFO_L1TAB,
                                                 region_base + k*PAGE_SIZE)) {
                        ERROR("failed uncanonicalize pt!");
                        goto out;
                    }
                }

                munmap(region_base, j*PAGE_SIZE);
                j = 0;
            }
        }

        if (xc_finish_mmu_updates(xc_handle, mmu)) {
            ERROR("Error doing finish_mmu_updates()");
            goto out;
        }
    }
    /*
     * Pin page tables. Do this after writing to them as otherwise Xen
     * will barf when doing the type-checking.
     */
    nr_pins = 0;
    for ( i = 0; i < max_pfn; i++ )
    {
        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
            continue;

        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
        {
        case XEN_DOMCTL_PFINFO_L1TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
            break;

        case XEN_DOMCTL_PFINFO_L2TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
            break;

        case XEN_DOMCTL_PFINFO_L3TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
            break;

        case XEN_DOMCTL_PFINFO_L4TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
            break;

        default:
            continue;
        }

        pin[nr_pins].arg1.mfn = p2m[i];
        nr_pins++;

        /* Batch full? Then flush. */
        if (nr_pins == MAX_PIN_BATCH) {
            if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
                ERROR("Failed to pin batch of %d page tables", nr_pins);
                goto out;
            }
            nr_pins = 0;
        }
    }

    /* Flush final partial batch. */
    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
        ERROR("Failed to pin batch of %d page tables", nr_pins);
        goto out;
    }

    DPRINTF("\b\b\b\b100%%\n");
    DPRINTF("Memory reloaded.\n");
    /* Get the list of PFNs that are not in the pseudo-phys map */
    {
        unsigned int count;
        unsigned long *pfntab;
        int rc;

        if (!read_exact(io_fd, &count, sizeof(count))) {
            ERROR("Error when reading pfn count");
            goto out;
        }

        if(!(pfntab = malloc(sizeof(unsigned long) * count))) {
            ERROR("Out of memory");
            goto out;
        }

        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
            ERROR("Error when reading pfntab");
            goto out;
        }

        for (i = 0; i < count; i++) {

            unsigned long pfn = pfntab[i];

            if(pfn > max_pfn)
                /* shouldn't happen - continue optimistically */
                continue;

            pfntab[i] = p2m[pfn];
            p2m[pfn]  = INVALID_P2M_ENTRY; /* not in pseudo-physical map */
        }

        if (count > 0) {

            struct xen_memory_reservation reservation = {
                .nr_extents   = count,
                .extent_order = 0,
                .domid        = dom
            };
            set_xen_guest_handle(reservation.extent_start, pfntab);

            if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
                                   &reservation)) != count) {
                ERROR("Could not decrease reservation : %d", rc);
                goto out;
            } else
                DPRINTF("Decreased reservation by %d pages\n", count);
        }
    }
    if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) ||
        !read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
        ERROR("Error when reading ctxt or shared info page");
        goto out;
    }

    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
    pfn = ctxt.user_regs.edx;
    if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
        ERROR("Suspend record frame number is bad");
        goto out;
    }
    ctxt.user_regs.edx = mfn = p2m[pfn];
    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
    start_info->nr_pages = max_pfn;
    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
    start_info->flags = 0;
    *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
    start_info->store_evtchn = store_evtchn;
    start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
    start_info->console.domU.evtchn = console_evtchn;
    *console_mfn = start_info->console.domU.mfn;
    munmap(start_info, PAGE_SIZE);

    /* Uncanonicalise each GDT frame number. */
    if (ctxt.gdt_ents > 8192) {
        ERROR("GDT entry count out of range");
        goto out;
    }

    for (i = 0; i < ctxt.gdt_ents; i += 512) {
        pfn = ctxt.gdt_frames[i];
        if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
            ERROR("GDT frame number is bad");
            goto out;
        }
        ctxt.gdt_frames[i] = p2m[pfn];
    }

    /* Uncanonicalise the page table base pointer. */
    pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);

    if (pfn >= max_pfn) {
        ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
              pfn, max_pfn, pfn_type[pfn]);
        goto out;
    }

    if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
         ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
        ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
              pfn, max_pfn, pfn_type[pfn],
              (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        goto out;
    }

    ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);

    /* clear any pending events and the selector */
    memset(&(shared_info->evtchn_pending[0]), 0,
           sizeof (shared_info->evtchn_pending));
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_pending_sel = 0;

    /* Copy saved contents of shared-info page. No checking needed. */
    page = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
    memcpy(page, shared_info, PAGE_SIZE);
    munmap(page, PAGE_SIZE);

    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
    for (i = 0; i < P2M_FL_ENTRIES; i++) {
        pfn = p2m_frame_list[i];
        if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
            ERROR("PFN-to-MFN frame number is bad");
            goto out;
        }

        p2m_frame_list[i] = p2m[pfn];
    }

    /* Copy the P2M we've constructed to the 'live' P2M */
    if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
                                          p2m_frame_list, P2M_FL_ENTRIES))) {
        ERROR("Couldn't map p2m table");
        goto out;
    }

    memcpy(live_p2m, p2m, P2M_SIZE);
    munmap(live_p2m, P2M_SIZE);

    DPRINTF("Domain ready to be built.\n");

    domctl.cmd = XEN_DOMCTL_setvcpucontext;
    domctl.domain = (domid_t)dom;
    domctl.u.vcpucontext.vcpu = 0;
    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
    rc = xc_domctl(xc_handle, &domctl);

    if (rc != 0) {
        ERROR("Couldn't build the domain");
        goto out;
    }

 out:
    if ( (rc != 0) && (dom != 0) )
        xc_domain_destroy(xc_handle, dom);
    free(mmu);
    free(p2m);
    free(pfn_type);

    DPRINTF("Restore exit with rc=%d\n", rc);

    return rc;
}