ia64/xen-unstable

view tools/internal/xi_restore_linux.c @ 910:7c2e9edb5190

bitkeeper revision 1.576 (3facdede5nZbIb45xqApby8e8U5CQA)

xi_save_linux.c, xi_restore_linux.c, Makefile:
Suspend/resume now uses zlib to reduce the state file size.
author kaf24@scramble.cl.cam.ac.uk
date Sat Nov 08 12:17:34 2003 +0000 (2003-11-08)
parents 90ae2bc0ebee
children
line source
1 /******************************************************************************
2 * xi_restore_linux.c
3 *
4 * Restore the state of a Xenolinux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include "dom0_defs.h"
10 #include "mem_defs.h"
11 #include <asm-xeno/suspend.h>
13 #include <zlib.h>
/* Program name used in diagnostics; main() replaces it with argv[0]. */
static char *argv0 = "internal_restore_linux";

/* Lookup table: entry [pfn] holds the MFN assigned to that PFN. */
static unsigned long *pfn_to_mfn_table;

/*
 * Progress output goes through this single macro, and is flushed at once so
 * that partial lines appear immediately. Keeping it in one place leaves room
 * for a 'quiet' command-line option later.
 */
#define verbose_printf(_f, _a...)   \
    do {                            \
        printf( _f , ## _a );       \
        fflush(stdout);             \
    } while ( 0 )
27 static int get_pfn_list(
28 int domain_id, unsigned long *pfn_buf, unsigned long max_pfns)
29 {
30 dom0_op_t op;
31 int ret;
32 op.cmd = DOM0_GETMEMLIST;
33 op.u.getmemlist.domain = domain_id;
34 op.u.getmemlist.max_pfns = max_pfns;
35 op.u.getmemlist.buffer = pfn_buf;
37 if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
38 {
39 PERROR("Could not lock pfn list buffer");
40 return -1;
41 }
43 ret = do_dom0_op(&op);
45 (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
47 return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
48 }
50 #define MAX_MMU_UPDATES 1024
51 static mmu_update_t mmu_updates[MAX_MMU_UPDATES];
52 static int mmu_update_idx;
54 static int flush_mmu_updates(void)
55 {
56 int err = 0;
57 privcmd_hypercall_t hypercall;
59 if ( mmu_update_idx == 0 )
60 return 0;
62 hypercall.op = __HYPERVISOR_mmu_update;
63 hypercall.arg[0] = (unsigned long)mmu_updates;
64 hypercall.arg[1] = (unsigned long)mmu_update_idx;
66 if ( mlock(mmu_updates, sizeof(mmu_updates)) != 0 )
67 {
68 PERROR("Could not lock pagetable update array");
69 err = 1;
70 goto out;
71 }
73 if ( do_xen_hypercall(&hypercall) < 0 )
74 {
75 ERROR("Failure when submitting mmu updates");
76 err = 1;
77 }
79 mmu_update_idx = 0;
81 (void)munlock(mmu_updates, sizeof(mmu_updates));
83 out:
84 return err;
85 }
87 static int add_mmu_update(unsigned long ptr, unsigned long val)
88 {
89 mmu_updates[mmu_update_idx].ptr = ptr;
90 mmu_updates[mmu_update_idx].val = val;
91 if ( ++mmu_update_idx == MAX_MMU_UPDATES )
92 return flush_mmu_updates();
93 return 0;
94 }
/* Descriptor for /dev/mem; set by init_pfn_mapper() and used by map_pfn(). */
static int devmem_fd;

/*
 * Open /dev/mem read-write so that frames can be mapped through it.
 * Returns 0 on success, -1 (after reporting the error) on failure.
 */
static int init_pfn_mapper(void)
{
    devmem_fd = open("/dev/mem", O_RDWR);
    if ( devmem_fd >= 0 )
        return 0;

    PERROR("Could not open /dev/mem");
    return -1;
}
108 static void *map_pfn(unsigned long pfn)
109 {
110 void *vaddr = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE,
111 MAP_SHARED, devmem_fd, pfn << PAGE_SHIFT);
112 if ( vaddr == MAP_FAILED )
113 {
114 PERROR("Could not mmap a domain pfn using /dev/mem");
115 return NULL;
116 }
117 return vaddr;
118 }
120 static void unmap_pfn(void *vaddr)
121 {
122 (void)munmap(vaddr, PAGE_SIZE);
123 }
125 static int checked_read(gzFile fd, void *buf, size_t count)
126 {
127 int rc;
128 while ( ((rc = gzread(fd, buf, count)) == -1) && (errno == EINTR) )
129 continue;
130 return rc == count;
131 }
133 int main(int argc, char **argv)
134 {
135 dom0_op_t op;
136 int rc = 1, i, j;
137 unsigned long mfn, pfn, dom = 0;
138 unsigned int prev_pc, this_pc;
140 /* Number of page frames in use by this XenoLinux session. */
141 unsigned long nr_pfns;
143 /* The new domain's shared-info frame number. */
144 unsigned long shared_info_frame;
145 unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
147 /* A copy of the CPU context of the guest. */
148 full_execution_context_t ctxt;
150 /* First 16 bytes of the state file must contain 'XenoLinuxSuspend'. */
151 char signature[16];
153 /* A copy of the domain's name. */
154 char name[MAX_DOMAIN_NAME];
156 /* A table containg the type of each PFN (/not/ MFN!). */
157 unsigned long *pfn_type;
159 /* A temporary mapping, and a copy, of one frame of guest memory. */
160 unsigned long *ppage, page[1024];
162 /* A copy of the pfn-to-mfn table frame list. */
163 unsigned long pfn_to_mfn_frame_list[1024];
165 /* A temporary mapping of the guest's suspend record. */
166 suspend_record_t *p_srec;
168 /* The name and descriptor of the file that we are reading from. */
169 char *filename;
170 int fd;
171 gzFile gfd;
173 if ( argv[0] != NULL )
174 argv0 = argv[0];
176 if ( argc != 2 )
177 {
178 fprintf(stderr, "Usage: %s <state file>\n", argv0);
179 return 1;
180 }
182 filename = argv[1];
183 if ( (fd = open(filename, O_RDONLY)) == -1 )
184 {
185 PERROR("Could not open state file for reading");
186 return 1;
187 }
189 if ( (gfd = gzdopen(fd, "rb")) == NULL )
190 {
191 ERROR("Could not allocate decompression state for state file");
192 close(fd);
193 return 1;
194 }
196 /* Start writing out the saved-domain record. */
197 if ( !checked_read(gfd, signature, 16) ||
198 (memcmp(signature, "XenoLinuxSuspend", 16) != 0) )
199 {
200 ERROR("Unrecognised state format -- no signature found");
201 goto out;
202 }
204 if ( !checked_read(gfd, name, sizeof(name)) ||
205 !checked_read(gfd, &nr_pfns, sizeof(unsigned long)) ||
206 !checked_read(gfd, &ctxt, sizeof(ctxt)) ||
207 !checked_read(gfd, shared_info, PAGE_SIZE) ||
208 !checked_read(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) )
209 {
210 ERROR("Error when reading from state file");
211 goto out;
212 }
214 for ( i = 0; i < MAX_DOMAIN_NAME; i++ )
215 {
216 if ( name[i] == '\0' ) break;
217 if ( name[i] & 0x80 )
218 {
219 ERROR("Random characters in domain name");
220 goto out;
221 }
222 }
223 name[MAX_DOMAIN_NAME-1] = '\0';
225 if ( nr_pfns > 1024*1024 )
226 {
227 ERROR("Invalid state file -- pfn count out of range");
228 goto out;
229 }
231 /* We want zeroed memory so use calloc rather than malloc. */
232 pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
233 pfn_type = calloc(1, 4 * nr_pfns);
235 if ( !checked_read(gfd, pfn_type, 4 * nr_pfns) )
236 {
237 ERROR("Error when reading from state file");
238 goto out;
239 }
241 /* Create a new domain of the appropriate size, and find it's dom_id. */
242 op.cmd = DOM0_CREATEDOMAIN;
243 op.u.createdomain.memory_kb = nr_pfns * (PAGE_SIZE / 1024);
244 memcpy(op.u.createdomain.name, name, MAX_DOMAIN_NAME);
245 if ( do_dom0_op(&op) < 0 )
246 {
247 ERROR("Could not create new domain");
248 goto out;
249 }
250 dom = op.u.createdomain.domain;
252 /* Get the domain's shared-info frame. */
253 op.cmd = DOM0_GETDOMAININFO;
254 op.u.getdomaininfo.domain = dom;
255 if ( do_dom0_op(&op) < 0 )
256 {
257 ERROR("Could not get information on new domain");
258 goto out;
259 }
260 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
262 if ( init_pfn_mapper() < 0 )
263 goto out;
265 /* Copy saved contents of shared-info page. No checking needed. */
266 ppage = map_pfn(shared_info_frame);
267 memcpy(ppage, shared_info, PAGE_SIZE);
268 unmap_pfn(ppage);
270 /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
271 if ( get_pfn_list(dom, pfn_to_mfn_table, nr_pfns) != nr_pfns )
272 {
273 ERROR("Did not read correct number of frame numbers for new dom");
274 goto out;
275 }
277 verbose_printf("Reloading memory pages: 0%%");
279 /*
280 * Now simply read each saved frame into its new machine frame.
281 * We uncanonicalise page tables as we go.
282 */
283 prev_pc = 0;
284 for ( i = 0; i < nr_pfns; i++ )
285 {
286 this_pc = (i * 100) / nr_pfns;
287 if ( (this_pc - prev_pc) >= 5 )
288 {
289 verbose_printf("\b\b\b\b%3d%%", this_pc);
290 prev_pc = this_pc;
291 }
293 mfn = pfn_to_mfn_table[i];
295 if ( !checked_read(gfd, page, PAGE_SIZE) )
296 {
297 ERROR("Error when reading from state file");
298 goto out;
299 }
301 ppage = map_pfn(mfn);
302 switch ( pfn_type[i] )
303 {
304 case L1TAB:
305 memset(ppage, 0, PAGE_SIZE);
306 if ( add_mmu_update((mfn<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND,
307 MMUEXT_PIN_L1_TABLE) )
308 goto out;
309 for ( j = 0; j < 1024; j++ )
310 {
311 if ( page[j] & _PAGE_PRESENT )
312 {
313 if ( (pfn = page[j] >> PAGE_SHIFT) >= nr_pfns )
314 {
315 ERROR("Frame number in page table is out of range");
316 goto out;
317 }
318 if ( (pfn_type[pfn] != NONE) && (page[j] & _PAGE_RW) )
319 {
320 ERROR("Write access requested for a restricted frame");
321 goto out;
322 }
323 page[j] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
324 page[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT;
325 }
326 if ( add_mmu_update((unsigned long)&ppage[j], page[j]) )
327 goto out;
328 }
329 break;
330 case L2TAB:
331 memset(ppage, 0, PAGE_SIZE);
332 if ( add_mmu_update((mfn<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND,
333 MMUEXT_PIN_L2_TABLE) )
334 goto out;
335 for ( j = 0; j < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT); j++ )
336 {
337 if ( page[j] & _PAGE_PRESENT )
338 {
339 if ( (pfn = page[j] >> PAGE_SHIFT) >= nr_pfns )
340 {
341 ERROR("Frame number in page table is out of range");
342 goto out;
343 }
344 if ( pfn_type[pfn] != L1TAB )
345 {
346 ERROR("Page table mistyping");
347 goto out;
348 }
349 /* Haven't reached the L1 table yet. Ensure it is safe! */
350 if ( pfn > i )
351 {
352 unsigned long **l1 = map_pfn(pfn_to_mfn_table[pfn]);
353 memset(l1, 0, PAGE_SIZE);
354 unmap_pfn(l1);
355 }
356 page[j] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
357 page[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT;
358 }
359 if ( add_mmu_update((unsigned long)&ppage[j], page[j]) )
360 goto out;
361 }
362 break;
363 default:
364 memcpy(ppage, page, PAGE_SIZE);
365 break;
366 }
367 /* NB. Must flush before unmapping page, as pass VAs to Xen. */
368 if ( flush_mmu_updates() )
369 goto out;
370 unmap_pfn(ppage);
372 if ( add_mmu_update((mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i) )
373 goto out;
374 }
376 if ( flush_mmu_updates() )
377 goto out;
379 verbose_printf("\b\b\b\b100%%\nMemory reloaded.\n");
381 /* Uncanonicalise the suspend-record frame number and poke resume rec. */
382 pfn = ctxt.i386_ctxt.esi;
383 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
384 {
385 ERROR("Suspend record frame number is bad");
386 goto out;
387 }
388 ctxt.i386_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
389 p_srec = map_pfn(mfn);
390 p_srec->resume_info.nr_pages = nr_pfns;
391 p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
392 p_srec->resume_info.dom_id = dom;
393 p_srec->resume_info.flags = 0;
394 unmap_pfn(p_srec);
396 /* Uncanonicalise each GDT frame number. */
397 if ( ctxt.gdt_ents > 8192 )
398 {
399 ERROR("GDT entry count out of range");
400 goto out;
401 }
402 for ( i = 0; i < ctxt.gdt_ents; i += 512 )
403 {
404 pfn = ctxt.gdt_frames[i];
405 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
406 {
407 ERROR("GDT frame number is bad");
408 goto out;
409 }
410 ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn];
411 }
413 /* Uncanonicalise the page table base pointer. */
414 pfn = ctxt.pt_base >> PAGE_SHIFT;
415 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != L2TAB) )
416 {
417 ERROR("PT base is bad");
418 goto out;
419 }
420 ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
422 /* Uncanonicalise the pfn-to-mfn table frame-number list. */
423 for ( i = 0; i < nr_pfns; i += 1024 )
424 {
425 unsigned long copy_size = (nr_pfns - i) * sizeof(unsigned long);
426 if ( copy_size > PAGE_SIZE ) copy_size = PAGE_SIZE;
427 pfn = pfn_to_mfn_frame_list[i/1024];
428 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
429 {
430 ERROR("PFN-to-MFN frame number is bad");
431 goto out;
432 }
433 ppage = map_pfn(pfn_to_mfn_table[pfn]);
434 memcpy(ppage, &pfn_to_mfn_table[i], copy_size);
435 unmap_pfn(ppage);
436 }
438 /*
439 * Safety checking of saved context:
440 * 1. i386_ctxt is fine, as Xen checks that on context switch.
441 * 2. i387_ctxt is fine, as it can't hurt Xen.
442 * 3. trap_ctxt needs the code selectors checked.
443 * 4. fast_trap_idx is checked by Xen.
444 * 5. ldt base must be page-aligned, no more than 8192 ents, ...
445 * 6. gdt already done, and further checking is done by Xen.
446 * 7. check that ring1_ss is safe.
447 * 8. pt_base is already done.
448 * 9. debugregs are checked by Xen.
449 * 10. callback code selectors need checking.
450 */
451 for ( i = 0; i < 256; i++ )
452 {
453 ctxt.trap_ctxt[i].vector = i;
454 if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
455 ctxt.trap_ctxt[i].cs = FLAT_RING1_CS;
456 }
457 if ( (ctxt.ring1_ss & 3) == 0 )
458 ctxt.ring1_ss = FLAT_RING1_DS;
459 if ( (ctxt.event_callback_cs & 3) == 0 )
460 ctxt.event_callback_cs = FLAT_RING1_CS;
461 if ( (ctxt.failsafe_callback_cs & 3) == 0 )
462 ctxt.failsafe_callback_cs = FLAT_RING1_CS;
463 if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
464 (ctxt.ldt_ents > 8192) ||
465 (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
466 ((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) )
467 {
468 ERROR("Bad LDT base or size");
469 goto out;
470 }
472 op.cmd = DOM0_BUILDDOMAIN;
473 op.u.builddomain.domain = dom;
474 op.u.builddomain.num_vifs = 1;
475 memcpy(&op.u.builddomain.ctxt, &ctxt, sizeof(ctxt));
476 rc = do_dom0_op(&op);
478 out:
479 if ( rc != 0 )
480 {
481 if ( dom != 0 )
482 {
483 op.cmd = DOM0_DESTROYDOMAIN;
484 op.u.destroydomain.domain = dom;
485 op.u.destroydomain.force = 1;
486 (void)do_dom0_op(&op);
487 }
488 }
489 else
490 {
491 /* Success: print the domain id. */
492 printf("DOM=%ld\n", dom);
493 }
495 gzclose(gfd);
497 return !!rc;
498 }