ia64/xen-unstable

view tools/internal/xi_save_linux.c @ 910:7c2e9edb5190

bitkeeper revision 1.576 (3facdede5nZbIb45xqApby8e8U5CQA)

xi_save_linux.c, xi_restore_linux.c, Makefile:
Suspend/resume now uses zlib to reduce the state file size.
author kaf24@scramble.cl.cam.ac.uk
date Sat Nov 08 12:17:34 2003 +0000 (2003-11-08)
parents 90ae2bc0ebee
children
line source
1 /******************************************************************************
2 * xi_save_linux.c
3 *
4 * Save the state of a running Xenolinux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include "dom0_defs.h"
10 #include "mem_defs.h"
11 #include <asm-xeno/suspend.h>
13 #include <zlib.h>
/* Program name used in diagnostics; main() replaces it with argv[0]. */
static char *argv0 = "internal_save_linux";

/* Indexed by PFN: the MFN currently backing that pseudophysical frame. */
static unsigned long *pfn_to_mfn_table;
/* Indexed by MFN: the canonical PFN of that machine frame. */
static unsigned long *mfn_to_pfn_table;

/*
 * Progress output goes through this wrapper (print, then flush stdout) so
 * that a 'quiet' command-line option could be added in one place.
 */
#define verbose_printf(fmt, args...)    \
    do {                                \
        printf( fmt , ## args );        \
        fflush(stdout);                 \
    } while ( 0 )

/* Descriptor for /dev/mem, opened by init_pfn_mapper(). */
static int devmem_fd;
31 static int init_pfn_mapper(void)
32 {
33 if ( (devmem_fd = open("/dev/mem", O_RDWR)) < 0 )
34 {
35 PERROR("Could not open /dev/mem");
36 return -1;
37 }
38 return 0;
39 }
41 static void *map_pfn(unsigned long pfn)
42 {
43 void *vaddr = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE,
44 MAP_SHARED, devmem_fd, pfn << PAGE_SHIFT);
45 if ( vaddr == MAP_FAILED )
46 {
47 PERROR("Could not mmap a domain pfn using /dev/mem");
48 return NULL;
49 }
50 return vaddr;
51 }
53 static void unmap_pfn(void *vaddr)
54 {
55 (void)munmap(vaddr, PAGE_SIZE);
56 }
/*
 * Evaluates to 1 if machine frame @_mfn has a unique mapping in the guest's
 * pseudophysical map, else 0. The range test comes first so that the table
 * lookups are never attempted for frame numbers of 1024*1024 or above.
 * NB. @_mfn is evaluated more than once: pass a side-effect-free expression.
 */
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)                              \
    ( ((_mfn) >= (1024*1024)) ? 0 :                                 \
      (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)) )
66 /* Returns TRUE if MFN is successfully converted to a PFN. */
67 static int translate_mfn_to_pfn(unsigned long *pmfn)
68 {
69 unsigned long mfn = *pmfn;
70 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
71 return 0;
72 *pmfn = mfn_to_pfn_table[mfn];
73 return 1;
74 }
76 static int check_pfn_ownership(unsigned long mfn, unsigned int dom)
77 {
78 dom0_op_t op;
79 op.cmd = DOM0_GETPAGEFRAMEINFO;
80 op.u.getpageframeinfo.pfn = mfn;
81 if ( (do_dom0_op(&op) < 0) || (op.u.getpageframeinfo.domain != dom) )
82 return 0;
83 return 1;
84 }
86 static unsigned int get_pfn_type(unsigned long mfn)
87 {
88 dom0_op_t op;
89 op.cmd = DOM0_GETPAGEFRAMEINFO;
90 op.u.getpageframeinfo.pfn = mfn;
91 if ( do_dom0_op(&op) < 0 )
92 {
93 PERROR("Unexpected failure when getting page frame info!");
94 exit(1);
95 }
96 return op.u.getpageframeinfo.type;
97 }
99 static int checked_write(gzFile fd, void *buf, size_t count)
100 {
101 int rc;
102 while ( ((rc = gzwrite(fd, buf, count)) == -1) && (errno = EINTR) )
103 continue;
104 return rc == count;
105 }
107 int main(int argc, char **argv)
108 {
109 dom0_op_t op;
110 int rc = 1, i, j;
111 unsigned long mfn, dom;
112 unsigned int prev_pc, this_pc;
114 /* Remember if we stopped the guest, so we can restart it on exit. */
115 int we_stopped_it = 0;
117 /* The new domain's shared-info frame number. */
118 unsigned long shared_info_frame;
120 /* A copy of the CPU context of the guest. */
121 full_execution_context_t ctxt;
123 /* A copy of the domain's name. */
124 char name[MAX_DOMAIN_NAME];
126 /* A table containg the type of each PFN (/not/ MFN!). */
127 unsigned long *pfn_type;
129 /* A temporary mapping, and a copy, of one frame of guest memory. */
130 unsigned long *ppage, page[1024];
132 /* A temporary mapping, and a copy, of the pfn-to-mfn table frame list. */
133 unsigned long *p_pfn_to_mfn_frame_list, pfn_to_mfn_frame_list[1024];
134 /* A temporary mapping of one frame in the above list. */
135 unsigned long *pfn_to_mfn_frame;
137 /* A temporary mapping, and a copy, of the guest's suspend record. */
138 suspend_record_t *p_srec, srec;
140 /* The name and descriptor of the file that we are writing to. */
141 char *filename;
142 int fd;
143 gzFile gfd;
145 if ( argv[0] != NULL )
146 argv0 = argv[0];
148 if ( argc != 3 )
149 {
150 fprintf(stderr, "Usage: %s <domain_id> <state file>\n", argv0);
151 return 1;
152 }
154 dom = atoi(argv[1]);
155 if ( dom == 0 )
156 {
157 ERROR("Did you really mean domain 0?");
158 return 1;
159 }
161 filename = argv[2];
162 if ( (fd = open(filename, O_CREAT|O_EXCL|O_WRONLY, 0644)) == -1 )
163 {
164 PERROR("Could not open file for writing");
165 return 1;
166 }
168 /*
169 * Compression rate 1: we want speed over compression. We're mainly going
170 * for those zero pages, after all.
171 */
172 if ( (gfd = gzdopen(fd, "wb1")) == NULL )
173 {
174 ERROR("Could not allocate compression state for state file");
175 close(fd);
176 return 1;
177 }
179 /* Ensure that the domain exists, and that it is stopped. */
180 for ( ; ; )
181 {
182 op.cmd = DOM0_GETDOMAININFO;
183 op.u.getdomaininfo.domain = dom;
184 if ( (do_dom0_op(&op) < 0) || (op.u.getdomaininfo.domain != dom) )
185 {
186 PERROR("Could not get info on domain");
187 goto out;
188 }
190 memcpy(&ctxt, &op.u.getdomaininfo.ctxt, sizeof(ctxt));
191 memcpy(name, op.u.getdomaininfo.name, sizeof(name));
192 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
194 if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
195 break;
197 we_stopped_it = 1;
199 op.cmd = DOM0_STOPDOMAIN;
200 op.u.stopdomain.domain = dom;
201 (void)do_dom0_op(&op);
203 sleep(1);
204 }
206 /* A cheesy test to see whether the domain contains valid state. */
207 if ( ctxt.pt_base == 0 )
208 {
209 ERROR("Domain is not in a valid Xenolinux state");
210 goto out;
211 }
213 if ( init_pfn_mapper() < 0 )
214 goto out;
216 /* Is the suspend-record MFN actually valid for this domain? */
217 if ( !check_pfn_ownership(ctxt.i386_ctxt.esi, dom) )
218 {
219 ERROR("Invalid state record pointer");
220 goto out;
221 }
223 /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
224 p_srec = map_pfn(ctxt.i386_ctxt.esi);
225 memcpy(&srec, p_srec, sizeof(srec));
226 unmap_pfn(p_srec);
228 if ( srec.nr_pfns > 1024*1024 )
229 {
230 ERROR("Invalid state record -- pfn count out of range");
231 goto out;
232 }
234 if ( !check_pfn_ownership(srec.pfn_to_mfn_frame_list, dom) )
235 {
236 ERROR("Invalid pfn-to-mfn frame list pointer");
237 goto out;
238 }
240 /* Grab a copy of the pfn-to-mfn table frame list. */
241 p_pfn_to_mfn_frame_list = map_pfn(srec.pfn_to_mfn_frame_list);
242 memcpy(pfn_to_mfn_frame_list, p_pfn_to_mfn_frame_list, PAGE_SIZE);
243 unmap_pfn(p_pfn_to_mfn_frame_list);
245 /* We want zeroed memory so use calloc rather than malloc. */
246 mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
247 pfn_to_mfn_table = calloc(1, 4 * srec.nr_pfns);
248 pfn_type = calloc(1, 4 * srec.nr_pfns);
250 /*
251 * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
252 * loop we have each MFN mapped at most once. Note that there may be MFNs
253 * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
254 */
255 pfn_to_mfn_frame = NULL;
256 for ( i = 0; i < srec.nr_pfns; i++ )
257 {
258 /* Each frameful of table frames must be checked & mapped on demand. */
259 if ( (i & 1023) == 0 )
260 {
261 mfn = pfn_to_mfn_frame_list[i/1024];
262 if ( !check_pfn_ownership(mfn, dom) )
263 {
264 ERROR("Invalid frame number if pfn-to-mfn frame list");
265 goto out;
266 }
267 if ( pfn_to_mfn_frame != NULL )
268 unmap_pfn(pfn_to_mfn_frame);
269 pfn_to_mfn_frame = map_pfn(mfn);
270 }
272 mfn = pfn_to_mfn_frame[i & 1023];
274 if ( !check_pfn_ownership(mfn, dom) )
275 {
276 ERROR("Invalid frame specified with pfn-to-mfn table");
277 goto out;
278 }
280 /* Did we map this MFN already? That would be invalid! */
281 if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
282 {
283 ERROR("A machine frame appears twice in pseudophys space");
284 goto out;
285 }
287 pfn_to_mfn_table[i] = mfn;
288 mfn_to_pfn_table[mfn] = i;
290 /* Query page type by MFN, but store it by PFN. */
291 pfn_type[i] = get_pfn_type(mfn);
292 }
294 /* Canonicalise the suspend-record frame number. */
295 if ( !translate_mfn_to_pfn(&ctxt.i386_ctxt.esi) )
296 {
297 ERROR("State record is not in range of pseudophys map");
298 goto out;
299 }
301 /* Canonicalise each GDT frame number. */
302 for ( i = 0; i < ctxt.gdt_ents; i += 512 )
303 {
304 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
305 {
306 ERROR("GDT frame is not in range of pseudophys map");
307 goto out;
308 }
309 }
311 /* Canonicalise the page table base pointer. */
312 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
313 {
314 ERROR("PT base is not in range of pseudophys map");
315 goto out;
316 }
317 ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
319 /* Canonicalise the pfn-to-mfn table frame-number list. */
320 for ( i = 0; i < srec.nr_pfns; i += 1024 )
321 {
322 if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
323 {
324 ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
325 goto out;
326 }
327 }
329 /* Start writing out the saved-domain record. */
330 ppage = map_pfn(shared_info_frame);
331 if ( !checked_write(gfd, "XenoLinuxSuspend", 16) ||
332 !checked_write(gfd, name, sizeof(name)) ||
333 !checked_write(gfd, &srec.nr_pfns, sizeof(unsigned long)) ||
334 !checked_write(gfd, &ctxt, sizeof(ctxt)) ||
335 !checked_write(gfd, ppage, PAGE_SIZE) ||
336 !checked_write(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) ||
337 !checked_write(gfd, pfn_type, 4 * srec.nr_pfns) )
338 {
339 ERROR("Error when writing to state file");
340 goto out;
341 }
342 unmap_pfn(ppage);
344 verbose_printf("Saving memory pages: 0%%");
346 /* Now write out each data page, canonicalising page tables as we go... */
347 prev_pc = 0;
348 for ( i = 0; i < srec.nr_pfns; i++ )
349 {
350 this_pc = (i * 100) / srec.nr_pfns;
351 if ( (this_pc - prev_pc) >= 5 )
352 {
353 verbose_printf("\b\b\b\b%3d%%", this_pc);
354 prev_pc = this_pc;
355 }
357 mfn = pfn_to_mfn_table[i];
359 ppage = map_pfn(mfn);
360 memcpy(page, ppage, PAGE_SIZE);
361 unmap_pfn(ppage);
363 if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
364 {
365 for ( j = 0;
366 j < ((pfn_type[i] == L2TAB) ?
367 (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
368 j++ )
369 {
370 if ( !(page[j] & _PAGE_PRESENT) ) continue;
371 mfn = page[j] >> PAGE_SHIFT;
372 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
373 {
374 ERROR("Frame number in pagetable page is invalid");
375 goto out;
376 }
377 page[j] &= PAGE_SIZE - 1;
378 page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
379 }
380 }
382 if ( !checked_write(gfd, page, PAGE_SIZE) )
383 {
384 ERROR("Error when writing to state file");
385 goto out;
386 }
387 }
389 verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
391 /* Success! */
392 rc = 0;
394 out:
395 /* Restart the domain if we had to stop it to save its state. */
396 if ( we_stopped_it )
397 {
398 op.cmd = DOM0_STARTDOMAIN;
399 op.u.startdomain.domain = dom;
400 (void)do_dom0_op(&op);
401 }
403 gzclose(gfd);
405 /* On error, make sure the file is deleted. */
406 if ( rc != 0 )
407 unlink(filename);
409 return !!rc;
410 }