ia64/xen-unstable

view tools/xcutils/xc_save.c @ 18447:f2bd9bbb0593

xc_save: ignore the first suspend event channel notification

I've noticed that the suspend event channel becomes pending as soon as
it is bound. I'm not sure why or whether this is intentional, but it
means that the suspend function will return before the domain has
completed suspending unless the first notification is cleared. Without
this patch, xc_domain_save may find that the guest has not suspended
and sleep in 10ms chunks until it does. Typically this is several
milliseconds of wasted time.

From: Brendan Cully <brendan@cs.ubc.ca>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Sep 08 11:16:23 2008 +0100 (2008-09-08)
parents a7586ec158d0
children 694b7daa353c
line source
1 /*
2 * This file is subject to the terms and conditions of the GNU General
3 * Public License. See the file "COPYING" in the main directory of
4 * this archive for more details.
5 *
6 * Copyright (C) 2005 by Christian Limpach
7 *
8 */
10 #include <err.h>
11 #include <stdlib.h>
12 #include <stdint.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <sys/ipc.h>
16 #include <sys/shm.h>
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <fcntl.h>
20 #include <err.h>
22 #include <xs.h>
23 #include <xenctrl.h>
24 #include <xenguest.h>
/* State of the dedicated suspend event channel used by the fast path. */
struct suspendinfo {
    /* event channel handle; set to -1 when no handle is open */
    int xce;
    /* port obtained from xc_evtchn_bind_interdomain(); -1 when unbound */
    int suspend_evtchn;
};

static struct suspendinfo si;
/**
 * Compatibility suspend path: write "suspend" to stdout and wait for the
 * reply on stdin. The other end of this exchange is XendCheckpoint in the
 * Python layer.
 *
 * Returns 1 when a reply line was read and it starts with "done\n";
 * returns 0 when nothing could be read or the reply is anything else.
 */
static int compat_suspend(void)
{
    char reply[30];

    printf("suspend\n");
    fflush(stdout);

    if (fgets(reply, sizeof(reply), stdin) == NULL)
        return 0;

    return strncmp(reply, "done\n", 5) == 0;
}
46 static int suspend_evtchn_release(void)
47 {
48 if (si.suspend_evtchn >= 0) {
49 xc_evtchn_unbind(si.xce, si.suspend_evtchn);
50 si.suspend_evtchn = -1;
51 }
52 if (si.xce >= 0) {
53 xc_evtchn_close(si.xce);
54 si.xce = -1;
55 }
57 return 0;
58 }
60 static int await_suspend(void)
61 {
62 int rc;
64 do {
65 rc = xc_evtchn_pending(si.xce);
66 if (rc < 0) {
67 warnx("error polling suspend notification channel: %d", rc);
68 return -1;
69 }
70 } while (rc != si.suspend_evtchn);
72 /* harmless for one-off suspend */
73 if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
74 warnx("failed to unmask suspend notification channel: %d", rc);
76 return 0;
77 }
79 static int suspend_evtchn_init(int xc, int domid)
80 {
81 struct xs_handle *xs;
82 char path[128];
83 char *portstr;
84 unsigned int plen;
85 int port;
86 int rc;
88 si.xce = -1;
89 si.suspend_evtchn = -1;
91 xs = xs_daemon_open();
92 if (!xs) {
93 warnx("failed to get xenstore handle");
94 return -1;
95 }
96 sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
97 portstr = xs_read(xs, XBT_NULL, path, &plen);
98 xs_daemon_close(xs);
100 if (!portstr || !plen) {
101 warnx("could not read suspend event channel");
102 return -1;
103 }
105 port = atoi(portstr);
106 free(portstr);
108 si.xce = xc_evtchn_open();
109 if (si.xce < 0) {
110 warnx("failed to open event channel handle");
111 goto cleanup;
112 }
114 si.suspend_evtchn = xc_evtchn_bind_interdomain(si.xce, domid, port);
115 if (si.suspend_evtchn < 0) {
116 warnx("failed to bind suspend event channel: %d", si.suspend_evtchn);
117 goto cleanup;
118 }
120 rc = xc_domain_subscribe_for_suspend(xc, domid, port);
121 if (rc < 0) {
122 warnx("failed to subscribe to domain: %d", rc);
123 goto cleanup;
124 }
126 /* event channel is pending immediately after binding */
127 await_suspend();
129 return 0;
131 cleanup:
132 suspend_evtchn_release();
134 return -1;
135 }
137 /**
138 * Issue a suspend request to a dedicated event channel in the guest, and
139 * receive the acknowledgement from the subscribe event channel. */
140 static int evtchn_suspend(void)
141 {
142 int rc;
144 rc = xc_evtchn_notify(si.xce, si.suspend_evtchn);
145 if (rc < 0) {
146 warnx("failed to notify suspend request channel: %d", rc);
147 return 0;
148 }
150 if (await_suspend() < 0) {
151 warnx("suspend failed");
152 return 0;
153 }
155 /* notify xend that it can do device migration */
156 printf("suspended\n");
157 fflush(stdout);
159 return 1;
160 }
162 static int suspend(void)
163 {
164 if (si.suspend_evtchn >= 0)
165 return evtchn_suspend();
167 return compat_suspend();
168 }
170 /* For HVM guests, there are two sources of dirty pages: the Xen shadow
171 * log-dirty bitmap, which we get with a hypercall, and qemu's version.
172 * The protocol for getting page-dirtying data from qemu uses a
173 * double-buffered shared memory interface directly between xc_save and
174 * qemu-dm.
175 *
176 * xc_save calculates the size of the bitmaps and notifies qemu-dm
177 * through the store that it wants to share the bitmaps. qemu-dm then
178 * starts filling in the 'active' buffer.
179 *
180 * To change the buffers over, xc_save writes the other buffer number to
181 * the store and waits for qemu to acknowledge that it is now writing to
182 * the new active buffer. xc_save can then process and clear the old
183 * active buffer. */
/* State set up by init_qemu_maps() and used by qemu_flip_buffer(). */

/* xenstore connection used for the log-dirty handshake */
static struct xs_handle *xs;
/* id of the shared-memory segment, or -1 when there is none */
static int qemu_shmid = -1;
/* store path to which xc_save writes the buffer number it wants active */
static char *qemu_next_active_path;
/* store path that is watched and read back to see which buffer is active */
static char *qemu_active_path;
191 /* Mark the shared-memory segment for destruction */
192 static void qemu_destroy_buffer(void)
193 {
194 if (qemu_shmid != -1)
195 shmctl(qemu_shmid, IPC_RMID, NULL);
196 qemu_shmid = -1;
197 }
199 /* Get qemu to change buffers. */
200 static void qemu_flip_buffer(int domid, int next_active)
201 {
202 char digit = '0' + next_active;
203 unsigned int len;
204 char *active_str, **watch;
205 struct timeval tv;
206 fd_set fdset;
208 /* Tell qemu that we want it to start writing log-dirty bits to the
209 * other buffer */
210 if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1))
211 errx(1, "can't write next-active to store path (%s)\n",
212 qemu_next_active_path);
214 /* Wait a while for qemu to signal that it has switched to the new
215 * active buffer */
216 read_again:
217 tv.tv_sec = 5;
218 tv.tv_usec = 0;
219 FD_ZERO(&fdset);
220 FD_SET(xs_fileno(xs), &fdset);
221 if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
222 errx(1, "timed out waiting for qemu to switch buffers\n");
223 watch = xs_read_watch(xs, &len);
224 free(watch);
226 active_str = xs_read(xs, XBT_NULL, qemu_active_path, &len);
227 if (active_str == NULL || active_str[0] - '0' != next_active)
228 /* Watch fired but value is not yet right */
229 goto read_again;
230 }
232 static void *init_qemu_maps(int domid, unsigned int bitmap_size)
233 {
234 key_t key;
235 char key_ascii[17] = {0,};
236 void *seg;
237 char *path, *p;
239 /* Make a shared-memory segment */
240 do {
241 key = rand(); /* No security, just a sequence of numbers */
242 qemu_shmid = shmget(key, 2 * bitmap_size,
243 IPC_CREAT|IPC_EXCL|S_IRUSR|S_IWUSR);
244 if (qemu_shmid == -1 && errno != EEXIST)
245 errx(1, "can't get shmem to talk to qemu-dm");
246 } while (qemu_shmid == -1);
248 /* Remember to tidy up after ourselves */
249 atexit(qemu_destroy_buffer);
251 /* Map it into our address space */
252 seg = shmat(qemu_shmid, NULL, 0);
253 if (seg == (void *) -1)
254 errx(1, "can't map shmem to talk to qemu-dm");
255 memset(seg, 0, 2 * bitmap_size);
257 /* Write the size of it into the first 32 bits */
258 *(uint32_t *)seg = bitmap_size;
260 /* Tell qemu about it */
261 if ((xs = xs_daemon_open()) == NULL)
262 errx(1, "Couldn't contact xenstore");
263 if (!(path = strdup("/local/domain/0/device-model/")))
264 errx(1, "can't get domain path in store");
265 if (!(path = realloc(path, strlen(path)
266 + 10
267 + strlen("/logdirty/next-active") + 1)))
268 errx(1, "no memory for constructing xenstore path");
269 snprintf(path + strlen(path), 11, "%i", domid);
270 strcat(path, "/logdirty/");
271 p = path + strlen(path);
273 strcpy(p, "key");
274 snprintf(key_ascii, 17, "%16.16llx", (unsigned long long) key);
275 if (!xs_write(xs, XBT_NULL, path, key_ascii, 16))
276 errx(1, "can't write key (%s) to store path (%s)\n", key_ascii, path);
278 /* Watch for qemu's indication of the active buffer, and request it
279 * to start writing to buffer 0 */
280 strcpy(p, "active");
281 if (!xs_watch(xs, path, "qemu-active-buffer"))
282 errx(1, "can't set watch in store (%s)\n", path);
283 if (!(qemu_active_path = strdup(path)))
284 errx(1, "no memory for copying xenstore path");
286 strcpy(p, "next-active");
287 if (!(qemu_next_active_path = strdup(path)))
288 errx(1, "no memory for copying xenstore path");
290 qemu_flip_buffer(domid, 0);
292 free(path);
293 return seg;
294 }
297 int
298 main(int argc, char **argv)
299 {
300 unsigned int xc_fd, io_fd, domid, maxit, max_f, flags;
301 int ret;
303 if (argc != 6)
304 errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
306 xc_fd = xc_interface_open();
307 if (xc_fd < 0)
308 errx(1, "failed to open control interface");
310 io_fd = atoi(argv[1]);
311 domid = atoi(argv[2]);
312 maxit = atoi(argv[3]);
313 max_f = atoi(argv[4]);
314 flags = atoi(argv[5]);
316 if (suspend_evtchn_init(xc_fd, domid) < 0)
317 warnx("suspend event channel initialization failed, using slow path");
319 ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags,
320 &suspend, !!(flags & XCFLAGS_HVM),
321 &init_qemu_maps, &qemu_flip_buffer);
323 suspend_evtchn_release();
325 xc_interface_close(xc_fd);
327 return ret;
328 }