ia64/xen-unstable

changeset 15646:c585f993385c

[HVM] Control qemu's state-save via xenstore, instead of SIGUSR1
This lets us verify that qemu has indeed stopped processing before
we start saving guest memory. Also allow qemu to continue processing
after the save has happened, instead of exiting immediately.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Tue Jul 24 14:52:16 2007 +0100 (2007-07-24)
parents 531b8ccda973
children cc48264ed647
files tools/ioemu/hw/cirrus_vga.c tools/ioemu/target-i386-dm/helper2.c tools/ioemu/vl.c tools/ioemu/vl.h tools/ioemu/xenstore.c tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/image.py
line diff
     1.1 --- a/tools/ioemu/hw/cirrus_vga.c	Tue Jul 24 11:10:08 2007 +0100
     1.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Tue Jul 24 14:52:16 2007 +0100
     1.3 @@ -3096,8 +3096,6 @@ static void cirrus_vga_save(QEMUFile *f,
     1.4      qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
     1.5      qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
     1.6      qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
     1.7 -    if (vga_acc)
     1.8 -        cirrus_stop_acc(s);
     1.9  }
    1.10  
    1.11  static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
     2.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Tue Jul 24 11:10:08 2007 +0100
     2.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Tue Jul 24 14:52:16 2007 +0100
     2.3 @@ -618,6 +618,7 @@ int main_loop(void)
     2.4      CPUState *env = cpu_single_env;
     2.5      int evtchn_fd = xc_evtchn_fd(xce_handle);
     2.6      char qemu_file[PATH_MAX];
     2.7 +    fd_set fds;
     2.8  
     2.9      buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io,
    2.10  				       cpu_single_env);
    2.11 @@ -625,19 +626,34 @@ int main_loop(void)
    2.12  
    2.13      qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env);
    2.14  
    2.15 -    while (!(vm_running && suspend_requested))
    2.16 -        /* Wait up to 10 msec. */
    2.17 -        main_loop_wait(10);
    2.18 +    xenstore_record_dm_state("running");
    2.19 +    while (1) {
    2.20 +        while (!(vm_running && suspend_requested))
    2.21 +            /* Wait up to 10 msec. */
    2.22 +            main_loop_wait(10);
    2.23  
    2.24 -    fprintf(logfile, "device model received suspend signal!\n");
    2.25 +        xenstore_record_dm_state("paused");
    2.26 +        fprintf(logfile, "device model saving state\n");
    2.27 +
    2.28 +        /* Pull all outstanding ioreqs through the system */
    2.29 +        handle_buffered_io(env);
    2.30 +        main_loop_wait(1); /* For the select() on events */
    2.31  
    2.32 -    /* Pull all outstanding ioreqs through the system */
    2.33 -    handle_buffered_io(env);
    2.34 -    main_loop_wait(1); /* For the select() on events */
    2.35 +        /* Save the device state */
    2.36 +        snprintf(qemu_file, sizeof(qemu_file), 
    2.37 +                 "/var/lib/xen/qemu-save.%d", domid);
    2.38 +        do_savevm(qemu_file);
    2.39  
    2.40 -    /* Save the device state */
    2.41 -    snprintf(qemu_file, sizeof(qemu_file), "/var/lib/xen/qemu-save.%d", domid);
    2.42 -    do_savevm(qemu_file);
    2.43 +        /* Wait to be allowed to continue */
    2.44 +        while (suspend_requested) {
    2.45 +            FD_ZERO(&fds);
    2.46 +            FD_SET(xenstore_fd(), &fds);
    2.47 +            if (select(xenstore_fd() + 1, &fds, NULL, NULL, NULL) > 0)
    2.48 +                xenstore_process_event(NULL);
    2.49 +        }
    2.50 +
    2.51 +        xenstore_record_dm_state("running");
    2.52 +    }
    2.53  
    2.54      return 0;
    2.55  }
     3.1 --- a/tools/ioemu/vl.c	Tue Jul 24 11:10:08 2007 +0100
     3.2 +++ b/tools/ioemu/vl.c	Tue Jul 24 14:52:16 2007 +0100
     3.3 @@ -6856,15 +6856,6 @@ int set_mm_mapping(int xc_handle, uint32
     3.4      return 0;
     3.5  }
     3.6  
     3.7 -void suspend(int sig)
     3.8 -{
     3.9 -    fprintf(logfile, "suspend sig handler called with requested=%d!\n",
    3.10 -            suspend_requested);
    3.11 -    if (sig != SIGUSR1)
    3.12 -        fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig);
    3.13 -    suspend_requested = 1;
    3.14 -}
    3.15 -
    3.16  #if defined(MAPCACHE)
    3.17  
    3.18  #if defined(__i386__) 
    3.19 @@ -7057,6 +7048,7 @@ int main(int argc, char **argv)
    3.20      xen_pfn_t *page_array;
    3.21      extern void *buffered_pio_page;
    3.22  #endif
    3.23 +    sigset_t set;
    3.24  
    3.25      char qemu_dm_logfilename[128];
    3.26      
    3.27 @@ -7982,24 +7974,11 @@ int main(int argc, char **argv)
    3.28  	close(fd);
    3.29      }
    3.30  
    3.31 -    /* register signal for the suspend request when save */
    3.32 -    {
    3.33 -        struct sigaction act;
    3.34 -        sigset_t set;
    3.35 -        act.sa_handler = suspend;
    3.36 -        act.sa_flags = SA_RESTART;
    3.37 -        sigemptyset(&act.sa_mask);
    3.38 -
    3.39 -        sigaction(SIGUSR1, &act, NULL);
    3.40 -
    3.41 -        /* control panel mask some signals when spawn qemu, need unmask here*/
    3.42 -        sigemptyset(&set);
    3.43 -        sigaddset(&set, SIGUSR1);
    3.44 -        sigaddset(&set, SIGTERM);
    3.45 -        if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
    3.46 -            fprintf(stderr, "unblock signal fail, possible issue for HVM save!\n");
    3.47 -
    3.48 -    }
    3.49 +    /* Unblock SIGTERM, which may have been blocked by the caller */
    3.50 +    sigemptyset(&set);
    3.51 +    sigaddset(&set, SIGTERM);
    3.52 +    if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
    3.53 +        fprintf(stderr, "Failed to unblock SIGTERM\n");
    3.54  
    3.55      main_loop();
    3.56      quit_timers();
     4.1 --- a/tools/ioemu/vl.h	Tue Jul 24 11:10:08 2007 +0100
     4.2 +++ b/tools/ioemu/vl.h	Tue Jul 24 14:52:16 2007 +0100
     4.3 @@ -1456,6 +1456,7 @@ void readline_start(const char *prompt, 
     4.4  void xenstore_parse_domain_config(int domid);
     4.5  int xenstore_fd(void);
     4.6  void xenstore_process_event(void *opaque);
     4.7 +void xenstore_record_dm_state(char *state);
     4.8  void xenstore_check_new_media_present(int timeout);
     4.9  void xenstore_write_vncport(int vnc_display);
    4.10  int xenstore_read_vncpasswd(int domid);
     5.1 --- a/tools/ioemu/xenstore.c	Tue Jul 24 11:10:08 2007 +0100
     5.2 +++ b/tools/ioemu/xenstore.c	Tue Jul 24 14:52:16 2007 +0100
     5.3 @@ -186,6 +186,12 @@ void xenstore_parse_domain_config(int do
     5.4          fprintf(logfile, "Watching %s\n", buf);
     5.5      }
     5.6  
     5.7 +    /* Set a watch for suspend requests from the migration tools */
     5.8 +    if (pasprintf(&buf, 
     5.9 +                  "/local/domain/0/device-model/%u/command", domid) != -1) {
    5.10 +        xs_watch(xsh, buf, "dm-command");
    5.11 +        fprintf(logfile, "Watching %s\n", buf);
    5.12 +    }
    5.13  
    5.14   out:
    5.15      free(type);
    5.16 @@ -310,6 +316,52 @@ void xenstore_process_logdirty_event(voi
    5.17  }
    5.18  
    5.19  
    5.20 +/* Accept state change commands from the control tools */
    5.21 +static void xenstore_process_dm_command_event(void)
    5.22 +{
    5.23 +    char *path = NULL, *command = NULL;
    5.24 +    unsigned int len;
    5.25 +    extern int suspend_requested;
    5.26 +
    5.27 +    if (pasprintf(&path, 
    5.28 +                  "/local/domain/0/device-model/%u/command", domid) == -1) {
    5.29 +        fprintf(logfile, "out of memory reading dm command\n");
    5.30 +        goto out;
    5.31 +    }
    5.32 +    command = xs_read(xsh, XBT_NULL, path, &len);
    5.33 +    if (!command)
    5.34 +        goto out;
    5.35 +    
    5.36 +    if (!strncmp(command, "save", len)) {
    5.37 +        fprintf(logfile, "dm-command: pause and save state\n");
    5.38 +        suspend_requested = 1;
    5.39 +    } else if (!strncmp(command, "continue", len)) {
    5.40 +        fprintf(logfile, "dm-command: continue after state save\n");
    5.41 +        suspend_requested = 0;
    5.42 +    } else {
    5.43 +        fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command);
    5.44 +    }
    5.45 +
    5.46 + out:
    5.47 +    free(path);
    5.48 +    free(command);
    5.49 +}
    5.50 +
    5.51 +void xenstore_record_dm_state(char *state)
    5.52 +{
    5.53 +    char *path = NULL;
    5.54 +
    5.55 +    if (pasprintf(&path, 
    5.56 +                  "/local/domain/0/device-model/%u/state", domid) == -1) {
    5.57 +        fprintf(logfile, "out of memory recording dm state\n");
    5.58 +        goto out;
    5.59 +    }
    5.60 +    if (!xs_write(xsh, XBT_NULL, path, state, strlen(state)))
    5.61 +        fprintf(logfile, "error recording dm state\n");
    5.62 +
    5.63 + out:
    5.64 +    free(path);
    5.65 +}
    5.66  
    5.67  void xenstore_process_event(void *opaque)
    5.68  {
    5.69 @@ -325,6 +377,11 @@ void xenstore_process_event(void *opaque
    5.70          goto out;
    5.71      }
    5.72  
    5.73 +    if (!strcmp(vec[XS_WATCH_TOKEN], "dm-command")) {
    5.74 +        xenstore_process_dm_command_event();
    5.75 +        goto out;
    5.76 +    }
    5.77 +
    5.78      if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
    5.79          strlen(vec[XS_WATCH_TOKEN]) != 3)
    5.80          goto out;
     6.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Tue Jul 24 11:10:08 2007 +0100
     6.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Tue Jul 24 14:52:16 2007 +0100
     6.3 @@ -98,10 +98,6 @@ def save(fd, dominfo, network, live, dst
     6.4                  log.info("Domain %d suspended.", dominfo.getDomid())
     6.5                  dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
     6.6                                         domain_name)
     6.7 -                #send signal to device model for save
     6.8 -                if hvm:
     6.9 -                    log.info("release_devices for hvm domain")
    6.10 -                    dominfo._releaseDevices(True)
    6.11                  tochild.write("done\n")
    6.12                  tochild.flush()
    6.13                  log.debug('Written done')
    6.14 @@ -139,7 +135,7 @@ def save(fd, dominfo, network, live, dst
    6.15      except Exception, exn:
    6.16          log.exception("Save failed on domain %s (%s).", domain_name,
    6.17                        dominfo.getDomid())
    6.18 -
    6.19 +        
    6.20          dominfo.resumeDomain()
    6.21          log.debug("XendCheckpoint.save: resumeDomain")
    6.22  
     7.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Tue Jul 24 11:10:08 2007 +0100
     7.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Tue Jul 24 14:52:16 2007 +0100
     7.3 @@ -1122,6 +1122,8 @@ class XendDomainInfo:
     7.4                      self._clearRestart()
     7.5  
     7.6                      if reason == 'suspend':
     7.7 +                        if self._stateGet() != DOM_STATE_SUSPENDED:
     7.8 +                            self.image.saveDeviceModel()
     7.9                          self._stateSet(DOM_STATE_SUSPENDED)
    7.10                          # Don't destroy the domain.  XendCheckpoint will do
    7.11                          # this once it has finished.  However, stop watching
    7.12 @@ -1764,6 +1766,9 @@ class XendDomainInfo:
    7.13              ResumeDomain(self.domid)
    7.14          except:
    7.15              log.exception("XendDomainInfo.resume: xc.domain_resume failed on domain %s." % (str(self.domid)))
    7.16 +        if self.is_hvm():
    7.17 +            self.image.resumeDeviceModel()
    7.18 +
    7.19  
    7.20      #
    7.21      # Channels for xenstore and console
     8.1 --- a/tools/python/xen/xend/image.py	Tue Jul 24 11:10:08 2007 +0100
     8.2 +++ b/tools/python/xen/xend/image.py	Tue Jul 24 14:52:16 2007 +0100
     8.3 @@ -20,6 +20,7 @@
     8.4  import os, string
     8.5  import re
     8.6  import math
     8.7 +import time
     8.8  import signal
     8.9  
    8.10  import xen.lowlevel.xc
    8.11 @@ -27,6 +28,7 @@ from xen.xend.XendConstants import REVER
    8.12  from xen.xend.XendError import VmError, XendError, HVMRequired
    8.13  from xen.xend.XendLogging import log
    8.14  from xen.xend.XendOptions import instance as xenopts
    8.15 +from xen.xend.xenstore.xstransact import xstransact
    8.16  from xen.xend.xenstore.xswatch import xswatch
    8.17  from xen.xend import arch
    8.18  
    8.19 @@ -175,6 +177,14 @@ class ImageHandler:
    8.20          """Create device model for the domain (define in subclass if needed)."""
    8.21          pass
    8.22      
    8.23 +    def saveDeviceModel(self):
    8.24 +        """Save device model for the domain (define in subclass if needed)."""
    8.25 +        pass
    8.26 +
    8.27 +    def resumeDeviceModel(self):
    8.28 +        """Unpause device model for the domain (define in subclass if needed)."""
    8.29 +        pass
    8.30 +
    8.31      def destroy(self):
    8.32          """Extra cleanup on domain destroy (define in subclass if needed)."""
    8.33          pass
    8.34 @@ -443,17 +453,34 @@ class HVMImageHandler(ImageHandler):
    8.35          self.vm.storeDom("image/device-model-pid", self.pid)
    8.36          log.info("device model pid: %d", self.pid)
    8.37  
    8.38 +    def saveDeviceModel(self):
    8.39 +        # Signal the device model to pause itself and save its state
    8.40 +        xstransact.Store("/local/domain/0/device-model/%i"
    8.41 +                         % self.vm.getDomid(), ('command', 'save'))
    8.42 +        # Wait for confirmation.  Could do this with a watch but we'd
    8.43 +        # still end up spinning here waiting for the watch to fire. 
    8.44 +        state = ''
    8.45 +        count = 0
    8.46 +        while state != 'paused':
    8.47 +            state = xstransact.Read("/local/domain/0/device-model/%i/state"
    8.48 +                                    % self.vm.getDomid())
    8.49 +            time.sleep(0.1)
    8.50 +            count += 1
    8.51 +            if count > 100:
    8.52 +                raise VmError('Timed out waiting for device model to save')
    8.53 +
    8.54 +    def resumeDeviceModel(self):
    8.55 +        # Signal the device model to resume activity after pausing to save.
    8.56 +        xstransact.Store("/local/domain/0/device-model/%i"
    8.57 +                         % self.vm.getDomid(), ('command', 'continue'))
    8.58 +
    8.59      def recreate(self):
    8.60          self.pid = self.vm.gatherDom(('image/device-model-pid', int))
    8.61  
    8.62      def destroy(self, suspend = False):
    8.63 -        if self.pid:
    8.64 +        if self.pid and not suspend:
    8.65              try:
    8.66 -                sig = signal.SIGKILL
    8.67 -                if suspend:
    8.68 -                    log.info("use sigusr1 to signal qemu %d", self.pid)
    8.69 -                    sig = signal.SIGUSR1
    8.70 -                os.kill(self.pid, sig)
    8.71 +                os.kill(self.pid, signal.SIGKILL)
    8.72              except OSError, exn:
    8.73                  log.exception(exn)
    8.74              try:
    8.75 @@ -464,6 +491,8 @@ class HVMImageHandler(ImageHandler):
    8.76                  # but we can't wait for it because it's not our child.
    8.77                  pass
    8.78              self.pid = None
    8.79 +            state = xstransact.Remove("/local/domain/0/device-model/%i"
    8.80 +                                      % self.vm.getDomid())
    8.81  
    8.82  
    8.83  class IA64_HVM_ImageHandler(HVMImageHandler):
    8.84 @@ -507,6 +536,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
    8.85          return max(4 * (256 * self.vm.getVCpuCount() + 2 * (maxmem_kb / 1024)),
    8.86                     shadow_mem_kb)
    8.87  
    8.88 +
    8.89  class X86_Linux_ImageHandler(LinuxImageHandler):
    8.90  
    8.91      def buildDomain(self):