]> xenbits.xensource.com Git - libvirt.git/commitdiff
qemu: Make qemuMigrationSrcCancel optionally synchronous
authorJiri Denemark <jdenemar@redhat.com>
Tue, 6 Sep 2022 16:21:31 +0000 (18:21 +0200)
committerJiri Denemark <jdenemar@redhat.com>
Tue, 6 Sep 2022 16:28:10 +0000 (18:28 +0200)
We have always considered "migrate_cancel" QMP command to return after
successfully cancelling the migration. But this is no longer true (to be
honest I'm not sure it ever was) as it just changes the migration state
to "cancelling". In most cases the migration is canceled pretty quickly
and we don't really notice anything, but sometimes it takes so long we
even get to clearing migration capabilities before the migration is
actually canceled, which fails as capabilities can only be changed when
no migration is running. So to avoid this issue, we can wait for the
migration to be really canceled after sending migrate_cancel. The only
place where we don't need synchronous behavior is when we're cancelling
migration on user's request while it is actively watched by another
thread.

https://bugzilla.redhat.com/show_bug.cgi?id=2114866

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
src/qemu/qemu_driver.c
src/qemu/qemu_migration.c
src/qemu/qemu_migration.h
src/qemu/qemu_process.c

index bb6607054e57a538fa84bef7079c95b46840456b..ea7d74806c9a182b36cc46cea94f81f436447a17 100644 (file)
@@ -12813,7 +12813,7 @@ qemuDomainAbortJobMigration(virDomainObj *vm)
     VIR_DEBUG("Cancelling migration job at client request");
 
     qemuDomainObjAbortAsyncJob(vm);
-    return qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE);
+    return qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE, false);
 }
 
 
index 5c08e81c1ba8259432be1fe9b4473cf08512c079..8273fff67fda9b02acb4535b1e487901055798ba 100644 (file)
@@ -4611,8 +4611,36 @@ qemuMigrationSrcStart(virDomainObj *vm,
 }
 
 
+static bool
+qemuMigrationSrcIsCanceled(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    virDomainJobData *jobData = priv->job.current;
+
+    qemuMigrationUpdateJobType(jobData);
+    switch (jobData->status) {
+    case VIR_DOMAIN_JOB_STATUS_FAILED:
+    case VIR_DOMAIN_JOB_STATUS_CANCELED:
+    case VIR_DOMAIN_JOB_STATUS_COMPLETED:
+    case VIR_DOMAIN_JOB_STATUS_NONE:
+        return true;
+
+    case VIR_DOMAIN_JOB_STATUS_MIGRATING:
+    case VIR_DOMAIN_JOB_STATUS_POSTCOPY:
+    case VIR_DOMAIN_JOB_STATUS_PAUSED:
+    case VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED:
+    case VIR_DOMAIN_JOB_STATUS_POSTCOPY_PAUSED:
+    case VIR_DOMAIN_JOB_STATUS_ACTIVE:
+        break;
+    }
+
+    return false;
+}
+
+
 /**
- * Requests outgoing migration to be canceled.
+ * Requests outgoing migration to be canceled and optionally waits for the
+ * cancellation to complete.
  *
  * The thread (the caller itself in most cases) which is watching the migration
  * will do all the cleanup once migration is canceled. If no thread is watching
@@ -4620,7 +4648,8 @@ qemuMigrationSrcStart(virDomainObj *vm,
  */
 int
 qemuMigrationSrcCancel(virDomainObj *vm,
-                       virDomainAsyncJob asyncJob)
+                       virDomainAsyncJob asyncJob,
+                       bool wait)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
     int rc;
@@ -4633,7 +4662,19 @@ qemuMigrationSrcCancel(virDomainObj *vm,
     rc = qemuMonitorMigrateCancel(priv->mon);
     qemuDomainObjExitMonitor(vm);
 
-    return rc;
+    if (rc < 0)
+        return -1;
+
+    if (virDomainObjIsActive(vm) && wait) {
+        VIR_DEBUG("Waiting for migration to be canceled");
+
+        while (!qemuMigrationSrcIsCanceled(vm)) {
+            if (qemuDomainObjWait(vm) < 0)
+                return -1;
+        }
+    }
+
+    return 0;
 }
 
 
@@ -4979,7 +5020,7 @@ qemuMigrationSrcRun(virQEMUDriver *driver,
 
         if (cancel &&
             priv->job.current->status != VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED)
-            qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_MIGRATION_OUT);
+            qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_MIGRATION_OUT, true);
 
         /* cancel any outstanding NBD jobs */
         if (mig && mig->nbd)
@@ -6924,7 +6965,7 @@ qemuMigrationSrcToFile(virQEMUDriver *driver, virDomainObj *vm,
             virErrorPreserveLast(&orig_err);
             virCommandAbort(compressor);
             if (virDomainObjIsActive(vm))
-                qemuMigrationSrcCancel(vm, asyncJob);
+                qemuMigrationSrcCancel(vm, asyncJob, true);
         }
         goto cleanup;
     }
@@ -6975,7 +7016,7 @@ qemuMigrationSrcCancelUnattended(virDomainObj *vm)
     VIR_DEBUG("Canceling unfinished outgoing migration of domain %s",
               vm->def->name);
 
-    qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE);
+    qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE, true);
 
     for (i = 0; i < vm->def->ndisks; i++) {
         virDomainDiskDef *disk = vm->def->disks[i];
index 31a55473990e611f48fad843f42b429030561a7b..fbea45ad4e8467b758a73642ada57f97e75d04ac 100644 (file)
@@ -245,7 +245,8 @@ qemuMigrationSrcCancelUnattended(virDomainObj *vm);
 
 int
 qemuMigrationSrcCancel(virDomainObj *vm,
-                       virDomainAsyncJob asyncJob);
+                       virDomainAsyncJob asyncJob,
+                       bool wait);
 
 int
 qemuMigrationAnyFetchStats(virDomainObj *vm,
index 4465fa89e9cef148189e25fc9af320325469d4bb..08eefd0fbad36cb4615753efd03be5525fc47f31 100644 (file)
@@ -3696,7 +3696,7 @@ qemuProcessRecoverJob(virQEMUDriver *driver,
     case VIR_ASYNC_JOB_SAVE:
     case VIR_ASYNC_JOB_DUMP:
     case VIR_ASYNC_JOB_SNAPSHOT:
-        qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE);
+        qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE, true);
         /* resume the domain but only if it was paused as a result of
          * running a migration-to-file operation.  Although we are
          * recovering an async job, this function is run at startup