xenbits.xensource.com Git - people/liuw/libxenctrl-split/libvirt.git/commitdiff
qemu: Track job owner for better debugging
author: Jiri Denemark <jdenemar@redhat.com>
Fri, 6 Apr 2012 16:55:46 +0000 (18:55 +0200)
committer: Jiri Denemark <jdenemar@redhat.com>
Wed, 11 Apr 2012 07:57:39 +0000 (09:57 +0200)
In case an API fails with "cannot acquire state change lock", searching
for the API that possibly forgot to end its job is not always easy.
Let's keep track of the job owner and print it out for easier
identification.

src/qemu/qemu_domain.c
src/qemu/qemu_domain.h
src/qemu/qemu_migration.c
src/qemu/qemu_process.c

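diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index da1f57fef4c7e543ef0d0500d653d3f28c9e85d6..7f1f8eef5905030a16ccb2006ee6615c75ae370e 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c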
@@ -147,6 +147,7 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv)
     struct qemuDomainJobObj *job = &priv->job;
 
     job->active = QEMU_JOB_NONE;
+    job->owner = 0;
 }
 
 static void
@@ -155,6 +156,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
     struct qemuDomainJobObj *job = &priv->job;
 
     job->asyncJob = QEMU_ASYNC_JOB_NONE;
+    job->asyncOwner = 0;
     job->phase = 0;
     job->mask = DEFAULT_JOB_MASK;
     job->start = 0;
@@ -169,13 +171,25 @@ qemuDomainObjRestoreJob(virDomainObjPtr obj,
 
     memset(job, 0, sizeof(*job));
     job->active = priv->job.active;
+    job->owner = priv->job.owner;
     job->asyncJob = priv->job.asyncJob;
+    job->asyncOwner = priv->job.asyncOwner;
     job->phase = priv->job.phase;
 
     qemuDomainObjResetJob(priv);
     qemuDomainObjResetAsyncJob(priv);
 }
 
+void
+qemuDomainObjTransferJob(virDomainObjPtr obj)
+{
+    qemuDomainObjPrivatePtr priv = obj->privateData;
+
+    VIR_DEBUG("Changing job owner from %d to %d",
+              priv->job.owner, virThreadSelfID());
+    priv->job.owner = virThreadSelfID();
+}
+
 static void
 qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
 {
@@ -664,11 +678,23 @@ qemuDomainObjSetJobPhase(struct qemud_driver *driver,
                          int phase)
 {
     qemuDomainObjPrivatePtr priv = obj->privateData;
+    int me = virThreadSelfID();
 
     if (!priv->job.asyncJob)
         return;
 
+    VIR_DEBUG("Setting '%s' phase to '%s'",
+              qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+              qemuDomainAsyncJobPhaseToString(priv->job.asyncJob, phase));
+
+    if (priv->job.asyncOwner && me != priv->job.asyncOwner) {
+        VIR_WARN("'%s' async job is owned by thread %d",
+                 qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+                 priv->job.asyncOwner);
+    }
+
     priv->job.phase = phase;
+    priv->job.asyncOwner = me;
     qemuDomainObjSaveJob(driver, obj);
 }
 
@@ -695,6 +721,22 @@ qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver, virDomainObjPtr obj)
     qemuDomainObjSaveJob(driver, obj);
 }
 
+void
+qemuDomainObjReleaseAsyncJob(virDomainObjPtr obj)
+{
+    qemuDomainObjPrivatePtr priv = obj->privateData;
+
+    VIR_DEBUG("Releasing ownership of '%s' async job",
+              qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
+
+    if (priv->job.asyncOwner != virThreadSelfID()) {
+        VIR_WARN("'%s' async job is owned by thread %d",
+                 qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+                 priv->job.asyncOwner);
+    }
+    priv->job.asyncOwner = 0;
+}
+
 static bool
 qemuDomainNestedJobAllowed(qemuDomainObjPrivatePtr priv, enum qemuDomainJob job)
 {
@@ -764,11 +806,13 @@ retry:
                    qemuDomainJobTypeToString(job),
                    qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
         priv->job.active = job;
+        priv->job.owner = virThreadSelfID();
     } else {
         VIR_DEBUG("Starting async job: %s",
                   qemuDomainAsyncJobTypeToString(asyncJob));
         qemuDomainObjResetAsyncJob(priv);
         priv->job.asyncJob = asyncJob;
+        priv->job.asyncOwner = virThreadSelfID();
         priv->job.start = now;
     }
 
@@ -784,6 +828,15 @@ retry:
     return 0;
 
 error:
+    VIR_WARN("Cannot start job (%s, %s) for domain %s;"
+             " current job is (%s, %s) owned by (%d, %d)",
+             qemuDomainJobTypeToString(job),
+             qemuDomainAsyncJobTypeToString(asyncJob),
+             obj->def->name,
+             qemuDomainJobTypeToString(priv->job.active),
+             qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+             priv->job.owner, priv->job.asyncOwner);
+
     if (errno == ETIMEDOUT)
         qemuReportError(VIR_ERR_OPERATION_TIMEOUT,
                         "%s", _("cannot acquire state change lock"));
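diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index b3eecd38b4b0540b4928d29cf66ad00657536ad2..ce52569f5a0f0194d184504aa03429d3efe73d38 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h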
@@ -96,9 +96,11 @@ VIR_ENUM_DECL(qemuDomainAsyncJob)
 struct qemuDomainJobObj {
     virCond cond;                       /* Use to coordinate jobs */
     enum qemuDomainJob active;          /* Currently running job */
+    int owner;                          /* Thread which set current job */
 
     virCond asyncCond;                  /* Use to coordinate with async jobs */
     enum qemuDomainAsyncJob asyncJob;   /* Currently active async job */
+    int asyncOwner;                     /* Thread which set current async job */
     int phase;                          /* Job phase (mainly for migrations) */
     unsigned long long mask;            /* Jobs allowed during async job */
     unsigned long long start;           /* When the async job started */
@@ -203,8 +205,10 @@ void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
                                   unsigned long long allowedJobs);
 void qemuDomainObjRestoreJob(virDomainObjPtr obj,
                              struct qemuDomainJobObj *job);
+void qemuDomainObjTransferJob(virDomainObjPtr obj);
 void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
                                   virDomainObjPtr obj);
+void qemuDomainObjReleaseAsyncJob(virDomainObjPtr obj);
 
 void qemuDomainObjEnterMonitor(struct qemud_driver *driver,
                                virDomainObjPtr obj)
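diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index ff6f273044d568a6f4d9b590cfea2987a021d706..c1bb93a908082b84170f757d25d0babcbe4f3134 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c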
@@ -3242,6 +3242,7 @@ qemuMigrationJobStartPhase(struct qemud_driver *driver,
 int
 qemuMigrationJobContinue(virDomainObjPtr vm)
 {
+    qemuDomainObjReleaseAsyncJob(vm);
     return virDomainObjUnref(vm);
 }
 
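diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 96f39e8800b814b12220068dd85a6eb4b290d2e1..19569cf01ec4fd533278322224708a2a7a62ae91 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c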
@@ -3038,8 +3038,8 @@ qemuProcessReconnect(void *opaque)
 
     priv = obj->privateData;
 
-    /* Set fake job so that EnterMonitor* doesn't want to start a new one */
-    priv->job.active = QEMU_JOB_MODIFY;
+    /* Job was started by the caller for us */
+    qemuDomainObjTransferJob(obj);
 
     /* Hold an extra reference because we can't allow 'vm' to be
      * deleted if qemuConnectMonitor() failed */
@@ -3119,8 +3119,6 @@ qemuProcessReconnect(void *opaque)
     if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
         goto error;
 
-    priv->job.active = QEMU_JOB_NONE;
-
     /* update domain state XML with possibly updated state in virDomainObj */
     if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0)
         goto error;