xenbits.xensource.com Git - libvirt.git/commitdiff
qemu: Properly report error state in qemuDomainGetControlInfo()
author Peter Krempa <pkrempa@redhat.com>
Thu, 19 Feb 2015 10:53:42 +0000 (11:53 +0100)
committer Peter Krempa <pkrempa@redhat.com>
Wed, 4 Mar 2015 09:41:30 +0000 (10:41 +0100)
Previously when a domain would get stuck in a domain job due to a
programming mistake we'd report the following control state:

$ virsh domcontrol domain
occupied (1424343406.150s)

The timestamp is invalid as the monitor was not entered for that domain.
We can use that to detect that the domain has an active job and report a
better error instead:

$ virsh domcontrol domain
error: internal (locking) error

include/libvirt/libvirt-domain.h
src/qemu/qemu_driver.c
tools/virsh-domain-monitor.c

index 4dbd7f51a2aa9633b0653745538d79c78c07407b..a9d3efdd6a85c616b747e2bed842daea0a0d5933 100644 (file)
@@ -182,13 +182,35 @@ typedef enum {
                                         monitored by virDomainGetJobInfo); only
                                         limited set of commands may be allowed */
     VIR_DOMAIN_CONTROL_OCCUPIED = 2, /* occupied by a running command */
-    VIR_DOMAIN_CONTROL_ERROR = 3,    /* unusable, domain cannot be fully operated */
+    VIR_DOMAIN_CONTROL_ERROR = 3,    /* unusable, domain cannot be fully
+                                        operated, possible reason is provided
+                                        in the details field */
 
 # ifdef VIR_ENUM_SENTINELS
     VIR_DOMAIN_CONTROL_LAST
 # endif
 } virDomainControlState;
 
+/**
+ * virDomainControlErrorReason:
+ *
+ * Reason for the ERROR control state (virDomainControlInfo.details).
+ */
+typedef enum {
+    VIR_DOMAIN_CONTROL_ERROR_REASON_NONE = 0,     /* server didn't provide a
+                                                     reason */
+    VIR_DOMAIN_CONTROL_ERROR_REASON_UNKNOWN = 1,  /* unknown reason for the
+                                                     error */
+    VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR = 2,  /* monitor connection is
+                                                     broken */
+    VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL = 3, /* error caused by an
+                                                     internal failure in
+                                                     libvirt */
+# ifdef VIR_ENUM_SENTINELS
+    VIR_DOMAIN_CONTROL_ERROR_REASON_LAST
+# endif
+} virDomainControlErrorReason;
+
 /**
  * virDomainControlInfo:
  *
@@ -198,7 +220,8 @@ typedef enum {
 typedef struct _virDomainControlInfo virDomainControlInfo;
 struct _virDomainControlInfo {
     unsigned int state;     /* control state, one of virDomainControlState */
-    unsigned int details;   /* state details, currently 0 */
+    unsigned int details;   /* state details, currently 0 except for ERROR
+                               state (one of virDomainControlErrorReason) */
     unsigned long long stateTime; /* for how long (in msec) control interface
                                      has been in current state (except for OK
                                      and ERROR states) */
index 84aa0994b242aef526faf0162848d0b774d9cb8a..0e933e8f01f5344210f8519f0977106bf61d0a03 100644 (file)
@@ -2701,6 +2701,7 @@ qemuDomainGetControlInfo(virDomainPtr dom,
 
     if (priv->monError) {
         info->state = VIR_DOMAIN_CONTROL_ERROR;
+        info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR;
     } else if (priv->job.active) {
         if (virTimeMillisNow(&info->stateTime) < 0)
             goto cleanup;
@@ -2708,8 +2709,18 @@ qemuDomainGetControlInfo(virDomainPtr dom,
             info->state = VIR_DOMAIN_CONTROL_JOB;
             info->stateTime -= priv->job.current->started;
         } else {
-            info->state = VIR_DOMAIN_CONTROL_OCCUPIED;
-            info->stateTime -= priv->monStart;
+            if (priv->monStart > 0) {
+                info->state = VIR_DOMAIN_CONTROL_OCCUPIED;
+                info->stateTime -= priv->monStart;
+            } else {
+                /* At this point the domain has an active job, but monitor was
+                 * not entered and the domain object lock is not held thus we
+                 * are stuck in the job forever due to a programming error.
+                 */
+                info->state = VIR_DOMAIN_CONTROL_ERROR;
+                info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL;
+                info->stateTime = 0;
+            }
         }
     } else {
         info->state = VIR_DOMAIN_CONTROL_OK;
index 3c16b3eab912c0b93245b3e462927e88b7af0f62..fbafa7763f5221992dd0a63707d30769e2581ce5 100644 (file)
@@ -128,6 +128,21 @@ vshDomainControlStateToString(int state)
     return str ? _(str) : _("unknown");
 }
 
+VIR_ENUM_DECL(vshDomainControlErrorReason)
+VIR_ENUM_IMPL(vshDomainControlErrorReason,
+              VIR_DOMAIN_CONTROL_ERROR_REASON_LAST,
+              "",                             /* NONE; order assumed = enum */
+              N_("unknown"),                  /* UNKNOWN */
+              N_("monitor failure"),          /* MONITOR */
+              N_("internal (locking) error")) /* INTERNAL */
+
+static const char * /* translated reason; "unknown" if unmapped, "" if none */
+vshDomainControlErrorReasonToString(int reason)
+{
+    const char *ret = vshDomainControlErrorReasonTypeToString(reason);
+    return !ret ? _("unknown") : *ret ? _(ret) : ret; /* _("") = PO header */
+}
+
 VIR_ENUM_DECL(vshDomainState)
 VIR_ENUM_IMPL(vshDomainState,
               VIR_DOMAIN_LAST,
@@ -815,6 +830,10 @@ cmdDomControl(vshControl *ctl, const vshCmd *cmd)
         vshPrint(ctl, "%s (%0.3fs)\n",
                  vshDomainControlStateToString(info.state),
                  info.stateTime / 1000.0);
+    } else if (info.state == VIR_DOMAIN_CONTROL_ERROR && info.details > 0) {
+        vshPrint(ctl, "%s: %s\n",
+                 vshDomainControlStateToString(info.state),
+                 vshDomainControlErrorReasonToString(info.details));
     } else {
         vshPrint(ctl, "%s\n",
                  vshDomainControlStateToString(info.state));