Previously, when a domain got stuck in a domain job due to a programming
mistake, we'd report the following control state:
$ virsh domcontrol domain
occupied (1424343406.150s)
The timestamp is invalid as the monitor was not entered for that domain.
We can use that to detect that the domain has an active job and report a
better error instead:
$ virsh domcontrol domain
error: internal (locking) error
monitored by virDomainGetJobInfo); only
limited set of commands may be allowed */
VIR_DOMAIN_CONTROL_OCCUPIED = 2, /* occupied by a running command */
- VIR_DOMAIN_CONTROL_ERROR = 3, /* unusable, domain cannot be fully operated */
+ VIR_DOMAIN_CONTROL_ERROR = 3, /* unusable, domain cannot be fully
+ operated, possible reason is provided
+ in the details field */
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_CONTROL_LAST
# endif
} virDomainControlState;
+/**
+ * virDomainControlErrorReason:
+ *
+ * Reason for the VIR_DOMAIN_CONTROL_ERROR state (virDomainControlInfo.details).
+ */
+typedef enum {
+ VIR_DOMAIN_CONTROL_ERROR_REASON_NONE = 0, /* server didn't provide a
+ reason */
+ VIR_DOMAIN_CONTROL_ERROR_REASON_UNKNOWN = 1, /* unknown reason for the
+ error */
+ VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR = 2, /* monitor connection is
+ broken */
+ VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL = 3, /* error caused by an
+ internal failure in libvirt
+ */
+# ifdef VIR_ENUM_SENTINELS
+ VIR_DOMAIN_CONTROL_ERROR_REASON_LAST
+# endif
+} virDomainControlErrorReason;
+
/**
* virDomainControlInfo:
*
typedef struct _virDomainControlInfo virDomainControlInfo;
struct _virDomainControlInfo {
unsigned int state; /* control state, one of virDomainControlState */
- unsigned int details; /* state details, currently 0 */
+ unsigned int details; /* state details, currently 0 except for ERROR
+ state (one of virDomainControlErrorReason) */
unsigned long long stateTime; /* for how long (in msec) control interface
has been in current state (except for OK
and ERROR states) */
if (priv->monError) {
info->state = VIR_DOMAIN_CONTROL_ERROR;
+ info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR;
} else if (priv->job.active) {
if (virTimeMillisNow(&info->stateTime) < 0)
goto cleanup;
info->state = VIR_DOMAIN_CONTROL_JOB;
info->stateTime -= priv->job.current->started;
} else {
- info->state = VIR_DOMAIN_CONTROL_OCCUPIED;
- info->stateTime -= priv->monStart;
+ if (priv->monStart > 0) {
+ info->state = VIR_DOMAIN_CONTROL_OCCUPIED;
+ info->stateTime -= priv->monStart;
+ } else {
+ /* At this point the domain has an active job, but the monitor was
+ * not entered and the domain object lock is not held, thus we
+ * are stuck in the job forever due to a programming error.
+ */
+ info->state = VIR_DOMAIN_CONTROL_ERROR;
+ info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL;
+ info->stateTime = 0;
+ }
}
} else {
info->state = VIR_DOMAIN_CONTROL_OK;
return str ? _(str) : _("unknown");
}
+VIR_ENUM_DECL(vshDomainControlErrorReason)
+VIR_ENUM_IMPL(vshDomainControlErrorReason,
+ VIR_DOMAIN_CONTROL_ERROR_REASON_LAST,
+ "",
+ N_("unknown"),
+ N_("monitor failure"),
+ N_("internal (locking) error"))
+
+static const char *
+vshDomainControlErrorReasonToString(int reason)
+{
+ const char *ret = vshDomainControlErrorReasonTypeToString(reason);
+ return ret ? _(ret) : _("unknown");
+}
+
VIR_ENUM_DECL(vshDomainState)
VIR_ENUM_IMPL(vshDomainState,
VIR_DOMAIN_LAST,
vshPrint(ctl, "%s (%0.3fs)\n",
vshDomainControlStateToString(info.state),
info.stateTime / 1000.0);
+ } else if (info.state == VIR_DOMAIN_CONTROL_ERROR && info.details > 0) {
+ vshPrint(ctl, "%s: %s\n",
+ vshDomainControlStateToString(info.state),
+ vshDomainControlErrorReasonToString(info.details));
} else {
vshPrint(ctl, "%s\n",
vshDomainControlStateToString(info.state));