x86/hvm: don't rely on shared ioreq state for completion handling
author     Paul Durrant <paul.durrant@citrix.com>
           Fri, 31 Jul 2015 15:34:22 +0000 (16:34 +0100)
committer  Ian Campbell <ian.campbell@citrix.com>
           Mon, 3 Aug 2015 09:17:32 +0000 (10:17 +0100)
Both hvm_io_pending() and hvm_wait_for_io() use the shared (with the
emulator) ioreq structure to determine whether there is a pending I/O.
The latter will misbehave if the shared state is driven to
STATE_IOREQ_NONE by the emulator, or when the shared ioreq page is
cleared (STATE_IOREQ_NONE == 0) for re-insertion into the guest P2M
when the ioreq server is disabled, because it will then terminate its
wait without calling hvm_io_assist() to adjust Xen's internal I/O
emulation state. An I/O completion handler may then find incorrect
internal emulation state and call domain_crash().
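
For illustration only, a minimal, self-contained C sketch (not Xen
source; the names unsafe_wait and shared_ioreq are invented) of the
pattern described above: a wait loop keyed on state that the emulator
itself can write may exit without Xen's completion bookkeeping ever
having run.

    #include <stdint.h>

    /* Ioreq states as defined in Xen's public ioreq interface. */
    enum {
        STATE_IOREQ_NONE      = 0, /* zero, so a cleared page reads as NONE */
        STATE_IOREQ_READY     = 1,
        STATE_IOREQ_INPROCESS = 2,
        STATE_IORESP_READY    = 3,
    };

    /* Simplified stand-in for the ioreq slot in the shared page, which
     * the real emulator maps writable. */
    struct shared_ioreq {
        volatile uint32_t state;
        uint64_t data;
    };

    /* Pre-patch shape of the wait: completion is inferred purely from
     * emulator-writable memory. If the emulator dies, or the page is
     * zeroed for re-insertion into the P2M, state reads as
     * STATE_IOREQ_NONE and the loop exits without hvm_io_assist()
     * having adjusted Xen's internal emulation state. */
    static void unsafe_wait(struct shared_ioreq *p)
    {
        while (p->state != STATE_IOREQ_NONE)
            ; /* block until the (untrusted) shared state says done */
    }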

This patch fixes the problem by adding a pending flag to the ioreq server's
per-vcpu structure which cannot be directly manipulated by the emulator
and thus can be used to determine whether an I/O is actually pending for
that vcpu on that ioreq server. If an I/O is pending and the shared state
is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
completion of emulation (hence the data placed in the shared structure
is not used) and the internal state is adjusted as for a normal completion.
Thus, when a completion handler subsequently runs, the internal state is as
expected and domain_crash() will not be called.
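
Again purely illustrative (invented names, reusing the types from the
sketch above, not the actual patch): the fix in miniature. The
authoritative pending flag lives in Xen-private memory, so only the
completion path can clear it, and STATE_IOREQ_NONE observed while an
I/O is still pending is handled as an abnormal completion.

    /* Xen-private per-vcpu bookkeeping; the emulator cannot write it. */
    struct ioreq_vcpu_sketch {
        int pending;
    };

    /* Stand-in for hvm_io_assist(): records the result in Xen's
     * internal emulation state and retires the pending I/O. */
    static void io_assist_sketch(struct ioreq_vcpu_sketch *sv, uint64_t data)
    {
        /* ... update internal io_req state with 'data' ... */
        sv->pending = 0;
    }

    /* Post-patch shape of the wait: loop on the private flag, not on
     * the shared page. */
    static void wait_for_io_sketch(struct ioreq_vcpu_sketch *sv,
                                   struct shared_ioreq *p)
    {
        while (sv->pending) {
            switch (p->state) {
            case STATE_IOREQ_NONE:          /* emulator died mid-request */
                io_assist_sketch(sv, ~0ul); /* abnormal: data is junk */
                break;
            case STATE_IORESP_READY:        /* normal completion */
                p->state = STATE_IOREQ_NONE;
                io_assist_sketch(sv, p->data);
                break;
            default:                        /* READY / INPROCESS */
                /* block until the emulator signals progress */
                break;
            }
        }
    }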

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Tested-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
xen/arch/x86/hvm/hvm.c
xen/include/asm-x86/hvm/domain.h

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 0ca45e6b9e531ca91a717f84566a0cec1289f27b..c9576109d398807a73f9cb3c6e2812f448d38e0a 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -411,44 +411,57 @@ bool_t hvm_io_pending(struct vcpu *v)
                           &d->arch.hvm_domain.ioreq_server.list,
                           list_entry )
     {
-        ioreq_t *p = get_ioreq(s, v);
+        struct hvm_ioreq_vcpu *sv;
 
-        if ( p->state != STATE_IOREQ_NONE )
-            return 1;
+        list_for_each_entry ( sv,
+                              &s->ioreq_vcpu_list,
+                              list_entry )
+        {
+            if ( sv->vcpu == v && sv->pending )
+                return 1;
+        }
     }
 
     return 0;
 }
 
-static void hvm_io_assist(ioreq_t *p)
+static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
 {
-    struct vcpu *curr = current;
-    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
-
-    p->state = STATE_IOREQ_NONE;
+    struct vcpu *v = sv->vcpu;
+    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
 
     if ( hvm_vcpu_io_need_completion(vio) )
     {
         vio->io_req.state = STATE_IORESP_READY;
-        vio->io_req.data = p->data;
+        vio->io_req.data = data;
     }
     else
         vio->io_req.state = STATE_IOREQ_NONE;
 
-    msix_write_completion(curr);
-    vcpu_end_shutdown_deferral(curr);
+    msix_write_completion(v);
+    vcpu_end_shutdown_deferral(v);
+
+    sv->pending = 0;
 }
 
 static bool_t hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
 {
-    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
-    while ( p->state != STATE_IOREQ_NONE )
+    while ( sv->pending )
     {
         switch ( p->state )
         {
+        case STATE_IOREQ_NONE:
+            /*
+             * The only reason we should see this case is when an
+             * emulator is dying and it races with an I/O being
+             * requested.
+             */
+            hvm_io_assist(sv, ~0ul);
+            break;
         case STATE_IORESP_READY: /* IORESP_READY -> NONE */
             rmb(); /* see IORESP_READY /then/ read contents of ioreq */
-            hvm_io_assist(p);
+            p->state = STATE_IOREQ_NONE;
+            hvm_io_assist(sv, p->data);
             break;
         case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
         case STATE_IOREQ_INPROCESS:
@@ -458,6 +471,7 @@ static bool_t hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
             break;
         default:
             gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
+            sv->pending = 0;
             domain_crash(sv->vcpu->domain);
             return 0; /* bail */
         }
@@ -488,7 +502,7 @@ void hvm_do_resume(struct vcpu *v)
                               &s->ioreq_vcpu_list,
                               list_entry )
         {
-            if ( sv->vcpu == v )
+            if ( sv->vcpu == v && sv->pending )
             {
                 if ( !hvm_wait_for_io(sv, get_ioreq(s, v)) )
                     return;
@@ -2744,6 +2758,8 @@ int hvm_send_ioreq(struct hvm_ioreq_server *s, ioreq_t *proto_p,
              */
             p->state = STATE_IOREQ_READY;
             notify_via_xen_event_channel(d, port);
+
+            sv->pending = 1;
             return X86EMUL_RETRY;
         }
     }
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 4c1c061cf2d67d0091942034e943421df1d4d95d..992d5d14091935992e979229d87d4c92c1f9998b 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -45,6 +45,7 @@ struct hvm_ioreq_vcpu {
     struct list_head list_entry;
     struct vcpu      *vcpu;
     evtchn_port_t    ioreq_evtchn;
+    bool_t           pending;
 };
 
 #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)