direct-io.hg

changeset 12348:814fbacfafc6

[HVM] Clarify ioreq interactions between Xen and qemu-dm.

Mainly this involves placing appropriate barriers before
updating the shared state variable, and after reading from it. The
model is that it is state switches that drive the transfer of data to
and fro in the shared ioreq structure. So writers should wmb() before
updating the variable; readers should rmb() after seeing a state
change.

These barriers aren't actually required given the current code
structure since the communication flow is really driven by
event-channel notifications, which happen to provide a sufficient
barrier. However, relying on this for all time and on all
architectures seems foolish and the barriers have negligible cost
compared with the total ioreq round-trip latency. Also the model of
communications being driven by the shared-memory state variable more
closely matches other inter-domain protocols (where there is usually a
shared-memory producer index), and is hence a model we are familiar
with and likely to implement correctly.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Nov 10 10:30:38 2006 +0000 (2006-11-10)
parents 357127ca8bf2
children 2d20b5432253
files tools/ioemu/target-i386-dm/helper2.c xen/arch/x86/hvm/io.c xen/arch/x86/hvm/platform.c
line diff
     1.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Fri Nov 10 09:50:35 2006 +0000
     1.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Fri Nov 10 10:30:38 2006 +0000
     1.3 @@ -209,18 +209,19 @@ static ioreq_t *__cpu_get_ioreq(int vcpu
     1.4  
     1.5      req = &(shared_page->vcpu_iodata[vcpu].vp_ioreq);
     1.6  
     1.7 -    if (req->state == STATE_IOREQ_READY) {
     1.8 -        req->state = STATE_IOREQ_INPROCESS;
     1.9 -        rmb();
    1.10 -        return req;
    1.11 +    if (req->state != STATE_IOREQ_READY) {
    1.12 +        fprintf(logfile, "I/O request not ready: "
    1.13 +                "%x, ptr: %x, port: %"PRIx64", "
    1.14 +                "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
    1.15 +                req->state, req->data_is_ptr, req->addr,
    1.16 +                req->data, req->count, req->size);
    1.17 +        return NULL;
    1.18      }
    1.19  
    1.20 -    fprintf(logfile, "False I/O request ... in-service already: "
    1.21 -            "%x, ptr: %x, port: %"PRIx64", "
    1.22 -            "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
    1.23 -            req->state, req->data_is_ptr, req->addr,
    1.24 -            req->data, req->count, req->size);
    1.25 -    return NULL;
    1.26 +    rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
    1.27 +
    1.28 +    req->state = STATE_IOREQ_INPROCESS;
    1.29 +    return req;
    1.30  }
    1.31  
    1.32  //use poll to get the port notification
    1.33 @@ -504,12 +505,19 @@ void cpu_handle_ioreq(void *opaque)
    1.34      if (req) {
    1.35          __handle_ioreq(env, req);
    1.36  
    1.37 -        /* No state change if state = STATE_IORESP_HOOK */
    1.38 -        if (req->state == STATE_IOREQ_INPROCESS) {
    1.39 -            req->state = STATE_IORESP_READY;
    1.40 -            xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
    1.41 -        } else
    1.42 +        if (req->state != STATE_IOREQ_INPROCESS) {
    1.43 +            fprintf(logfile, "Badness in I/O request ... not in service?!: "
    1.44 +                    "%x, ptr: %x, port: %"PRIx64", "
    1.45 +                    "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
    1.46 +                    req->state, req->data_is_ptr, req->addr,
    1.47 +                    req->data, req->count, req->size);
    1.48              destroy_hvm_domain();
    1.49 +            return;
    1.50 +        }
    1.51 +
    1.52 +        wmb(); /* Update ioreq contents /then/ update state. */
    1.53 +        req->state = STATE_IORESP_READY;
    1.54 +        xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
    1.55      }
    1.56  }
    1.57  
     2.1 --- a/xen/arch/x86/hvm/io.c	Fri Nov 10 09:50:35 2006 +0000
     2.2 +++ b/xen/arch/x86/hvm/io.c	Fri Nov 10 10:30:38 2006 +0000
     2.3 @@ -745,6 +745,8 @@ void hvm_io_assist(struct vcpu *v)
     2.4          domain_crash_synchronous();
     2.5      }
     2.6  
     2.7 +    rmb(); /* see IORESP_READY /then/ read contents of ioreq */
     2.8 +
     2.9      p->state = STATE_IOREQ_NONE;
    2.10  
    2.11      if ( p->type == IOREQ_TYPE_PIO )
     3.1 --- a/xen/arch/x86/hvm/platform.c	Fri Nov 10 09:50:35 2006 +0000
     3.2 +++ b/xen/arch/x86/hvm/platform.c	Fri Nov 10 10:30:38 2006 +0000
     3.3 @@ -727,15 +727,20 @@ static void hvm_send_assist_req(struct v
     3.4      ioreq_t *p;
     3.5  
     3.6      p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
     3.7 -    if ( unlikely(p->state != STATE_IOREQ_NONE) ) {
     3.8 -        /* This indicates a bug in the device model.  Crash the
     3.9 -           domain. */
    3.10 -        printk("Device model set bad IO state %d.\n", p->state);
    3.11 +    if ( unlikely(p->state != STATE_IOREQ_NONE) )
    3.12 +    {
    3.13 +        /* This indicates a bug in the device model.  Crash the domain. */
    3.14 +        gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
    3.15          domain_crash(v->domain);
    3.16          return;
    3.17      }
    3.18  
    3.19      prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
    3.20 +
    3.21 +    /*
    3.22 +     * Following happens /after/ blocking and setting up ioreq contents.
    3.23 +     * prepare_wait_on_xen_event_channel() is an implicit barrier.
    3.24 +     */
    3.25      p->state = STATE_IOREQ_READY;
    3.26      notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
    3.27  }
    3.28 @@ -781,10 +786,12 @@ void send_pio_req(unsigned long port, un
    3.29              p->data = shadow_gva_to_gpa(current, value);
    3.30          else
    3.31              p->data = value; /* guest VA == guest PA */
    3.32 -    } else if ( dir == IOREQ_WRITE )
    3.33 +    }
    3.34 +    else if ( dir == IOREQ_WRITE )
    3.35          p->data = value;
    3.36  
    3.37 -    if ( hvm_portio_intercept(p) ) {
    3.38 +    if ( hvm_portio_intercept(p) )
    3.39 +    {
    3.40          p->state = STATE_IORESP_READY;
    3.41          hvm_io_assist(v);
    3.42          return;
    3.43 @@ -829,15 +836,18 @@ static void send_mmio_req(unsigned char 
    3.44  
    3.45      p->io_count++;
    3.46  
    3.47 -    if (value_is_ptr) {
    3.48 -        if (hvm_paging_enabled(v))
    3.49 +    if ( value_is_ptr )
    3.50 +    {
    3.51 +        if ( hvm_paging_enabled(v) )
    3.52              p->data = shadow_gva_to_gpa(v, value);
    3.53          else
    3.54              p->data = value; /* guest VA == guest PA */
    3.55 -    } else
    3.56 +    }
    3.57 +    else
    3.58          p->data = value;
    3.59  
    3.60 -    if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) ) {
    3.61 +    if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) )
    3.62 +    {
    3.63          p->state = STATE_IORESP_READY;
    3.64          hvm_io_assist(v);
    3.65          return;