direct-io.hg

changeset 15388:50358c4b37f4

hvm: Support injection of virtual NMIs and clean up ExtInt handling in general.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Jun 20 11:50:16 2007 +0100 (2007-06-20)
parents 739d698986e9
children 07be0266f6d8
files xen/arch/x86/hvm/irq.c
      xen/arch/x86/hvm/svm/intr.c
      xen/arch/x86/hvm/svm/svm.c
      xen/arch/x86/hvm/vioapic.c
      xen/arch/x86/hvm/vlapic.c
      xen/arch/x86/hvm/vmx/intr.c
      xen/arch/x86/hvm/vmx/vmx.c
      xen/arch/x86/hvm/vpic.c
      xen/arch/x86/hvm/vpt.c
      xen/include/asm-x86/event.h
      xen/include/asm-x86/hvm/hvm.h
      xen/include/asm-x86/hvm/irq.h
      xen/include/asm-x86/hvm/vcpu.h
      xen/include/asm-x86/hvm/vlapic.h
      xen/include/asm-x86/hvm/vmx/vmx.h
      xen/include/asm-x86/hvm/vpic.h
      xen/include/asm-x86/hvm/vpt.h
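
This changeset replaces the old cpu_has_pending_irq()/cpu_get_interrupt() pair with a check/acknowledge interface keyed by the new enum hvm_intack: hvm_vcpu_has_pending_irq() reports the highest-priority pending source (NMI first, then LAPIC, then PIC) and hvm_vcpu_ack_pending_irq() consumes it once the vendor code has decided it can actually inject. Both svm_intr_assist() and vmx_intr_assist() below drive this from a retry loop. The following is a simplified sketch of that loop, not code from the patch; inject_nmi() and inject_extint() stand in for the vendor-specific injection helpers, and the real loops additionally re-inject events whose delivery faulted and check for an already-latched injection.

    enum hvm_intack src;
    int vector;

    do {
        src = hvm_vcpu_has_pending_irq(v);
        if ( src == hvm_intack_none )
            return;                          /* nothing to deliver */
        if ( !hvm_interrupts_enabled(v, src) )
        {
            /* Open an interrupt window and retry on a later VM entry
             * (VMX: interrupt-window exiting; SVM: dummy VINTR intercept). */
            enable_irq_window(v);
            return;
        }
    } while ( !hvm_vcpu_ack_pending_irq(v, src, &vector) );

    if ( src == hvm_intack_nmi )
        inject_nmi(v);                       /* placeholder name */
    else
        inject_extint(v, vector);            /* placeholder name */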
line diff
     1.1 --- a/xen/arch/x86/hvm/irq.c	Wed Jun 20 10:55:37 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/irq.c	Wed Jun 20 11:50:16 2007 +0100
     1.3 @@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain 
     1.4      }
     1.5  }
     1.6  
     1.7 -int cpu_has_pending_irq(struct vcpu *v)
     1.8 +enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
     1.9  {
    1.10      struct hvm_domain *plat = &v->domain->arch.hvm_domain;
    1.11  
    1.12 -    /* APIC */
    1.13 -    if ( vlapic_has_interrupt(v) != -1 )
    1.14 -        return 1;
    1.15 +    if ( unlikely(v->arch.hvm_vcpu.nmi_pending) )
    1.16 +        return hvm_intack_nmi;
    1.17  
    1.18 -    /* PIC */
    1.19 +    if ( vlapic_has_interrupt(v) != -1 )
    1.20 +        return hvm_intack_lapic;
    1.21 +
    1.22      if ( !vlapic_accept_pic_intr(v) )
    1.23 -        return 0;
    1.24 +        return hvm_intack_none;
    1.25  
    1.26 -    return plat->vpic[0].int_output;
    1.27 +    return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
    1.28  }
    1.29  
    1.30 -int cpu_get_interrupt(struct vcpu *v, int *type)
    1.31 +int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
    1.32  {
    1.33 -    int vector;
    1.34 +    switch ( type )
    1.35 +    {
    1.36 +    case hvm_intack_nmi:
    1.37 +        return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending);
    1.38 +    case hvm_intack_lapic:
    1.39 +        return ((*vector = cpu_get_apic_interrupt(v)) != -1);
    1.40 +    case hvm_intack_pic:
    1.41 +        ASSERT(v->vcpu_id == 0);
    1.42 +        return ((*vector = cpu_get_pic_interrupt(v)) != -1);
    1.43 +    default:
    1.44 +        break;
    1.45 +    }
    1.46  
    1.47 -    if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 )
    1.48 -        return vector;
    1.49 -
    1.50 -    if ( (v->vcpu_id == 0) &&
    1.51 -         ((vector = cpu_get_pic_interrupt(v, type)) != -1) )
    1.52 -        return vector;
    1.53 -
    1.54 -    return -1;
    1.55 +    return 0;
    1.56  }
    1.57  
    1.58 -int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type)
    1.59 +int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
    1.60  {
    1.61      unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
    1.62  
    1.63 -    if ( type == APIC_DM_EXTINT )
    1.64 +    if ( src == hvm_intack_pic )
    1.65          return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
    1.66                  + (isa_irq & 7));
    1.67  
    1.68 +    ASSERT(src == hvm_intack_lapic);
    1.69      return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
    1.70  }
    1.71  
    1.72 @@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in
    1.73              domain_vioapic(v->domain)->redirtbl[gsi].fields.mask);
    1.74  }
    1.75  
    1.76 -/*
    1.77 - * TODO: 1. Should not need special treatment of event-channel events.
    1.78 - *       2. Should take notice of interrupt shadows (or clear them).
    1.79 - */
    1.80  int hvm_local_events_need_delivery(struct vcpu *v)
    1.81  {
    1.82 -    int pending;
    1.83 +    enum hvm_intack type;
    1.84  
    1.85 -    pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
    1.86 -    if ( unlikely(pending) )
    1.87 -        pending = hvm_interrupts_enabled(v); 
    1.88 +    /* TODO: Get rid of event-channel special case. */
    1.89 +    if ( vcpu_info(v, evtchn_upcall_pending) )
    1.90 +        type = hvm_intack_pic;
    1.91 +    else
    1.92 +        type = hvm_vcpu_has_pending_irq(v);
    1.93  
    1.94 -    return pending;
    1.95 +    if ( likely(type == hvm_intack_none) )
    1.96 +        return 0;
    1.97 +
    1.98 +    return hvm_interrupts_enabled(v, type);
    1.99  }
   1.100  
   1.101  #if 0 /* Keep for debugging */
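
The rewritten hvm_local_events_need_delivery() reuses the same intack machinery: an event-channel upcall is treated as a PIC-class interrupt (the remaining TODO), and delivery is reported only if hvm_interrupts_enabled() agrees for that source. Its main consumer is the generic preemption path; a minimal sketch, assuming the usual wrappers outside this diff:

    /* Assumed call chain (not part of this diff):
     * hypercall_preempt_check() -> local_events_need_delivery()
     *                           -> hvm_local_events_need_delivery(current)
     * A long-running operation checks it periodically: */
    if ( hypercall_preempt_check() )
        /* stop here and arrange to continue the operation later */ ;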
     2.1 --- a/xen/arch/x86/hvm/svm/intr.c	Wed Jun 20 10:55:37 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Wed Jun 20 11:50:16 2007 +0100
     2.3 @@ -15,7 +15,6 @@
     2.4   * You should have received a copy of the GNU General Public License along with
     2.5   * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
     2.6   * Place - Suite 330, Boston, MA 02111-1307 USA.
     2.7 - *
     2.8   */
     2.9  
    2.10  #include <xen/config.h>
    2.11 @@ -39,100 +38,119 @@
    2.12  #include <xen/domain_page.h>
    2.13  #include <asm/hvm/trace.h>
    2.14  
    2.15 -/*
    2.16 - * Most of this code is copied from vmx_io.c and modified 
    2.17 - * to be suitable for SVM.
    2.18 - */
    2.19 -
    2.20 -static inline int svm_inject_extint(struct vcpu *v, int trap)
    2.21 +static void svm_inject_dummy_vintr(struct vcpu *v)
    2.22  {
    2.23      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.24      vintr_t intr = vmcb->vintr;
    2.25  
    2.26 -    /* Update only relevant fields */    
    2.27      intr.fields.irq = 1;
    2.28      intr.fields.intr_masking = 1;
    2.29 -    intr.fields.vector = trap;
    2.30 +    intr.fields.vector = 0;
    2.31      intr.fields.prio = 0xF;
    2.32      intr.fields.ign_tpr = 1;
    2.33      vmcb->vintr = intr;
    2.34 +}
    2.35 +    
    2.36 +static void svm_inject_nmi(struct vcpu *v)
    2.37 +{
    2.38 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.39 +    eventinj_t event;
    2.40  
    2.41 -    return 0;
    2.42 +    event.bytes = 0;
    2.43 +    event.fields.v = 1;
    2.44 +    event.fields.type = EVENTTYPE_NMI;
    2.45 +    event.fields.vector = 2;
    2.46 +
    2.47 +    ASSERT(vmcb->eventinj.fields.v == 0);
    2.48 +    vmcb->eventinj = event;
    2.49 +}
    2.50 +    
    2.51 +static void svm_inject_extint(struct vcpu *v, int vector)
    2.52 +{
    2.53 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.54 +    eventinj_t event;
    2.55 +
    2.56 +    event.bytes = 0;
    2.57 +    event.fields.v = 1;
    2.58 +    event.fields.type = EVENTTYPE_INTR;
    2.59 +    event.fields.vector = vector;
    2.60 +
    2.61 +    ASSERT(vmcb->eventinj.fields.v == 0);
    2.62 +    vmcb->eventinj = event;
    2.63  }
    2.64      
    2.65  asmlinkage void svm_intr_assist(void) 
    2.66  {
    2.67      struct vcpu *v = current;
    2.68      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.69 -    int intr_type = APIC_DM_EXTINT;
    2.70 -    int intr_vector = -1;
    2.71 +    enum hvm_intack intr_source;
    2.72 +    int intr_vector;
    2.73  
    2.74      /*
    2.75 -     * Previous Interrupt delivery caused this intercept?
    2.76 +     * Previous event delivery caused this intercept?
    2.77       * This will happen if the injection is latched by the processor (hence
    2.78 -     * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g.,
    2.79 -     * due to lack of shadow mapping of guest IDT or guest-kernel stack).
    2.80 -     * 
    2.81 -     * NB. Exceptions that fault during delivery are lost. This needs to be
    2.82 -     * fixed but we'll usually get away with it since faults are usually
    2.83 -     * idempotent. But this isn't the case for e.g. software interrupts!
    2.84 +     * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
    2.85 +     * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
    2.86 +     * stack).
    2.87       */
    2.88 -    if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) )
    2.89 +    if ( vmcb->exitintinfo.fields.v )
    2.90      {
    2.91 -        intr_vector = vmcb->exitintinfo.fields.vector;
    2.92 +        vmcb->eventinj = vmcb->exitintinfo;
    2.93          vmcb->exitintinfo.bytes = 0;
    2.94          HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
    2.95 -        svm_inject_extint(v, intr_vector);
    2.96          return;
    2.97      }
    2.98  
    2.99 -    /*
   2.100 -     * Previous interrupt still pending? This occurs if we return from VMRUN
   2.101 -     * very early in the entry-to-guest process. Usually this is because an
   2.102 -     * external physical interrupt was pending when we executed VMRUN.
   2.103 -     */
   2.104 -    if ( vmcb->vintr.fields.irq )
   2.105 -        return;
   2.106 -
   2.107 -    /* Crank the handle on interrupt state and check for new interrrupts. */
   2.108 +    /* Crank the handle on interrupt state. */
   2.109      pt_update_irq(v);
   2.110      hvm_set_callback_irq_level();
   2.111 -    if ( !cpu_has_pending_irq(v) )
   2.112 -        return;
   2.113  
   2.114 -    /*
   2.115 -     * If the guest can't take an interrupt right now, create a 'fake'
   2.116 -     * virtual interrupt on to intercept as soon as the guest _can_ take
   2.117 -     * interrupts.  Do not obtain the next interrupt from the vlapic/pic
   2.118 -     * if unable to inject.
   2.119 -     *
   2.120 -     * Also do this if there is an exception pending.  This is because
   2.121 -     * the delivery of the exception can arbitrarily delay the injection
   2.122 -     * of the vintr (for example, if the exception is handled via an
   2.123 -     * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
   2.124 -     * - the vTPR could be modified upwards, so we need to wait until the
   2.125 -     *   exception is delivered before we can safely decide that an
   2.126 -     *   interrupt is deliverable; and
   2.127 -     * - the guest might look at the APIC/PIC state, so we ought not to have 
   2.128 -     *   cleared the interrupt out of the IRR.
   2.129 -     */
   2.130 -    if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow 
   2.131 -         || vmcb->eventinj.fields.v )  
   2.132 +    do {
   2.133 +        intr_source = hvm_vcpu_has_pending_irq(v);
   2.134 +        if ( likely(intr_source == hvm_intack_none) )
   2.135 +            return;
   2.136 +
   2.137 +        /*
   2.138 +         * If the guest can't take an interrupt right now, create a 'fake'
   2.139 +         * virtual interrupt on to intercept as soon as the guest _can_ take
   2.140 +         * interrupts.  Do not obtain the next interrupt from the vlapic/pic
   2.141 +         * if unable to inject.
   2.142 +         *
   2.143 +         * Also do this if there is an injection already pending. This is
   2.144 +         * because the event delivery can arbitrarily delay the injection
   2.145 +         * of the vintr (for example, if the exception is handled via an
   2.146 +         * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
   2.147 +         * - the vTPR could be modified upwards, so we need to wait until the
   2.148 +         *   exception is delivered before we can safely decide that an
   2.149 +         *   interrupt is deliverable; and
   2.150 +         * - the guest might look at the APIC/PIC state, so we ought not to
   2.151 +         *   have cleared the interrupt out of the IRR.
   2.152 +         *
   2.153 +         * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
   2.154 +         * shadow. This is hard to do without hardware support. We should also
   2.155 +         * track 'NMI blocking' from NMI injection until IRET. This can be done
   2.156 +         * quite easily in software by intercepting the unblocking IRET.
   2.157 +         */
   2.158 +        if ( !hvm_interrupts_enabled(v, intr_source) ||
   2.159 +             vmcb->eventinj.fields.v )
   2.160 +        {
   2.161 +            vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
   2.162 +            HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
   2.163 +            svm_inject_dummy_vintr(v);
   2.164 +            return;
   2.165 +        }
   2.166 +    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
   2.167 +
   2.168 +    if ( intr_source == hvm_intack_nmi )
   2.169      {
   2.170 -        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
   2.171 -        HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
   2.172 -        svm_inject_extint(v, 0x0); /* actual vector doesn't matter */
   2.173 -        return;
   2.174 +        svm_inject_nmi(v);
   2.175      }
   2.176 -
   2.177 -    /* Okay, we can deliver the interrupt: grab it and update PIC state. */
   2.178 -    intr_vector = cpu_get_interrupt(v, &intr_type);
   2.179 -    BUG_ON(intr_vector < 0);
   2.180 -
   2.181 -    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
   2.182 -    svm_inject_extint(v, intr_vector);
   2.183 -
   2.184 -    pt_intr_post(v, intr_vector, intr_type);
   2.185 +    else
   2.186 +    {
   2.187 +        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
   2.188 +        svm_inject_extint(v, intr_vector);
   2.189 +        pt_intr_post(v, intr_vector, intr_source);
   2.190 +    }
   2.191  }
   2.192  
   2.193  /*
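
The rewritten svm_intr_assist() above no longer decodes exitintinfo: a faulted event is copied back into eventinj wholesale and retried. When delivery is blocked (interrupts disabled for the chosen source, or an injection already latched), it programs a dummy virtual interrupt and enables the VINTR intercept so Xen regains control the moment the guest's interrupt window opens. The matching VMEXIT_VINTR unwind lives in svm.c and is not part of this hunk; a minimal sketch of its conventional shape, with the function name used here purely for illustration:

    /* Sketch: what the VINTR intercept handler is expected to do. */
    static void svm_vintr_window_open(struct vcpu *v)
    {
        struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

        /* The fake vintr has served its purpose; drop it and the intercept.
         * Actual delivery happens on the next pass through svm_intr_assist(). */
        vmcb->vintr.fields.irq = 0;
        vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
    }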
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Wed Jun 20 10:55:37 2007 +0100
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Wed Jun 20 11:50:16 2007 +0100
     3.3 @@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct
     3.4      c->sysenter_esp = vmcb->sysenter_esp;
     3.5      c->sysenter_eip = vmcb->sysenter_eip;
     3.6  
     3.7 -    /* Save any event/interrupt that was being injected when we last
     3.8 -     * exited.  Although there are three(!) VMCB fields that can contain
     3.9 -     * active events, we only need to save at most one: because the
    3.10 -     * intr_assist logic never delivers an IRQ when any other event is
    3.11 -     * active, we know that the only possible collision is if we inject
    3.12 -     * a fault while exitintinfo contains a valid event (the delivery of
    3.13 -     * which caused the last exit).  In that case replaying just the
    3.14 -     * first event should cause the same behaviour when we restore. */
    3.15 -    if ( vmcb->vintr.fields.irq 
    3.16 -         && /* Check it's not a fake interrupt (see svm_intr_assist()) */
    3.17 -         !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) )
    3.18 -    {
    3.19 -        c->pending_vector = vmcb->vintr.fields.vector;
    3.20 -        c->pending_type = 0; /* External interrupt */
    3.21 -        c->pending_error_valid = 0;
    3.22 -        c->pending_reserved = 0;
    3.23 -        c->pending_valid = 1;
    3.24 -        c->error_code = 0;
    3.25 -    }
    3.26 -    else if ( vmcb->exitintinfo.fields.v )
    3.27 +    /* Save any event/interrupt that was being injected when we last exited. */
    3.28 +    if ( vmcb->exitintinfo.fields.v )
    3.29      {
    3.30          c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
    3.31          c->error_code = vmcb->exitintinfo.fields.errorcode;
    3.32 @@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct
    3.33          __restore_debug_registers(v);
    3.34  }
    3.35  
    3.36 -static int svm_interrupts_enabled(struct vcpu *v)
    3.37 +static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
    3.38  {
    3.39 -    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
    3.40 -    return !irq_masked(eflags); 
    3.41 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    3.42 +
    3.43 +    if ( type == hvm_intack_nmi )
    3.44 +        return !vmcb->interrupt_shadow;
    3.45 +
    3.46 +    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
    3.47 +    return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow; 
    3.48  }
    3.49  
    3.50  static int svm_guest_x86_mode(struct vcpu *v)
    3.51 @@ -2160,11 +2147,14 @@ static inline void svm_do_msr_access(
    3.52  
    3.53  static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
    3.54  {
    3.55 +    enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
    3.56 +
    3.57      __update_guest_eip(vmcb, 1);
    3.58  
    3.59      /* Check for interrupt not handled or new interrupt. */
    3.60 -    if ( (vmcb->rflags & X86_EFLAGS_IF) &&
    3.61 -         (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) {
    3.62 +    if ( vmcb->eventinj.fields.v ||
    3.63 +         ((type != hvm_intack_none) && hvm_interrupts_enabled(current, type)) )
    3.64 +    {
    3.65          HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
    3.66          return;
    3.67      }
     4.1 --- a/xen/arch/x86/hvm/vioapic.c	Wed Jun 20 10:55:37 2007 +0100
     4.2 +++ b/xen/arch/x86/hvm/vioapic.c	Wed Jun 20 11:50:16 2007 +0100
     4.3 @@ -254,17 +254,11 @@ static void ioapic_inj_irq(
     4.4      HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
     4.5                  vector, trig_mode, delivery_mode);
     4.6  
     4.7 -    switch ( delivery_mode )
     4.8 -    {
     4.9 -    case dest_Fixed:
    4.10 -    case dest_LowestPrio:
    4.11 -        if ( vlapic_set_irq(target, vector, trig_mode) )
    4.12 -            vcpu_kick(vlapic_vcpu(target));
    4.13 -        break;
    4.14 -    default:
    4.15 -        gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
    4.16 -        break;
    4.17 -    }
    4.18 +    ASSERT((delivery_mode == dest_Fixed) ||
    4.19 +           (delivery_mode == dest_LowestPrio));
    4.20 +
    4.21 +    if ( vlapic_set_irq(target, vector, trig_mode) )
    4.22 +        vcpu_kick(vlapic_vcpu(target));
    4.23  }
    4.24  
    4.25  static uint32_t ioapic_get_delivery_bitmask(
    4.26 @@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h
    4.27      }
    4.28  
    4.29      case dest_Fixed:
    4.30 -    case dest_ExtINT:
    4.31      {
    4.32          uint8_t bit;
    4.33          for ( bit = 0; deliver_bitmask != 0; bit++ )
    4.34 @@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h
    4.35          break;
    4.36      }
    4.37  
    4.38 -    case dest_SMI:
    4.39      case dest_NMI:
    4.40 -    case dest_INIT:
    4.41 -    case dest__reserved_2:
    4.42 +    {
    4.43 +        uint8_t bit;
    4.44 +        for ( bit = 0; deliver_bitmask != 0; bit++ )
    4.45 +        {
    4.46 +            if ( !(deliver_bitmask & (1 << bit)) )
    4.47 +                continue;
    4.48 +            deliver_bitmask &= ~(1 << bit);
    4.49 +            if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) &&
    4.50 +                 !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
    4.51 +                vcpu_kick(v);
    4.52 +        }
    4.53 +        break;
    4.54 +    }
    4.55 +
    4.56      default:
    4.57          gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
    4.58                   delivery_mode);
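
With the dest_NMI case added above, an NMI programmed into an I/O APIC redirection entry is no longer dropped: each vCPU in the delivery bitmask gets nmi_pending latched and is kicked. For orientation, a guest arms this through the standard 82093AA redirection-entry encoding; the macro below is illustrative only (standard layout, not a symbol from this patch), showing a physical-destination NMI entry:

    /* Illustrative 82093AA redirection entry for NMI delivery:
     *   bits  7:0   vector (ignored for NMI)
     *   bits 10:8   delivery mode = 100b (NMI)
     *   bit  16     mask = 0 (entry enabled)
     *   bits 63:56  destination APIC ID (physical mode)
     */
    #define EXAMPLE_RTE_NMI(apic_id)  (((uint64_t)(apic_id) << 56) | (4ULL << 8))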
     5.1 --- a/xen/arch/x86/hvm/vlapic.c	Wed Jun 20 10:55:37 2007 +0100
     5.2 +++ b/xen/arch/x86/hvm/vlapic.c	Wed Jun 20 11:50:16 2007 +0100
     5.3 @@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu
     5.4          break;
     5.5  
     5.6      case APIC_DM_NMI:
     5.7 -        gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n");
     5.8 +        if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
     5.9 +            vcpu_kick(v);
    5.10          break;
    5.11  
    5.12      case APIC_DM_INIT:
    5.13 @@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v)
    5.14      return highest_irr;
    5.15  }
    5.16  
    5.17 -int cpu_get_apic_interrupt(struct vcpu *v, int *mode)
    5.18 +int cpu_get_apic_interrupt(struct vcpu *v)
    5.19  {
    5.20      int vector = vlapic_has_interrupt(v);
    5.21      struct vlapic *vlapic = vcpu_vlapic(v);
    5.22 @@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu *
    5.23   
    5.24      vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
    5.25      vlapic_clear_irr(vector, vlapic);
    5.26 -
    5.27 -    *mode = APIC_DM_FIXED;
    5.28      return vector;
    5.29  }
    5.30  
     6.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Wed Jun 20 10:55:37 2007 +0100
     6.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Wed Jun 20 11:50:16 2007 +0100
     6.3 @@ -102,8 +102,8 @@ static void update_tpr_threshold(struct 
     6.4  
     6.5  asmlinkage void vmx_intr_assist(void)
     6.6  {
     6.7 -    int has_ext_irq, intr_vector, intr_type = 0;
     6.8 -    unsigned long eflags, intr_shadow;
     6.9 +    int intr_vector;
    6.10 +    enum hvm_intack intr_source;
    6.11      struct vcpu *v = current;
    6.12      unsigned int idtv_info_field;
    6.13      unsigned long inst_len;
    6.14 @@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void)
    6.15  
    6.16      update_tpr_threshold(vcpu_vlapic(v));
    6.17  
    6.18 -    has_ext_irq = cpu_has_pending_irq(v);
    6.19 +    do {
    6.20 +        intr_source = hvm_vcpu_has_pending_irq(v);
    6.21  
    6.22 -    if ( unlikely(v->arch.hvm_vmx.vector_injected) )
    6.23 -    {
    6.24 -        v->arch.hvm_vmx.vector_injected = 0;
    6.25 -        if ( unlikely(has_ext_irq) )
    6.26 -            enable_irq_window(v);
    6.27 -        return;
    6.28 -    }
    6.29 +        if ( unlikely(v->arch.hvm_vmx.vector_injected) )
    6.30 +        {
    6.31 +            v->arch.hvm_vmx.vector_injected = 0;
    6.32 +            if ( unlikely(intr_source != hvm_intack_none) )
    6.33 +                enable_irq_window(v);
    6.34 +            return;
    6.35 +        }
    6.36  
    6.37 -    /* This could be moved earlier in the VMX resume sequence. */
    6.38 -    idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
    6.39 -    if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
    6.40 -    {
    6.41 -        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
    6.42 +        /* This could be moved earlier in the VMX resume sequence. */
    6.43 +        idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
    6.44 +        if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
    6.45 +        {
    6.46 +            __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
    6.47 +
    6.48 +            /*
    6.49 +             * Safe: the length will only be interpreted for software
    6.50 +             * exceptions and interrupts. If we get here then delivery of some
    6.51 +             * event caused a fault, and this always results in defined
    6.52 +             * VM_EXIT_INSTRUCTION_LEN.
    6.53 +             */
    6.54 +            inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
    6.55 +            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
    6.56 +
    6.57 +            if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
    6.58 +                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
    6.59 +                          __vmread(IDT_VECTORING_ERROR_CODE));
    6.60 +            if ( unlikely(intr_source != hvm_intack_none) )
    6.61 +                enable_irq_window(v);
    6.62 +
    6.63 +            HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
    6.64 +            return;
    6.65 +        }
    6.66 +
    6.67 +        if ( likely(intr_source == hvm_intack_none) )
    6.68 +            return;
    6.69  
    6.70          /*
    6.71 -         * Safe: the length will only be interpreted for software exceptions
    6.72 -         * and interrupts. If we get here then delivery of some event caused a
    6.73 -         * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN.
    6.74 +         * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but
    6.75 +         * should wait for exit from 'NMI blocking' window (NMI injection to
    6.76 +         * next IRET). This requires us to use the new 'virtual NMI' support.
    6.77           */
    6.78 -        inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
    6.79 -        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
    6.80 -
    6.81 -        if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
    6.82 -            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
    6.83 -                      __vmread(IDT_VECTORING_ERROR_CODE));
    6.84 -        if ( unlikely(has_ext_irq) )
    6.85 +        if ( !hvm_interrupts_enabled(v, intr_source) )
    6.86 +        {
    6.87              enable_irq_window(v);
    6.88 -
    6.89 -        HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
    6.90 -        return;
    6.91 -    }
    6.92 -
    6.93 -    if ( likely(!has_ext_irq) )
    6.94 -        return;
    6.95 +            return;
    6.96 +        }
    6.97 +    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
    6.98  
    6.99 -    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
   6.100 -    if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) )
   6.101 -    {
   6.102 -        enable_irq_window(v);
   6.103 -        HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility");
   6.104 -        return;
   6.105 -    }
   6.106 -
   6.107 -    eflags = __vmread(GUEST_RFLAGS);
   6.108 -    if ( irq_masked(eflags) )
   6.109 +    if ( intr_source == hvm_intack_nmi )
   6.110      {
   6.111 -        enable_irq_window(v);
   6.112 -        return;
   6.113 +        vmx_inject_nmi(v);
   6.114      }
   6.115 -
   6.116 -    intr_vector = cpu_get_interrupt(v, &intr_type);
   6.117 -    BUG_ON(intr_vector < 0);
   6.118 -
   6.119 -    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
   6.120 -    vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE);
   6.121 -
   6.122 -    pt_intr_post(v, intr_vector, intr_type);
   6.123 +    else
   6.124 +    {
   6.125 +        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
   6.126 +        vmx_inject_extint(v, intr_vector);
   6.127 +        pt_intr_post(v, intr_vector, intr_source);
   6.128 +    }
   6.129  }
   6.130  
   6.131  /*
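
On the VMX side the "cannot deliver yet" case defers to enable_irq_window(), defined earlier in this file (outside the hunk): it requests an interrupt-window VM exit so vmx_intr_assist() runs again as soon as RFLAGS.IF is set and no STI/MOV-SS shadow blocks delivery. A minimal sketch of that helper, assuming the conventional implementation:

    static void enable_irq_window(struct vcpu *v)
    {
        u32 *exec_control = &v->arch.hvm_vmx.exec_control;

        /* Arm interrupt-window exiting if it is not already pending. */
        if ( !(*exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) )
        {
            *exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
            __vmwrite(CPU_BASED_VM_EXEC_CONTROL, *exec_control);
        }
    }

As the TODO in the hunk notes, this is only an interrupt window, not an NMI window, so a shadow-blocked NMI currently waits for EFLAGS.IF to open rather than for the end of NMI blocking.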
     7.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 20 10:55:37 2007 +0100
     7.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 20 11:50:16 2007 +0100
     7.3 @@ -1115,16 +1115,26 @@ static int vmx_nx_enabled(struct vcpu *v
     7.4      return v->arch.hvm_vmx.efer & EFER_NX;
     7.5  }
     7.6  
     7.7 -static int vmx_interrupts_enabled(struct vcpu *v) 
     7.8 +static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
     7.9  {
    7.10 -    unsigned long eflags = __vmread(GUEST_RFLAGS); 
    7.11 -    return !irq_masked(eflags); 
    7.12 +    unsigned long intr_shadow, eflags;
    7.13 +
    7.14 +    ASSERT(v == current);
    7.15 +
    7.16 +    intr_shadow  = __vmread(GUEST_INTERRUPTIBILITY_INFO);
    7.17 +    intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
    7.18 +
    7.19 +    if ( type == hvm_intack_nmi )
    7.20 +        return !intr_shadow;
    7.21 +
    7.22 +    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
    7.23 +    eflags = __vmread(GUEST_RFLAGS);
    7.24 +    return !irq_masked(eflags) && !intr_shadow;
    7.25  }
    7.26  
    7.27 -
    7.28  static void vmx_update_host_cr3(struct vcpu *v)
    7.29  {
    7.30 -    ASSERT( (v == current) || !vcpu_runnable(v) );
    7.31 +    ASSERT((v == current) || !vcpu_runnable(v));
    7.32      vmx_vmcs_enter(v);
    7.33      __vmwrite(HOST_CR3, v->arch.cr3);
    7.34      vmx_vmcs_exit(v);
    7.35 @@ -1132,7 +1142,7 @@ static void vmx_update_host_cr3(struct v
    7.36  
    7.37  static void vmx_update_guest_cr3(struct vcpu *v)
    7.38  {
    7.39 -    ASSERT( (v == current) || !vcpu_runnable(v) );
    7.40 +    ASSERT((v == current) || !vcpu_runnable(v));
    7.41      vmx_vmcs_enter(v);
    7.42      __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
    7.43      vmx_vmcs_exit(v);
     8.1 --- a/xen/arch/x86/hvm/vpic.c	Wed Jun 20 10:55:37 2007 +0100
     8.2 +++ b/xen/arch/x86/hvm/vpic.c	Wed Jun 20 11:50:16 2007 +0100
     8.3 @@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai
     8.4          vpic_update_int_output(vpic);
     8.5  }
     8.6  
     8.7 -int cpu_get_pic_interrupt(struct vcpu *v, int *type)
     8.8 +int cpu_get_pic_interrupt(struct vcpu *v)
     8.9  {
    8.10      int irq, vector;
    8.11      struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
    8.12 @@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v
    8.13          return -1;
    8.14  
    8.15      vector = vpic[irq >> 3].irq_base + (irq & 7);
    8.16 -    *type = APIC_DM_EXTINT;
    8.17      return vector;
    8.18  }
     9.1 --- a/xen/arch/x86/hvm/vpt.c	Wed Jun 20 10:55:37 2007 +0100
     9.2 +++ b/xen/arch/x86/hvm/vpt.c	Wed Jun 20 11:50:16 2007 +0100
     9.3 @@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v)
     9.4      }
     9.5  }
     9.6  
     9.7 -static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type)
     9.8 +static struct periodic_time *is_pt_irq(
     9.9 +    struct vcpu *v, int vector, enum hvm_intack src)
    9.10  {
    9.11      struct list_head *head = &v->arch.hvm_vcpu.tm_list;
    9.12      struct periodic_time *pt;
    9.13 @@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s
    9.14              return pt;
    9.15          }
    9.16  
    9.17 -        vec = get_isa_irq_vector(v, pt->irq, type);
    9.18 +        vec = get_isa_irq_vector(v, pt->irq, src);
    9.19  
    9.20          /* RTC irq need special care */
    9.21          if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
    9.22 @@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s
    9.23      return NULL;
    9.24  }
    9.25  
    9.26 -void pt_intr_post(struct vcpu *v, int vector, int type)
    9.27 +void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
    9.28  {
    9.29      struct periodic_time *pt;
    9.30      time_cb *cb;
    9.31 @@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve
    9.32  
    9.33      spin_lock(&v->arch.hvm_vcpu.tm_lock);
    9.34  
    9.35 -    pt = is_pt_irq(v, vector, type);
    9.36 +    pt = is_pt_irq(v, vector, src);
    9.37      if ( pt == NULL )
    9.38      {
    9.39          spin_unlock(&v->arch.hvm_vcpu.tm_lock);
    10.1 --- a/xen/include/asm-x86/event.h	Wed Jun 20 10:55:37 2007 +0100
    10.2 +++ b/xen/include/asm-x86/event.h	Wed Jun 20 11:50:16 2007 +0100
    10.3 @@ -10,7 +10,6 @@
    10.4  #define __ASM_EVENT_H__
    10.5  
    10.6  #include <xen/shared.h>
    10.7 -#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */
    10.8  
    10.9  static inline void vcpu_kick(struct vcpu *v)
   10.10  {
    11.1 --- a/xen/include/asm-x86/hvm/hvm.h	Wed Jun 20 10:55:37 2007 +0100
    11.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Wed Jun 20 11:50:16 2007 +0100
    11.3 @@ -55,6 +55,14 @@ typedef struct segment_register {
    11.4      u64        base;
    11.5  } __attribute__ ((packed)) segment_register_t;
    11.6  
    11.7 +/* Interrupt acknowledgement sources. */
    11.8 +enum hvm_intack {
    11.9 +    hvm_intack_none,
   11.10 +    hvm_intack_pic,
   11.11 +    hvm_intack_lapic,
   11.12 +    hvm_intack_nmi
   11.13 +};
   11.14 +
   11.15  /*
   11.16   * The hardware virtual machine (HVM) interface abstracts away from the
   11.17   * x86/x86_64 CPU virtualization assist specifics. Currently this interface
   11.18 @@ -106,7 +114,7 @@ struct hvm_function_table {
   11.19      int (*long_mode_enabled)(struct vcpu *v);
   11.20      int (*pae_enabled)(struct vcpu *v);
   11.21      int (*nx_enabled)(struct vcpu *v);
   11.22 -    int (*interrupts_enabled)(struct vcpu *v);
   11.23 +    int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
   11.24      int (*guest_x86_mode)(struct vcpu *v);
   11.25      unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
   11.26      unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
   11.27 @@ -199,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v)
   11.28  #define hvm_long_mode_enabled(v) (v,0)
   11.29  #endif
   11.30  
   11.31 - static inline int
   11.32 +static inline int
   11.33  hvm_pae_enabled(struct vcpu *v)
   11.34  {
   11.35      return hvm_funcs.pae_enabled(v);
   11.36  }
   11.37  
   11.38  static inline int
   11.39 -hvm_interrupts_enabled(struct vcpu *v)
   11.40 +hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
   11.41  {
   11.42 -    return hvm_funcs.interrupts_enabled(v);
   11.43 +    return hvm_funcs.interrupts_enabled(v, type);
   11.44  }
   11.45  
   11.46  static inline int
    12.1 --- a/xen/include/asm-x86/hvm/irq.h	Wed Jun 20 10:55:37 2007 +0100
    12.2 +++ b/xen/include/asm-x86/hvm/irq.h	Wed Jun 20 11:50:16 2007 +0100
    12.3 @@ -24,11 +24,11 @@
    12.4  
    12.5  #include <xen/types.h>
    12.6  #include <xen/spinlock.h>
    12.7 +#include <asm/hvm/hvm.h>
    12.8  #include <asm/hvm/vpic.h>
    12.9  #include <asm/hvm/vioapic.h>
   12.10  #include <public/hvm/save.h>
   12.11  
   12.12 -
   12.13  struct hvm_irq {
   12.14      /*
   12.15       * Virtual interrupt wires for a single PCI bus.
   12.16 @@ -58,7 +58,6 @@ struct hvm_irq {
   12.17              HVMIRQ_callback_gsi,
   12.18              HVMIRQ_callback_pci_intx
   12.19          } callback_via_type;
   12.20 -        uint32_t pad; /* So the next field will be aligned */
   12.21      };
   12.22      union {
   12.23          uint32_t gsi;
   12.24 @@ -115,9 +114,12 @@ void hvm_set_pci_link_route(struct domai
   12.25  void hvm_set_callback_irq_level(void);
   12.26  void hvm_set_callback_via(struct domain *d, uint64_t via);
   12.27  
   12.28 -int cpu_get_interrupt(struct vcpu *v, int *type);
   12.29 -int cpu_has_pending_irq(struct vcpu *v);
   12.30 -int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
   12.31 +/* Check/Acknowledge next pending interrupt. */
   12.32 +enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
   12.33 +int hvm_vcpu_ack_pending_irq(
   12.34 +    struct vcpu *v, enum hvm_intack type, int *vector);
   12.35 +
   12.36 +int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
   12.37  int is_isa_irq_masked(struct vcpu *v, int isa_irq);
   12.38  
   12.39  #endif /* __ASM_X86_HVM_IRQ_H__ */
    13.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Wed Jun 20 10:55:37 2007 +0100
    13.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Wed Jun 20 11:50:16 2007 +0100
    13.3 @@ -30,12 +30,14 @@
    13.4  
    13.5  struct hvm_vcpu {
    13.6      unsigned long       hw_cr3;     /* value we give to HW to use */
    13.7 -    unsigned long       ioflags;
    13.8      struct hvm_io_op    io_op;
    13.9      struct vlapic       vlapic;
   13.10      s64                 cache_tsc_offset;
   13.11      u64                 guest_time;
   13.12  
   13.13 +    /* Is an NMI pending for delivery to this VCPU core? */
   13.14 +    bool_t              nmi_pending; /* NB. integrate flag with save/restore */
   13.15 +
   13.16      /* Lock and list for virtual platform timers. */
   13.17      spinlock_t          tm_lock;
   13.18      struct list_head    tm_list;
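
nmi_pending is a lock-free latch rather than a queue: producers (the vlapic and vioapic NMI paths in this patch) set it with test_and_set_bool() and kick the vCPU only on the 0-to-1 transition, and the consumer clears it with test_and_clear_bool() in hvm_vcpu_ack_pending_irq(). Concurrent senders therefore collapse into a single pending NMI, which mirrors real hardware, where NMIs are not queued. The pattern, restated from the patch for clarity:

    /* Producer (interrupt-controller emulation): */
    if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
        vcpu_kick(v);            /* only the first setter needs to kick */

    /* Consumer (hvm_vcpu_ack_pending_irq()): */
    if ( test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending) )
        { /* proceed to inject the NMI */ }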
    14.1 --- a/xen/include/asm-x86/hvm/vlapic.h	Wed Jun 20 10:55:37 2007 +0100
    14.2 +++ b/xen/include/asm-x86/hvm/vlapic.h	Wed Jun 20 11:50:16 2007 +0100
    14.3 @@ -76,7 +76,7 @@ int vlapic_set_irq(struct vlapic *vlapic
    14.4  int vlapic_find_highest_irr(struct vlapic *vlapic);
    14.5  
    14.6  int vlapic_has_interrupt(struct vcpu *v);
    14.7 -int cpu_get_apic_interrupt(struct vcpu *v, int *mode);
    14.8 +int cpu_get_apic_interrupt(struct vcpu *v);
    14.9  
   14.10  int  vlapic_init(struct vcpu *v);
   14.11  void vlapic_destroy(struct vcpu *v);
    15.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Wed Jun 20 10:55:37 2007 +0100
    15.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Wed Jun 20 11:50:16 2007 +0100
    15.3 @@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti
    15.4                             instruction_len);
    15.5  }
    15.6  
    15.7 -static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
    15.8 +static inline void vmx_inject_extint(struct vcpu *v, int trap)
    15.9  {
   15.10 -    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
   15.11 +    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
   15.12 +                           VMX_DELIVER_NO_ERROR_CODE, 0);
   15.13 +}
   15.14 +
   15.15 +static inline void vmx_inject_nmi(struct vcpu *v)
   15.16 +{
   15.17 +    __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
   15.18 +                           VMX_DELIVER_NO_ERROR_CODE, 0);
   15.19  }
   15.20  
   15.21  #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
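
vmx_inject_nmi() reuses __vmx_inject_exception() with vector 2 and type INTR_TYPE_NMI, which is ultimately encoded in the VM-entry interruption-information field. The layout below is from the Intel SDM and is shown for orientation only; the encoding itself happens inside __vmx_inject_exception(), not here:

    /* VM-entry interruption-information field (Intel SDM):
     *   bits  7:0   vector              -> 2 for NMI
     *   bits 10:8   interruption type   -> 2 (NMI); 0 is external interrupt
     *   bit  11     deliver error code  -> 0 (VMX_DELIVER_NO_ERROR_CODE)
     *   bit  31     valid               -> 1
     */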
    16.1 --- a/xen/include/asm-x86/hvm/vpic.h	Wed Jun 20 10:55:37 2007 +0100
    16.2 +++ b/xen/include/asm-x86/hvm/vpic.h	Wed Jun 20 11:50:16 2007 +0100
    16.3 @@ -32,7 +32,7 @@
    16.4  void vpic_irq_positive_edge(struct domain *d, int irq);
    16.5  void vpic_irq_negative_edge(struct domain *d, int irq);
    16.6  void vpic_init(struct domain *d);
    16.7 -int cpu_get_pic_interrupt(struct vcpu *v, int *type);
    16.8 +int cpu_get_pic_interrupt(struct vcpu *v);
    16.9  int is_periodic_irq(struct vcpu *v, int irq, int type);
   16.10  
   16.11  #endif  /* __ASM_X86_HVM_VPIC_H__ */  
    17.1 --- a/xen/include/asm-x86/hvm/vpt.h	Wed Jun 20 10:55:37 2007 +0100
    17.2 +++ b/xen/include/asm-x86/hvm/vpt.h	Wed Jun 20 11:50:16 2007 +0100
    17.3 @@ -29,6 +29,7 @@
    17.4  #include <xen/timer.h>
    17.5  #include <xen/list.h>
    17.6  #include <asm/hvm/vpic.h>
    17.7 +#include <asm/hvm/irq.h>
    17.8  #include <public/hvm/save.h>
    17.9  
   17.10  struct HPETState;
   17.11 @@ -119,7 +120,7 @@ struct pl_time {    /* platform time */
   17.12  void pt_freeze_time(struct vcpu *v);
   17.13  void pt_thaw_time(struct vcpu *v);
   17.14  void pt_update_irq(struct vcpu *v);
   17.15 -void pt_intr_post(struct vcpu *v, int vector, int type);
   17.16 +void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
   17.17  void pt_reset(struct vcpu *v);
   17.18  void pt_migrate(struct vcpu *v);
   17.19  void create_periodic_time(