ia64/xen-unstable

changeset 9714:bb0dc0ae23bb

Fix Xen's interrupt acknowledgement routines on certain
(apparently broken) IO-APIC hardware:
 1. Do not mask/unmask the IO-APIC pin during normal ISR
    processing. This seems to have really bizarre side effects
    on some chipsets.
 2. Since we instead tickle the local APIC in the ->end irq
    hook function, it *must* run on the CPU that received the
    interrupt. Therefore we track which CPUs need to perform
    the final acknowledgement and, if necessary, IPI them to
    do so (see the sketch below).
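
To make point 2 concrete, the following is a minimal stand-alone model
of the bookkeeping (illustrative only, not part of the patch below;
guest_irq, cpu_ack_map, end_irq_on_cpu and friends are invented names;
the hypervisor itself uses irq_guest_action_t, the ->end hook and
on_selected_cpus()):

    /*
     * Sketch: per-IRQ bitmap of CPUs that still owe the final ->end
     * acknowledgement.  The CPU that takes the interrupt sets its bit;
     * when the last guest notification is retired, the CPUs that still
     * owe an ack are asked (by IPI in the real code) to EOI themselves.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define NR_CPUS 8

    struct guest_irq {
        uint32_t cpu_ack_map;  /* bit n set => CPU n must still run ->end */
        int      in_flight;    /* guest notifications not yet unmasked */
    };

    /* Stand-in for the IO-APIC ->end hook; must run on 'cpu'. */
    static void end_irq_on_cpu(struct guest_irq *irq, int cpu)
    {
        if (irq->cpu_ack_map & (1u << cpu)) {
            irq->cpu_ack_map &= ~(1u << cpu);
            printf("CPU%d: final EOI performed\n", cpu);
        }
    }

    /* Runs on the CPU that received the interrupt. */
    static void do_irq_guest(struct guest_irq *irq, int cpu, int nr_guests)
    {
        irq->cpu_ack_map |= 1u << cpu;  /* this CPU owes the final ack */
        irq->in_flight   += nr_guests;  /* one notification per guest */
    }

    /* Runs when a guest unmasks its event channel, possibly elsewhere. */
    static void guest_unmask(struct guest_irq *irq, int current_cpu)
    {
        if (--irq->in_flight != 0)
            return;                     /* other guests still pending */

        /* Ack locally if this CPU owes it, else "IPI" the CPUs that do. */
        end_irq_on_cpu(irq, current_cpu);
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (irq->cpu_ack_map & (1u << cpu))
                end_irq_on_cpu(irq, cpu);  /* models on_selected_cpus() */
    }

    int main(void)
    {
        struct guest_irq irq = { 0, 0 };
        do_irq_guest(&irq, 2, 1);  /* interrupt taken on CPU2, one guest */
        guest_unmask(&irq, 5);     /* guest unmasks from a vCPU on CPU5 */
        return 0;
    }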

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Apr 14 12:01:15 2006 +0100 (2006-04-14)
parents 2ccaa3879417
children 9a273aabb839
files xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/physdev.c
line diff
     1.1 --- a/xen/arch/x86/io_apic.c	Fri Apr 14 11:58:49 2006 +0100
     1.2 +++ b/xen/arch/x86/io_apic.c	Fri Apr 14 12:01:15 2006 +0100
     1.3 @@ -190,16 +190,16 @@ static void __unmask_IO_APIC_irq (unsign
     1.4      __modify_IO_APIC_irq(irq, 0, 0x00010000);
     1.5  }
     1.6  
     1.7 -/* trigger = 0 */
     1.8 -static void __edge_IO_APIC_irq (unsigned int irq)
     1.9 +/* mask = 1, trigger = 0 */
    1.10 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
    1.11  {
    1.12 -    __modify_IO_APIC_irq(irq, 0, 0x00008000);
    1.13 +    __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
    1.14  }
    1.15  
    1.16 -/* trigger = 1 */
    1.17 -static void __level_IO_APIC_irq (unsigned int irq)
    1.18 +/* mask = 0, trigger = 1 */
    1.19 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
    1.20  {
    1.21 -    __modify_IO_APIC_irq(irq, 0x00008000, 0);
    1.22 +    __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
    1.23  }
    1.24  
    1.25  static void mask_IO_APIC_irq (unsigned int irq)
    1.26 @@ -1323,10 +1323,13 @@ static unsigned int startup_level_ioapic
    1.27  
    1.28  static void mask_and_ack_level_ioapic_irq (unsigned int irq)
    1.29  {
    1.30 +}
    1.31 +
    1.32 +static void end_level_ioapic_irq (unsigned int irq)
    1.33 +{
    1.34      unsigned long v;
    1.35      int i;
    1.36  
    1.37 -    mask_IO_APIC_irq(irq);
    1.38  /*
    1.39   * It appears there is an erratum which affects at least version 0x11
    1.40   * of I/O APIC (that's the 82093AA and cores integrated into various
    1.41 @@ -1355,17 +1358,12 @@ static void mask_and_ack_level_ioapic_ir
    1.42      if (!(v & (1 << (i & 0x1f)))) {
    1.43          atomic_inc(&irq_mis_count);
    1.44          spin_lock(&ioapic_lock);
    1.45 -        __edge_IO_APIC_irq(irq);
    1.46 -        __level_IO_APIC_irq(irq);
    1.47 +        __mask_and_edge_IO_APIC_irq(irq);
    1.48 +        __unmask_and_level_IO_APIC_irq(irq);
    1.49          spin_unlock(&ioapic_lock);
    1.50      }
    1.51  }
    1.52  
    1.53 -static void end_level_ioapic_irq (unsigned int irq)
    1.54 -{
    1.55 -    unmask_IO_APIC_irq(irq);
    1.56 -}
    1.57 -
    1.58  static unsigned int startup_edge_ioapic_vector(unsigned int vector)
    1.59  {
    1.60      int irq = vector_to_irq(vector);
     2.1 --- a/xen/arch/x86/irq.c	Fri Apr 14 11:58:49 2006 +0100
     2.2 +++ b/xen/arch/x86/irq.c	Fri Apr 14 12:01:15 2006 +0100
     2.3 @@ -148,6 +148,11 @@ typedef struct {
     2.4      u8 nr_guests;
     2.5      u8 in_flight;
     2.6      u8 shareable;
     2.7 +    u8 ack_type;
     2.8 +#define ACKTYPE_NONE   0 /* Final ACK is not required */
     2.9 +#define ACKTYPE_SINGLE 1 /* Final ACK on any CPU */
    2.10 +#define ACKTYPE_MULTI  2 /* Final ACK on the CPU that was interrupted */
    2.11 +    cpumask_t cpu_ack_map;
    2.12      struct domain *guest[IRQ_MAX_GUESTS];
    2.13  } irq_guest_action_t;
    2.14  
    2.15 @@ -159,37 +164,111 @@ static void __do_IRQ_guest(int vector)
    2.16      struct domain      *d;
    2.17      int                 i;
    2.18  
    2.19 +    if ( unlikely(action->nr_guests == 0) )
    2.20 +    {
    2.21 +        /* An interrupt may slip through while freeing an ACKTYPE_MULTI irq. */
    2.22 +        ASSERT(action->ack_type == ACKTYPE_MULTI);
    2.23 +        desc->handler->end(vector);
    2.24 +        return;
    2.25 +    }
    2.26 +
    2.27 +    if ( action->ack_type == ACKTYPE_MULTI )
    2.28 +        cpu_set(smp_processor_id(), action->cpu_ack_map);
    2.29 +
    2.30      for ( i = 0; i < action->nr_guests; i++ )
    2.31      {
    2.32          d = action->guest[i];
    2.33 -        if ( !test_and_set_bit(irq, &d->pirq_mask) )
    2.34 +        if ( (action->ack_type != ACKTYPE_NONE) &&
    2.35 +             !test_and_set_bit(irq, &d->pirq_mask) )
    2.36              action->in_flight++;
    2.37          send_guest_pirq(d, irq);
    2.38      }
    2.39  }
    2.40  
    2.41 +static void end_guest_irq(void *data)
    2.42 +{
    2.43 +    irq_desc_t         *desc = data;
    2.44 +    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
    2.45 +    unsigned long       flags;
    2.46 +
    2.47 +    spin_lock_irqsave(&desc->lock, flags);
    2.48 +    if ( (desc->status & IRQ_GUEST) &&
    2.49 +         (action->in_flight == 0) &&
    2.50 +         test_and_clear_bit(smp_processor_id(), &action->cpu_ack_map) )
    2.51 +        desc->handler->end(desc - irq_desc);
    2.52 +    spin_unlock_irqrestore(&desc->lock, flags);    
    2.53 +}
    2.54 +
    2.55  int pirq_guest_unmask(struct domain *d)
    2.56  {
    2.57 -    irq_desc_t    *desc;
    2.58 -    unsigned int   pirq;
    2.59 -    shared_info_t *s = d->shared_info;
    2.60 +    irq_desc_t         *desc;
    2.61 +    irq_guest_action_t *action;
    2.62 +    cpumask_t           cpu_ack_map = CPU_MASK_NONE;
    2.63 +    unsigned int        pirq, cpu = smp_processor_id();
    2.64 +    shared_info_t      *s = d->shared_info;
    2.65  
    2.66      for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS);
    2.67            pirq < NR_PIRQS;
    2.68            pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) )
    2.69      {
    2.70 -        desc = &irq_desc[irq_to_vector(pirq)];
    2.71 +        desc   = &irq_desc[irq_to_vector(pirq)];
    2.72 +        action = (irq_guest_action_t *)desc->action;
    2.73 +
    2.74          spin_lock_irq(&desc->lock);
    2.75          if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
    2.76 -             test_and_clear_bit(pirq, &d->pirq_mask) &&
    2.77 -             (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
    2.78 -            desc->handler->end(irq_to_vector(pirq));
    2.79 +             test_and_clear_bit(pirq, &d->pirq_mask) )
    2.80 +        {
    2.81 +            ASSERT(action->ack_type != ACKTYPE_NONE);
    2.82 +            if ( --action->in_flight == 0 )
    2.83 +            {
    2.84 +                if ( (action->ack_type == ACKTYPE_SINGLE) ||
    2.85 +                     test_and_clear_bit(cpu, &action->cpu_ack_map) )
    2.86 +                    desc->handler->end(irq_to_vector(pirq));
    2.87 +                cpu_ack_map = action->cpu_ack_map;
    2.88 +            }
    2.89 +        }
    2.90          spin_unlock_irq(&desc->lock);
    2.91 +
    2.92 +        if ( !cpus_empty(cpu_ack_map) )
    2.93 +        {
    2.94 +            on_selected_cpus(cpu_ack_map, end_guest_irq, desc, 1, 0);
    2.95 +            cpu_ack_map = CPU_MASK_NONE;
    2.96 +        }
    2.97      }
    2.98  
    2.99      return 0;
   2.100  }
   2.101  
   2.102 +int pirq_acktype(int irq)
   2.103 +{
   2.104 +    irq_desc_t  *desc;
   2.105 +    unsigned int vector;
   2.106 +
   2.107 +    vector = irq_to_vector(irq);
   2.108 +    if ( vector == 0 )
   2.109 +        return ACKTYPE_NONE;
   2.110 +
   2.111 +    desc = &irq_desc[vector];
   2.112 +
   2.113 +    /*
   2.114 +     * Edge-triggered IO-APIC interrupts need no final acknowledgement:
   2.115 +     * we ACK early during interrupt processing.
   2.116 +     */
   2.117 +    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") )
   2.118 +        return ACKTYPE_NONE;
   2.119 +
   2.120 +    /* Legacy PIC interrupts can be acknowledged from any CPU. */
   2.121 +    if ( !strcmp(desc->handler->typename, "XT-PIC") )
   2.122 +        return ACKTYPE_SINGLE;
   2.123 +
   2.124 +    /*
   2.125 +     * By default assume that an interrupt must be finally acknowledged on
   2.126 +     * the CPU on which it was received. This is true for level-triggered
   2.127 +     * IO-APIC interrupts, for example, where we tickle the LAPIC to EOI.
   2.128 +     */
   2.129 +    return ACKTYPE_MULTI;
   2.130 +}
   2.131 +
   2.132  int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
   2.133  {
   2.134      unsigned int        vector;
   2.135 @@ -230,10 +309,12 @@ int pirq_guest_bind(struct vcpu *v, int 
   2.136              goto out;
   2.137          }
   2.138  
   2.139 -        action->nr_guests = 0;
   2.140 -        action->in_flight = 0;
   2.141 -        action->shareable = will_share;
   2.142 -        
   2.143 +        action->nr_guests   = 0;
   2.144 +        action->in_flight   = 0;
   2.145 +        action->shareable   = will_share;
   2.146 +        action->ack_type    = pirq_acktype(irq);
   2.147 +        action->cpu_ack_map = CPU_MASK_NONE;
   2.148 +
   2.149          desc->depth = 0;
   2.150          desc->status |= IRQ_GUEST;
   2.151          desc->status &= ~IRQ_DISABLED;
   2.152 @@ -271,6 +352,7 @@ int pirq_guest_unbind(struct domain *d, 
   2.153      unsigned int        vector = irq_to_vector(irq);
   2.154      irq_desc_t         *desc = &irq_desc[vector];
   2.155      irq_guest_action_t *action;
   2.156 +    cpumask_t           cpu_ack_map;
   2.157      unsigned long       flags;
   2.158      int                 i;
   2.159  
   2.160 @@ -280,28 +362,60 @@ int pirq_guest_unbind(struct domain *d, 
   2.161  
   2.162      action = (irq_guest_action_t *)desc->action;
   2.163  
   2.164 -    if ( test_and_clear_bit(irq, &d->pirq_mask) &&
   2.165 -         (--action->in_flight == 0) )
   2.166 -        desc->handler->end(vector);
   2.167 +    i = 0;
   2.168 +    while ( action->guest[i] && (action->guest[i] != d) )
   2.169 +        i++;
   2.170 +    memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
   2.171 +    action->nr_guests--;
   2.172  
   2.173 -    if ( action->nr_guests == 1 )
   2.174 +    switch ( action->ack_type )
   2.175      {
   2.176 -        desc->action = NULL;
   2.177 -        xfree(action);
   2.178 -        desc->depth   = 1;
   2.179 -        desc->status |= IRQ_DISABLED;
   2.180 -        desc->status &= ~IRQ_GUEST;
   2.181 -        desc->handler->shutdown(vector);
   2.182 -    }
   2.183 -    else
   2.184 -    {
   2.185 -        i = 0;
   2.186 -        while ( action->guest[i] && (action->guest[i] != d) )
   2.187 -            i++;
   2.188 -        memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
   2.189 -        action->nr_guests--;
   2.190 +    case ACKTYPE_SINGLE:
   2.191 +        if ( test_and_clear_bit(irq, &d->pirq_mask) &&
   2.192 +             (--action->in_flight == 0) )
   2.193 +            desc->handler->end(vector);
   2.194 +        break;
   2.195 +    case ACKTYPE_MULTI:
   2.196 +        if ( test_and_clear_bit(irq, &d->pirq_mask) )
   2.197 +            --action->in_flight;
   2.198 +        while ( action->in_flight == 0 )
   2.199 +        {
   2.200 +            /* We cannot release guest info until all pending ACKs are done. */
   2.201 +            cpu_ack_map = action->cpu_ack_map;
   2.202 +            if ( cpus_empty(cpu_ack_map) )
   2.203 +                break;
   2.204 +
   2.205 +            /* We cannot hold the lock while interrupting other CPUs. */
   2.206 +            spin_unlock_irqrestore(&desc->lock, flags);    
   2.207 +            on_selected_cpus(cpu_ack_map, end_guest_irq, desc, 1, 1);
   2.208 +            spin_lock_irqsave(&desc->lock, flags);
   2.209 +
   2.210 +            /* The world can change while we do not hold the lock. */
   2.211 +            if ( !(desc->status & IRQ_GUEST) )
   2.212 +                goto out;
   2.213 +            if ( (action->ack_type != ACKTYPE_MULTI) ||
   2.214 +                 (action->nr_guests != 0) )
   2.215 +                break;
   2.216 +        }
   2.217 +        break;
   2.218      }
   2.219  
   2.220 +    BUG_ON(test_bit(irq, &d->pirq_mask));
   2.221 +
   2.222 +    if ( action->nr_guests != 0 )
   2.223 +        goto out;
   2.224 +
   2.225 +    BUG_ON(action->in_flight != 0);
   2.226 +    BUG_ON(!cpus_empty(action->cpu_ack_map));
   2.227 +
   2.228 +    desc->action = NULL;
   2.229 +    xfree(action);
   2.230 +    desc->depth   = 1;
   2.231 +    desc->status |= IRQ_DISABLED;
   2.232 +    desc->status &= ~IRQ_GUEST;
   2.233 +    desc->handler->shutdown(vector);
   2.234 +
   2.235 + out:
   2.236      spin_unlock_irqrestore(&desc->lock, flags);    
   2.237      return 0;
   2.238  }
     3.1 --- a/xen/arch/x86/physdev.c	Fri Apr 14 11:58:49 2006 +0100
     3.2 +++ b/xen/arch/x86/physdev.c	Fri Apr 14 12:01:15 2006 +0100
     3.3 @@ -18,6 +18,9 @@ ioapic_guest_read(
     3.4  extern int
     3.5  ioapic_guest_write(
     3.6      unsigned long physbase, unsigned int reg, u32 pval);
     3.7 +extern int
     3.8 +pirq_acktype(
     3.9 +    int irq);
    3.10  
    3.11  /*
    3.12   * Demuxing hypercall.
    3.13 @@ -43,8 +46,7 @@ long do_physdev_op(GUEST_HANDLE(physdev_
    3.14          if ( (irq < 0) || (irq >= NR_IRQS) )
    3.15              break;
    3.16          op.u.irq_status_query.flags = 0;
    3.17 -        /* Edge-triggered interrupts don't need an explicit unmask downcall. */
    3.18 -        if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
    3.19 +        if ( pirq_acktype(irq) != 0 )
    3.20              op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
    3.21          ret = 0;
    3.22          break;