ia64/xen-unstable

changeset 9639:f84a333d8aa6

Robustify and add tracing to the IO-APIC update hypercall.
If this patch, and any others that follow it, fix some of the
problems that various users have been seeing then they may
be good candidates for backporting to 3.0.2 (assuming no
regressions for other users).

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Apr 07 18:41:28 2006 +0100 (2006-04-07)
parents 05db1d61e031
children 12621916d820
files xen/arch/x86/io_apic.c
line diff
     1.1 --- a/xen/arch/x86/io_apic.c	Fri Apr 07 16:15:44 2006 +0100
     1.2 +++ b/xen/arch/x86/io_apic.c	Fri Apr 07 18:41:28 2006 +0100
     1.3 @@ -75,6 +75,7 @@ int disable_timer_pin_1 __initdata;
     1.4  static struct irq_pin_list {
     1.5      int apic, pin, next;
     1.6  } irq_2_pin[PIN_MAP_SIZE];
     1.7 +static int irq_2_pin_free_entry = NR_IRQS;
     1.8  
     1.9  int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
    1.10  
    1.11 @@ -85,22 +86,54 @@ int vector_irq[NR_VECTORS] __read_mostly
    1.12   */
    1.13  static void add_pin_to_irq(unsigned int irq, int apic, int pin)
    1.14  {
    1.15 -    static int first_free_entry = NR_IRQS;
    1.16      struct irq_pin_list *entry = irq_2_pin + irq;
    1.17  
    1.18 -    while (entry->next)
    1.19 +    while (entry->next) {
    1.20 +        BUG_ON((entry->apic == apic) && (entry->pin == pin));
    1.21          entry = irq_2_pin + entry->next;
    1.22 +    }
    1.23 +
    1.24 +    BUG_ON((entry->apic == apic) && (entry->pin == pin));
    1.25  
    1.26      if (entry->pin != -1) {
    1.27 -        entry->next = first_free_entry;
    1.28 +        if (irq_2_pin_free_entry >= PIN_MAP_SIZE)
    1.29 +            panic("io_apic.c: whoops");
    1.30 +        entry->next = irq_2_pin_free_entry;
    1.31          entry = irq_2_pin + entry->next;
    1.32 -        if (++first_free_entry >= PIN_MAP_SIZE)
    1.33 -            panic("io_apic.c: whoops");
    1.34 +        irq_2_pin_free_entry = entry->next;
    1.35 +        entry->next = 0;
    1.36      }
    1.37      entry->apic = apic;
    1.38      entry->pin = pin;
    1.39  }
    1.40  
    1.41 +static void remove_pin_at_irq(unsigned int irq, int apic, int pin)
    1.42 +{
    1.43 +    struct irq_pin_list *entry, *prev = &irq_2_pin[irq]; /* start at chain head */
    1.44 +    int idx;
    1.45 +
    1.46 +    for (entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next]) {
    1.47 +        if ((entry->apic == apic) && (entry->pin == pin))
    1.48 +            break;
    1.49 +        if (!entry->next)
    1.50 +            BUG(); /* (apic,pin) is not on this irq's chain */
    1.51 +    }
    1.52 +
    1.53 +    entry->pin  = -1;
    1.54 +    entry->apic = -1;
    1.55 +    
    1.56 +    idx = entry - irq_2_pin;
    1.57 +    if (idx >= NR_IRQS) { /* overflow entry: unlink and return to free list */
    1.58 +        while (prev->next != idx)
    1.59 +            prev = &irq_2_pin[prev->next];
    1.60 +        prev->next = entry->next;
    1.61 +        entry->next = irq_2_pin_free_entry;
    1.62 +        irq_2_pin_free_entry = idx;
    1.63 +    } else { /* NOTE(review): head entry; any chained successors are dropped */
    1.64 +        entry->next = 0;
    1.65 +    }
    1.66 +}
    1.67 +
    1.68  /*
    1.69   * Reroute an IRQ to a different pin.
    1.70   */
    1.71 @@ -959,6 +992,10 @@ static void __init enable_IO_APIC(void)
    1.72          irq_2_pin[i].next = 0;
    1.73      }
    1.74  
    1.75 +    /* Initialise dynamic irq_2_pin free list. */
    1.76 +    for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
    1.77 +        irq_2_pin[i].next = i + 1;
    1.78 +
    1.79      /*
    1.80       * The number of IO-APIC IRQ registers (== #pins):
    1.81       */
    1.82 @@ -1854,11 +1891,18 @@ int ioapic_guest_read(unsigned long phys
    1.83      return 0;
    1.84  }
    1.85  
    1.86 +#define WARN_BOGUS_WRITE(f, a...)                                       \
    1.87 +    printk("%s: apic=%d,pin=%d,oirq=%d,nirq=%d\n"                       \
    1.88 +           "%s: oent=%08x:%08x,nent=%08x:%08x\n"                        \
    1.89 +           "%s: " f, __FUNCTION__, apic, pin, old_irq, new_irq,         \
    1.90 +           __FUNCTION__, *(u32 *)&old_rte, *((u32 *)&old_rte+1),        \
    1.91 +           *(u32 *)&new_rte, *((u32 *)&new_rte+1),                      \
    1.92 +           __FUNCTION__ , ##a )
    1.93 +
    1.94  int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
    1.95  {
    1.96 -    int apic, pin, irq;
    1.97 -    struct IO_APIC_route_entry rte = { 0 };
    1.98 -    struct irq_pin_list *entry;
    1.99 +    int apic, pin, old_irq = -1, new_irq = -1;
   1.100 +    struct IO_APIC_route_entry old_rte = { 0 }, new_rte = { 0 };
   1.101      unsigned long flags;
   1.102  
   1.103      if ( (apic = ioapic_physbase_to_id(physbase)) < 0 )
   1.104 @@ -1870,8 +1914,9 @@ int ioapic_guest_write(unsigned long phy
   1.105      
   1.106      pin = (reg - 0x10) >> 1;
   1.107  
   1.108 -    *(u32 *)&rte = val;
   1.109 -    rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
   1.110 +    /* Write first half from guest; second half is target info. */
   1.111 +    *(u32 *)&new_rte = val;
   1.112 +    new_rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
   1.113  
   1.114      /*
   1.115       * What about weird destination types?
   1.116 @@ -1881,7 +1926,7 @@ int ioapic_guest_write(unsigned long phy
   1.117       *  ExtINT: Ignore? Linux only asserts this at start of day.
   1.118       * For now, print a message and return an error. We can fix up on demand.
   1.119       */
   1.120 -    if ( rte.delivery_mode > dest_LowestPrio )
   1.121 +    if ( new_rte.delivery_mode > dest_LowestPrio )
   1.122      {
   1.123          printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
   1.124          printk("       APIC=%d/%d, lo-reg=%x\n", apic, pin, val);
   1.125 @@ -1892,36 +1937,69 @@ int ioapic_guest_write(unsigned long phy
   1.126       * The guest does not know physical APIC arrangement (flat vs. cluster).
   1.127       * Apply genapic conventions for this platform.
   1.128       */
   1.129 -    rte.delivery_mode = INT_DELIVERY_MODE;
   1.130 -    rte.dest_mode     = INT_DEST_MODE;
   1.131 -
   1.132 -    if ( rte.vector >= FIRST_DEVICE_VECTOR )
   1.133 -    {
   1.134 -        /* Is there a valid irq mapped to this vector? */
   1.135 -        irq = vector_irq[rte.vector];
   1.136 -        if ( !IO_APIC_IRQ(irq) )
   1.137 -            return 0;
   1.138 +    new_rte.delivery_mode = INT_DELIVERY_MODE;
   1.139 +    new_rte.dest_mode     = INT_DEST_MODE;
   1.140  
   1.141 -        /* Set the correct irq-handling type. */
   1.142 -        irq_desc[IO_APIC_VECTOR(irq)].handler = rte.trigger ? 
   1.143 -            &ioapic_level_type: &ioapic_edge_type;
   1.144 +    spin_lock_irqsave(&ioapic_lock, flags);
   1.145  
   1.146 -        /* Record the pin<->irq mapping. */
   1.147 -        for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
   1.148 -        {
   1.149 -            if ( (entry->apic == apic) && (entry->pin == pin) )
   1.150 -                break;
   1.151 -            if ( !entry->next )
   1.152 -            {
   1.153 -                add_pin_to_irq(irq, apic, pin);
   1.154 -                break;
   1.155 -            }
   1.156 -        }
   1.157 +    /* Read first (interesting) half of current routing entry. */
   1.158 +    *(u32 *)&old_rte = io_apic_read(apic, 0x10 + 2 * pin);
   1.159 +
   1.160 +    /* No change to the first half of the routing entry? Bail quietly. */
   1.161 +    if ( *(u32 *)&old_rte == *(u32 *)&new_rte )
   1.162 +    {
   1.163 +        spin_unlock_irqrestore(&ioapic_lock, flags);
   1.164 +        return 0;
   1.165      }
   1.166  
   1.167 -    spin_lock_irqsave(&ioapic_lock, flags);
   1.168 -    io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0));
   1.169 -    io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1));
   1.170 +    if ( old_rte.vector >= FIRST_DEVICE_VECTOR )
   1.171 +        old_irq = vector_irq[old_rte.vector];
   1.172 +    if ( new_rte.vector >= FIRST_DEVICE_VECTOR )
   1.173 +        new_irq = vector_irq[new_rte.vector];
   1.174 +
   1.175 +    if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) )
   1.176 +    {
   1.177 +        if ( irq_desc[IO_APIC_VECTOR(old_irq)].action )
   1.178 +        {
   1.179 +            WARN_BOGUS_WRITE("Attempt to remove IO-APIC pin of in-use IRQ!\n");
   1.180 +            spin_unlock_irqrestore(&ioapic_lock, flags);
   1.181 +            return 0;
   1.182 +        }
   1.183 +
   1.184 +        remove_pin_at_irq(old_irq, apic, pin);
   1.185 +    }
   1.186 +
   1.187 +    if ( (new_irq != -1) && IO_APIC_IRQ(new_irq) )
   1.188 +    {
   1.189 +        if ( irq_desc[IO_APIC_VECTOR(new_irq)].action )
   1.190 +        {
   1.191 +            WARN_BOGUS_WRITE("Attempt to %s IO-APIC pin for in-use IRQ!\n",
   1.192 +                             (old_irq != new_irq) ? "add" : "modify");
   1.193 +            spin_unlock_irqrestore(&ioapic_lock, flags);
   1.194 +            return 0;
   1.195 +        }
   1.196 +        
   1.197 +        /* Set the correct irq-handling type. */
   1.198 +        irq_desc[IO_APIC_VECTOR(new_irq)].handler = new_rte.trigger ? 
   1.199 +            &ioapic_level_type: &ioapic_edge_type;
   1.200 +        
   1.201 +        if ( old_irq != new_irq )
   1.202 +            add_pin_to_irq(new_irq, apic, pin);
   1.203 +
   1.204 +        /* Mask iff level triggered. */
   1.205 +        new_rte.mask = new_rte.trigger;
   1.206 +    }
   1.207 +    else if ( !new_rte.mask )
   1.208 +    {
   1.209 +        /* This pin leads nowhere but the guest has not masked it. */
   1.210 +        WARN_BOGUS_WRITE("Installing bogus unmasked IO-APIC entry!\n");
   1.211 +        new_rte.mask = 1;
   1.212 +    }
   1.213 +
   1.214 +
   1.215 +    io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&new_rte) + 0));
   1.216 +    io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&new_rte) + 1));
   1.217 +
   1.218      spin_unlock_irqrestore(&ioapic_lock, flags);
   1.219  
   1.220      return 0;