ia64/xen-unstable

view linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c @ 9747:de2dc4e7966a

[IA64] Add support for physdev_ops

Add support for physdev ops, so that IOSAPIC RTEs are now
managed by Xen. Dom0 now issues a hypercall to read and
write RTEs. IRQ vector allocation is likewise owned by
Xen now.

After this change the IOSAPIC is almost entirely owned by
Xen; the only exception is the IOSAPIC EOI, which is still
issued by dom0 directly. That is acceptable for now, since
dom0 currently owns all external physical devices. Later,
the full event channel mechanism will provide the support
needed for driver domains, and at that point dom0 will
naturally issue physdev_op (PHYSDEVOP_IRQ_UNMASK_NOTIFY)
in place of the IOSAPIC EOI.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author awilliam@xenbuild.aw
date Fri Apr 21 09:03:19 2006 -0600 (2006-04-21)
parents 19148831ab05
children 42a8e3101c6c
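
The dom0-side flow this changeset introduces boils down to the sketch below. It is a condensed restatement of the xen_iosapic_read()/iosapic_read() pair further down in the file, kept here for quick reference; the wrapper name dom0_read_iosapic_reg() is invented for illustration, while physdev_op_t, PHYSDEVOP_APIC_READ and HYPERVISOR_physdev_op() come from this tree's xen/interface/physdev.h and asm/hypervisor.h. Error handling is simplified.

    #include <xen/interface/physdev.h>
    #include <asm/hypervisor.h>

    /* Read one IOSAPIC register via Xen instead of the REG_SELECT/WINDOW MMIO pair. */
    static unsigned int dom0_read_iosapic_reg(unsigned long apic_physbase, unsigned int reg)
    {
            physdev_op_t op;

            op.cmd = PHYSDEVOP_APIC_READ;
            op.u.apic_op.apic_physbase = apic_physbase;  /* physical base of the IOSAPIC */
            op.u.apic_op.reg = reg;                      /* register / RTE index to read */
            if (HYPERVISOR_physdev_op(&op))
                    return -1;                           /* hypercall failed (simplified) */
            return op.u.apic_op.value;
    }

Writes go through PHYSDEVOP_APIC_WRITE in the same way, and vector allocation uses PHYSDEVOP_ASSIGN_VECTOR; when not running on Xen, iosapic_read()/iosapic_write() fall back to direct MMIO.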
line source
1 /*
2 * I/O SAPIC support.
3 *
4 * Copyright (C) 1999 Intel Corp.
5 * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
6 * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
7 * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
8 * David Mosberger-Tang <davidm@hpl.hp.com>
9 * Copyright (C) 1999 VA Linux Systems
10 * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
11 *
12 * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
13 * In particular, we now have separate handlers for edge
14 * and level triggered interrupts.
15 * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
16 * PCI to vector mapping, shared PCI interrupts.
17 * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
18 * Clean up much of the old IOSAPIC cruft.
19 * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
20 * ACPI S5(SoftOff) support.
21 * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
22 * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
23 * iosapic_set_affinity(), initializations for
24 * /proc/irq/#/smp_affinity
25 * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
26 * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
27 * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
28 * error
29 * 02/07/29 T. Kochi Allocate interrupt vectors dynamically
30 * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
31 * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
32 * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
33 * Remove iosapic_address & gsi_base from external interfaces.
34 * Rationalize __init/__devinit attributes.
35 * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
36 * Updated to work with irq migration necessary for CPU Hotplug
37 */
38 /*
39 * Here is what the interrupt logic between a PCI device and the kernel looks like:
40 *
41 * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
42 * device is uniquely identified by its bus and slot number (the function
43 * number does not matter here because all functions share the same interrupt
44 * lines).
45 *
46 * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC controller.
47 * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
48 * triggered and use the same polarity). Each interrupt line has a unique Global
49 * System Interrupt (GSI) number which can be calculated as the sum of the controller's
50 * base GSI number and the IOSAPIC pin number to which the line connects.
51 *
52 * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the IOSAPIC pin
53 * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
54 *
55 * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as
56 * architecture-independent interrupt handling mechanism in Linux. As an
57 * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
58 * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and
59 * IRQ. A platform can implement platform_irq_to_vector(irq) and
60 * platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
61 * Please see also include/asm-ia64/hw_irq.h for those APIs.
62 *
63 * To sum up, there are three levels of mappings involved:
64 *
65 * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
66 *
67 * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts.
68 * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this
69 * source code.
70 */
71 #include <linux/config.h>
73 #include <linux/acpi.h>
74 #include <linux/init.h>
75 #include <linux/irq.h>
76 #include <linux/kernel.h>
77 #include <linux/list.h>
78 #include <linux/pci.h>
79 #include <linux/smp.h>
80 #include <linux/smp_lock.h>
81 #include <linux/string.h>
82 #include <linux/bootmem.h>
84 #include <asm/delay.h>
85 #include <asm/hw_irq.h>
86 #include <asm/io.h>
87 #include <asm/iosapic.h>
88 #include <asm/machvec.h>
89 #include <asm/processor.h>
90 #include <asm/ptrace.h>
91 #include <asm/system.h>
94 #undef DEBUG_INTERRUPT_ROUTING
96 #ifdef DEBUG_INTERRUPT_ROUTING
97 #define DBG(fmt...) printk(fmt)
98 #else
99 #define DBG(fmt...)
100 #endif
102 #define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info))
103 #define RTE_PREALLOCATED (1)
105 static DEFINE_SPINLOCK(iosapic_lock);
107 /* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
109 struct iosapic_rte_info {
110 struct list_head rte_list; /* node in list of RTEs sharing the same vector */
111 char __iomem *addr; /* base address of IOSAPIC */
112 unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
113 char rte_index; /* IOSAPIC RTE index */
114 int refcnt; /* reference counter */
115 unsigned int flags; /* flags */
116 } ____cacheline_aligned;
118 static struct iosapic_intr_info {
119 struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */
120 int count; /* # of RTEs that shares this vector */
121 u32 low32; /* current value of low word of Redirection table entry */
122 unsigned int dest; /* destination CPU physical ID */
123 unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
124 unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
125 unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
126 } iosapic_intr_info[IA64_NUM_VECTORS];
128 static struct iosapic {
129 char __iomem *addr; /* base address of IOSAPIC */
130 unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
131 unsigned short num_rte; /* number of RTE in this IOSAPIC */
132 int rtes_inuse; /* # of RTEs in use on this IOSAPIC */
133 #ifdef CONFIG_NUMA
134 unsigned short node; /* numa node association via pxm */
135 #endif
136 } iosapic_lists[NR_IOSAPICS];
138 static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
140 static int iosapic_kmalloc_ok;
141 static LIST_HEAD(free_rte_list);
143 #ifdef CONFIG_XEN
144 #include <xen/interface/xen.h>
145 #include <xen/interface/physdev.h>
146 #include <asm/hypervisor.h>
147 static inline unsigned int xen_iosapic_read(char __iomem *iosapic, unsigned int reg)
148 {
149 physdev_op_t op;
150 int ret;
152 op.cmd = PHYSDEVOP_APIC_READ;
153 op.u.apic_op.apic_physbase = (unsigned long)iosapic -
154 __IA64_UNCACHED_OFFSET;
155 op.u.apic_op.reg = reg;
156 ret = HYPERVISOR_physdev_op(&op);
157 if (ret)
158 return ret;
159 return op.u.apic_op.value;
160 }
162 static inline void xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
163 {
164 physdev_op_t op;
166 op.cmd = PHYSDEVOP_APIC_WRITE;
167 op.u.apic_op.apic_physbase = (unsigned long)iosapic -
168 __IA64_UNCACHED_OFFSET;
169 op.u.apic_op.reg = reg;
170 op.u.apic_op.value = val;
171 HYPERVISOR_physdev_op(&op);
172 }
174 static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg)
175 {
176 if (!running_on_xen) {
177 writel(reg, iosapic + IOSAPIC_REG_SELECT);
178 return readl(iosapic + IOSAPIC_WINDOW);
179 } else
180 return xen_iosapic_read(iosapic, reg);
181 }
183 static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
184 {
185 if (!running_on_xen) {
186 writel(reg, iosapic + IOSAPIC_REG_SELECT);
187 writel(val, iosapic + IOSAPIC_WINDOW);
188 } else
189 xen_iosapic_write(iosapic, reg, val);
190 }
192 int xen_assign_irq_vector(int irq)
193 {
194 physdev_op_t op;
196 op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
197 op.u.irq_op.irq = irq;
198 if (HYPERVISOR_physdev_op(&op))
199 return -ENOSPC;
201 return op.u.irq_op.vector;
202 }
203 #endif /* XEN */
205 /*
206 * Find an IOSAPIC associated with a GSI
207 */
208 static inline int
209 find_iosapic (unsigned int gsi)
210 {
211 int i;
213 for (i = 0; i < NR_IOSAPICS; i++) {
214 if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
215 return i;
216 }
218 return -1;
219 }
221 static inline int
222 _gsi_to_vector (unsigned int gsi)
223 {
224 struct iosapic_intr_info *info;
225 struct iosapic_rte_info *rte;
227 for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
228 list_for_each_entry(rte, &info->rtes, rte_list)
229 if (rte->gsi_base + rte->rte_index == gsi)
230 return info - iosapic_intr_info;
231 return -1;
232 }
234 /*
235 * Translate GSI number to the corresponding IA-64 interrupt vector. If no
236 * entry exists, return -1.
237 */
238 inline int
239 gsi_to_vector (unsigned int gsi)
240 {
241 return _gsi_to_vector(gsi);
242 }
244 int
245 gsi_to_irq (unsigned int gsi)
246 {
247 unsigned long flags;
248 int irq;
249 /*
250 * XXX fix me: this assumes an identity mapping between IA-64 vector and Linux irq
251 * numbers...
252 */
253 spin_lock_irqsave(&iosapic_lock, flags);
254 {
255 irq = _gsi_to_vector(gsi);
256 }
257 spin_unlock_irqrestore(&iosapic_lock, flags);
259 return irq;
260 }
262 static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
263 {
264 struct iosapic_rte_info *rte;
266 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
267 if (rte->gsi_base + rte->rte_index == gsi)
268 return rte;
269 return NULL;
270 }
272 static void
273 set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
274 {
275 unsigned long pol, trigger, dmode;
276 u32 low32, high32;
277 char __iomem *addr;
278 int rte_index;
279 char redir;
280 struct iosapic_rte_info *rte;
282 DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
284 rte = gsi_vector_to_rte(gsi, vector);
285 if (!rte)
286 return; /* not an IOSAPIC interrupt */
288 rte_index = rte->rte_index;
289 addr = rte->addr;
290 pol = iosapic_intr_info[vector].polarity;
291 trigger = iosapic_intr_info[vector].trigger;
292 dmode = iosapic_intr_info[vector].dmode;
294 redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
296 #ifdef CONFIG_SMP
297 {
298 unsigned int irq;
300 for (irq = 0; irq < NR_IRQS; ++irq)
301 if (irq_to_vector(irq) == vector) {
302 set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
303 break;
304 }
305 }
306 #endif
308 low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
309 (trigger << IOSAPIC_TRIGGER_SHIFT) |
310 (dmode << IOSAPIC_DELIVERY_SHIFT) |
311 ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
312 vector);
314 /* dest contains both id and eid */
315 high32 = (dest << IOSAPIC_DEST_SHIFT);
317 iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
318 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
319 iosapic_intr_info[vector].low32 = low32;
320 iosapic_intr_info[vector].dest = dest;
321 }
323 static void
324 nop (unsigned int vector)
325 {
326 /* do nothing... */
327 }
329 static void
330 mask_irq (unsigned int irq)
331 {
332 unsigned long flags;
333 char __iomem *addr;
334 u32 low32;
335 int rte_index;
336 ia64_vector vec = irq_to_vector(irq);
337 struct iosapic_rte_info *rte;
339 if (list_empty(&iosapic_intr_info[vec].rtes))
340 return; /* not an IOSAPIC interrupt! */
342 spin_lock_irqsave(&iosapic_lock, flags);
343 {
344 /* set only the mask bit */
345 low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
346 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
347 addr = rte->addr;
348 rte_index = rte->rte_index;
349 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
350 }
351 }
352 spin_unlock_irqrestore(&iosapic_lock, flags);
353 }
355 static void
356 unmask_irq (unsigned int irq)
357 {
358 unsigned long flags;
359 char __iomem *addr;
360 u32 low32;
361 int rte_index;
362 ia64_vector vec = irq_to_vector(irq);
363 struct iosapic_rte_info *rte;
365 if (list_empty(&iosapic_intr_info[vec].rtes))
366 return; /* not an IOSAPIC interrupt! */
368 spin_lock_irqsave(&iosapic_lock, flags);
369 {
370 low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
371 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
372 addr = rte->addr;
373 rte_index = rte->rte_index;
374 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
375 }
376 }
377 spin_unlock_irqrestore(&iosapic_lock, flags);
378 }
381 static void
382 iosapic_set_affinity (unsigned int irq, cpumask_t mask)
383 {
384 #ifdef CONFIG_SMP
385 unsigned long flags;
386 u32 high32, low32;
387 int dest, rte_index;
388 char __iomem *addr;
389 int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
390 ia64_vector vec;
391 struct iosapic_rte_info *rte;
393 irq &= (~IA64_IRQ_REDIRECTED);
394 vec = irq_to_vector(irq);
396 if (cpus_empty(mask))
397 return;
399 dest = cpu_physical_id(first_cpu(mask));
401 if (list_empty(&iosapic_intr_info[vec].rtes))
402 return; /* not an IOSAPIC interrupt */
404 set_irq_affinity_info(irq, dest, redir);
406 /* dest contains both id and eid */
407 high32 = dest << IOSAPIC_DEST_SHIFT;
409 spin_lock_irqsave(&iosapic_lock, flags);
410 {
411 low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
413 if (redir)
414 /* change delivery mode to lowest priority */
415 low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
416 else
417 /* change delivery mode to fixed */
418 low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
420 iosapic_intr_info[vec].low32 = low32;
421 iosapic_intr_info[vec].dest = dest;
422 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
423 addr = rte->addr;
424 rte_index = rte->rte_index;
425 iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
426 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
427 }
428 }
429 spin_unlock_irqrestore(&iosapic_lock, flags);
430 #endif
431 }
433 /*
434 * Handlers for level-triggered interrupts.
435 */
437 static unsigned int
438 iosapic_startup_level_irq (unsigned int irq)
439 {
440 unmask_irq(irq);
441 return 0;
442 }
444 static void
445 iosapic_end_level_irq (unsigned int irq)
446 {
447 ia64_vector vec = irq_to_vector(irq);
448 struct iosapic_rte_info *rte;
450 move_irq(irq);
451 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
452 iosapic_eoi(rte->addr, vec);
453 }
455 #define iosapic_shutdown_level_irq mask_irq
456 #define iosapic_enable_level_irq unmask_irq
457 #define iosapic_disable_level_irq mask_irq
458 #define iosapic_ack_level_irq nop
460 struct hw_interrupt_type irq_type_iosapic_level = {
461 .typename = "IO-SAPIC-level",
462 .startup = iosapic_startup_level_irq,
463 .shutdown = iosapic_shutdown_level_irq,
464 .enable = iosapic_enable_level_irq,
465 .disable = iosapic_disable_level_irq,
466 .ack = iosapic_ack_level_irq,
467 .end = iosapic_end_level_irq,
468 .set_affinity = iosapic_set_affinity
469 };
471 /*
472 * Handlers for edge-triggered interrupts.
473 */
475 static unsigned int
476 iosapic_startup_edge_irq (unsigned int irq)
477 {
478 unmask_irq(irq);
479 /*
480 * IOSAPIC simply drops interrupts pended while the
481 * corresponding pin was masked, so we can't know if an
482 * interrupt is pending already. Let's hope not...
483 */
484 return 0;
485 }
487 static void
488 iosapic_ack_edge_irq (unsigned int irq)
489 {
490 irq_desc_t *idesc = irq_descp(irq);
492 move_irq(irq);
493 /*
494 * Once we have recorded IRQ_PENDING already, we can mask the
495 * interrupt for real. This prevents IRQ storms from unhandled
496 * devices.
497 */
498 if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
499 mask_irq(irq);
500 }
502 #define iosapic_enable_edge_irq unmask_irq
503 #define iosapic_disable_edge_irq nop
504 #define iosapic_end_edge_irq nop
506 struct hw_interrupt_type irq_type_iosapic_edge = {
507 .typename = "IO-SAPIC-edge",
508 .startup = iosapic_startup_edge_irq,
509 .shutdown = iosapic_disable_edge_irq,
510 .enable = iosapic_enable_edge_irq,
511 .disable = iosapic_disable_edge_irq,
512 .ack = iosapic_ack_edge_irq,
513 .end = iosapic_end_edge_irq,
514 .set_affinity = iosapic_set_affinity
515 };
517 unsigned int
518 iosapic_version (char __iomem *addr)
519 {
520 /*
521 * IOSAPIC Version Register return 32 bit structure like:
522 * {
523 * unsigned int version : 8;
524 * unsigned int reserved1 : 8;
525 * unsigned int max_redir : 8;
526 * unsigned int reserved2 : 8;
527 * }
528 */
529 return iosapic_read(addr, IOSAPIC_VERSION);
530 }
532 static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
533 {
534 int i, vector = -1, min_count = -1;
535 struct iosapic_intr_info *info;
537 /*
538 * shared vectors for edge-triggered interrupts are not
539 * supported yet
540 */
541 if (trigger == IOSAPIC_EDGE)
542 return -1;
544 for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
545 info = &iosapic_intr_info[i];
546 if (info->trigger == trigger && info->polarity == pol &&
547 (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
548 if (min_count == -1 || info->count < min_count) {
549 vector = i;
550 min_count = info->count;
551 }
552 }
553 }
555 return vector;
556 }
558 /*
559 * if the given vector is already owned by other,
560 * assign a new vector for the other and make the vector available
561 */
562 static void __init
563 iosapic_reassign_vector (int vector)
564 {
565 int new_vector;
567 if (!list_empty(&iosapic_intr_info[vector].rtes)) {
568 new_vector = assign_irq_vector(AUTO_ASSIGN);
569 if (new_vector < 0)
570 panic("%s: out of interrupt vectors!\n", __FUNCTION__);
571 printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
572 memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
573 sizeof(struct iosapic_intr_info));
574 INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
575 list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
576 memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
577 iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
578 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
579 }
580 }
582 static struct iosapic_rte_info *iosapic_alloc_rte (void)
583 {
584 int i;
585 struct iosapic_rte_info *rte;
586 int preallocated = 0;
588 if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
589 rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
590 if (!rte)
591 return NULL;
592 for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
593 list_add(&rte->rte_list, &free_rte_list);
594 }
596 if (!list_empty(&free_rte_list)) {
597 rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
598 list_del(&rte->rte_list);
599 preallocated++;
600 } else {
601 rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
602 if (!rte)
603 return NULL;
604 }
606 memset(rte, 0, sizeof(struct iosapic_rte_info));
607 if (preallocated)
608 rte->flags |= RTE_PREALLOCATED;
610 return rte;
611 }
613 static void iosapic_free_rte (struct iosapic_rte_info *rte)
614 {
615 if (rte->flags & RTE_PREALLOCATED)
616 list_add_tail(&rte->rte_list, &free_rte_list);
617 else
618 kfree(rte);
619 }
621 static inline int vector_is_shared (int vector)
622 {
623 return (iosapic_intr_info[vector].count > 1);
624 }
626 static int
627 register_intr (unsigned int gsi, int vector, unsigned char delivery,
628 unsigned long polarity, unsigned long trigger)
629 {
630 irq_desc_t *idesc;
631 struct hw_interrupt_type *irq_type;
632 int rte_index;
633 int index;
634 unsigned long gsi_base;
635 void __iomem *iosapic_address;
636 struct iosapic_rte_info *rte;
638 index = find_iosapic(gsi);
639 if (index < 0) {
640 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
641 return -ENODEV;
642 }
644 iosapic_address = iosapic_lists[index].addr;
645 gsi_base = iosapic_lists[index].gsi_base;
647 rte = gsi_vector_to_rte(gsi, vector);
648 if (!rte) {
649 rte = iosapic_alloc_rte();
650 if (!rte) {
651 printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
652 return -ENOMEM;
653 }
655 rte_index = gsi - gsi_base;
656 rte->rte_index = rte_index;
657 rte->addr = iosapic_address;
658 rte->gsi_base = gsi_base;
659 rte->refcnt++;
660 list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
661 iosapic_intr_info[vector].count++;
662 iosapic_lists[index].rtes_inuse++;
663 }
664 else if (vector_is_shared(vector)) {
665 struct iosapic_intr_info *info = &iosapic_intr_info[vector];
666 if (info->trigger != trigger || info->polarity != polarity) {
667 printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
668 return -EINVAL;
669 }
670 }
672 iosapic_intr_info[vector].polarity = polarity;
673 iosapic_intr_info[vector].dmode = delivery;
674 iosapic_intr_info[vector].trigger = trigger;
676 if (trigger == IOSAPIC_EDGE)
677 irq_type = &irq_type_iosapic_edge;
678 else
679 irq_type = &irq_type_iosapic_level;
681 idesc = irq_descp(vector);
682 if (idesc->handler != irq_type) {
683 if (idesc->handler != &no_irq_type)
684 printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
685 __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
686 idesc->handler = irq_type;
687 }
688 return 0;
689 }
691 static unsigned int
692 get_target_cpu (unsigned int gsi, int vector)
693 {
694 #ifdef CONFIG_SMP
695 static int cpu = -1;
697 /*
698 * In case of vector shared by multiple RTEs, all RTEs that
699 * share the vector need to use the same destination CPU.
700 */
701 if (!list_empty(&iosapic_intr_info[vector].rtes))
702 return iosapic_intr_info[vector].dest;
704 /*
705 * If the platform supports redirection via XTP, let it
706 * distribute interrupts.
707 */
708 if (smp_int_redirect & SMP_IRQ_REDIRECTION)
709 return cpu_physical_id(smp_processor_id());
711 /*
712 * Some interrupts (ACPI SCI, for instance) are registered
713 * before the BSP is marked as online.
714 */
715 if (!cpu_online(smp_processor_id()))
716 return cpu_physical_id(smp_processor_id());
718 #ifdef CONFIG_NUMA
719 {
720 int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
721 cpumask_t cpu_mask;
723 iosapic_index = find_iosapic(gsi);
724 if (iosapic_index < 0 ||
725 iosapic_lists[iosapic_index].node == MAX_NUMNODES)
726 goto skip_numa_setup;
728 cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
730 for_each_cpu_mask(numa_cpu, cpu_mask) {
731 if (!cpu_online(numa_cpu))
732 cpu_clear(numa_cpu, cpu_mask);
733 }
735 num_cpus = cpus_weight(cpu_mask);
737 if (!num_cpus)
738 goto skip_numa_setup;
740 /* Use vector assignment to distribute across cpus in node */
741 cpu_index = vector % num_cpus;
743 for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
744 numa_cpu = next_cpu(numa_cpu, cpu_mask);
746 if (numa_cpu != NR_CPUS)
747 return cpu_physical_id(numa_cpu);
748 }
749 skip_numa_setup:
750 #endif
751 /*
752 * Otherwise, round-robin interrupt vectors across all the
753 * processors. (It'd be nice if we could be smarter in the
754 * case of NUMA.)
755 */
756 do {
757 if (++cpu >= NR_CPUS)
758 cpu = 0;
759 } while (!cpu_online(cpu));
761 return cpu_physical_id(cpu);
762 #else
763 return cpu_physical_id(smp_processor_id());
764 #endif
765 }
767 /*
768 * ACPI can describe IOSAPIC interrupts via static tables and namespace
769 * methods. This provides an interface to register those interrupts and
770 * program the IOSAPIC RTE.
771 */
772 int
773 iosapic_register_intr (unsigned int gsi,
774 unsigned long polarity, unsigned long trigger)
775 {
776 int vector, mask = 1, err;
777 unsigned int dest;
778 unsigned long flags;
779 struct iosapic_rte_info *rte;
780 u32 low32;
781 again:
782 /*
783 * If this GSI has already been registered (i.e., it's a
784 * shared interrupt, or we lost a race to register it),
785 * don't touch the RTE.
786 */
787 spin_lock_irqsave(&iosapic_lock, flags);
788 {
789 vector = gsi_to_vector(gsi);
790 if (vector > 0) {
791 rte = gsi_vector_to_rte(gsi, vector);
792 rte->refcnt++;
793 spin_unlock_irqrestore(&iosapic_lock, flags);
794 return vector;
795 }
796 }
797 spin_unlock_irqrestore(&iosapic_lock, flags);
799 /* If vector is running out, we try to find a sharable vector */
800 vector = assign_irq_vector(AUTO_ASSIGN);
801 if (vector < 0) {
802 vector = iosapic_find_sharable_vector(trigger, polarity);
803 if (vector < 0)
804 return -ENOSPC;
805 }
807 spin_lock_irqsave(&irq_descp(vector)->lock, flags);
808 spin_lock(&iosapic_lock);
809 {
810 if (gsi_to_vector(gsi) > 0) {
811 if (list_empty(&iosapic_intr_info[vector].rtes))
812 free_irq_vector(vector);
813 spin_unlock(&iosapic_lock);
814 spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
815 goto again;
816 }
818 dest = get_target_cpu(gsi, vector);
819 err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
820 polarity, trigger);
821 if (err < 0) {
822 spin_unlock(&iosapic_lock);
823 spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
824 return err;
825 }
827 /*
828 * If the vector is shared and already unmasked for
829 * other interrupt sources, don't mask it.
830 */
831 low32 = iosapic_intr_info[vector].low32;
832 if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
833 mask = 0;
834 set_rte(gsi, vector, dest, mask);
835 }
836 spin_unlock(&iosapic_lock);
837 spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
839 printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
840 gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
841 (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
842 cpu_logical_id(dest), dest, vector);
844 return vector;
845 }
847 void
848 iosapic_unregister_intr (unsigned int gsi)
849 {
850 unsigned long flags;
851 int irq, vector, index;
852 irq_desc_t *idesc;
853 u32 low32;
854 unsigned long trigger, polarity;
855 unsigned int dest;
856 struct iosapic_rte_info *rte;
858 /*
859 * If the irq associated with the gsi is not found,
860 * iosapic_unregister_intr() is unbalanced. We need to check
861 * this again after getting locks.
862 */
863 irq = gsi_to_irq(gsi);
864 if (irq < 0) {
865 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
866 WARN_ON(1);
867 return;
868 }
869 vector = irq_to_vector(irq);
871 idesc = irq_descp(irq);
872 spin_lock_irqsave(&idesc->lock, flags);
873 spin_lock(&iosapic_lock);
874 {
875 if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
876 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
877 WARN_ON(1);
878 goto out;
879 }
881 if (--rte->refcnt > 0)
882 goto out;
884 /* Mask the interrupt */
885 low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
886 iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
888 /* Remove the rte entry from the list */
889 list_del(&rte->rte_list);
890 iosapic_intr_info[vector].count--;
891 iosapic_free_rte(rte);
892 index = find_iosapic(gsi);
893 iosapic_lists[index].rtes_inuse--;
894 WARN_ON(iosapic_lists[index].rtes_inuse < 0);
896 trigger = iosapic_intr_info[vector].trigger;
897 polarity = iosapic_intr_info[vector].polarity;
898 dest = iosapic_intr_info[vector].dest;
899 printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
900 gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
901 (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
902 cpu_logical_id(dest), dest, vector);
904 if (list_empty(&iosapic_intr_info[vector].rtes)) {
905 /* Sanity check */
906 BUG_ON(iosapic_intr_info[vector].count);
908 /* Clear the interrupt controller descriptor */
909 idesc->handler = &no_irq_type;
911 /* Clear the interrupt information */
912 memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
913 iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
914 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
916 if (idesc->action) {
917 printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
918 WARN_ON(1);
919 }
921 /* Free the interrupt vector */
922 free_irq_vector(vector);
923 }
924 }
925 out:
926 spin_unlock(&iosapic_lock);
927 spin_unlock_irqrestore(&idesc->lock, flags);
928 }
930 /*
931 * ACPI calls this when it finds an entry for a platform interrupt.
932 * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
933 */
934 int __init
935 iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
936 int iosapic_vector, u16 eid, u16 id,
937 unsigned long polarity, unsigned long trigger)
938 {
939 static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
940 unsigned char delivery;
941 int vector, mask = 0;
942 unsigned int dest = ((id << 8) | eid) & 0xffff;
944 switch (int_type) {
945 case ACPI_INTERRUPT_PMI:
946 vector = iosapic_vector;
947 /*
948 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
949 * we need to make sure the vector is available
950 */
951 iosapic_reassign_vector(vector);
952 delivery = IOSAPIC_PMI;
953 break;
954 case ACPI_INTERRUPT_INIT:
955 vector = assign_irq_vector(AUTO_ASSIGN);
956 if (vector < 0)
957 panic("%s: out of interrupt vectors!\n", __FUNCTION__);
958 delivery = IOSAPIC_INIT;
959 break;
960 case ACPI_INTERRUPT_CPEI:
961 vector = IA64_CPE_VECTOR;
962 delivery = IOSAPIC_LOWEST_PRIORITY;
963 mask = 1;
964 break;
965 default:
966 printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
967 return -1;
968 }
970 register_intr(gsi, vector, delivery, polarity, trigger);
972 printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
973 int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
974 int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
975 (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
976 cpu_logical_id(dest), dest, vector);
978 set_rte(gsi, vector, dest, mask);
979 return vector;
980 }
983 /*
984 * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
985 * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
986 */
987 void __init
988 iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
989 unsigned long polarity,
990 unsigned long trigger)
991 {
992 int vector;
993 unsigned int dest = cpu_physical_id(smp_processor_id());
995 vector = isa_irq_to_vector(isa_irq);
997 register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
999 DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
1000 isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
1001 polarity == IOSAPIC_POL_HIGH ? "high" : "low",
1002 cpu_logical_id(dest), dest, vector);
1004 set_rte(gsi, vector, dest, 1);
1005 }
1007 void __init
1008 iosapic_system_init (int system_pcat_compat)
1009 {
1010 int vector;
1012 for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
1013 iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
1014 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */
1015 }
1017 pcat_compat = system_pcat_compat;
1018 #ifdef CONFIG_XEN
1019 if (running_on_xen)
1020 return;
1021 #endif
1022 if (pcat_compat) {
1023 /*
1024 * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
1025 * enabled.
1026 */
1027 printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
1028 outb(0xff, 0xA1);
1029 outb(0xff, 0x21);
1030 }
1031 }
1033 static inline int
1034 iosapic_alloc (void)
1035 {
1036 int index;
1038 for (index = 0; index < NR_IOSAPICS; index++)
1039 if (!iosapic_lists[index].addr)
1040 return index;
1042 printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
1043 return -1;
1044 }
1046 static inline void
1047 iosapic_free (int index)
1048 {
1049 memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
1050 }
1052 static inline int
1053 iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
1054 {
1055 int index;
1056 unsigned int gsi_end, base, end;
1058 /* check gsi range */
1059 gsi_end = gsi_base + ((ver >> 16) & 0xff);
1060 for (index = 0; index < NR_IOSAPICS; index++) {
1061 if (!iosapic_lists[index].addr)
1062 continue;
1064 base = iosapic_lists[index].gsi_base;
1065 end = base + iosapic_lists[index].num_rte - 1;
1067 if (gsi_base < base && gsi_end < base)
1068 continue;/* OK */
1070 if (gsi_base > end && gsi_end > end)
1071 continue; /* OK */
1073 return -EBUSY;
1074 }
1075 return 0;
1076 }
1078 int __devinit
1079 iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
1080 {
1081 int num_rte, err, index;
1082 unsigned int isa_irq, ver;
1083 char __iomem *addr;
1084 unsigned long flags;
1086 spin_lock_irqsave(&iosapic_lock, flags);
1087 {
1088 addr = ioremap(phys_addr, 0);
1089 ver = iosapic_version(addr);
1091 if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1092 iounmap(addr);
1093 spin_unlock_irqrestore(&iosapic_lock, flags);
1094 return err;
1095 }
1097 /*
1098 * The MAX_REDIR register holds the highest input pin
1099 * number (starting from 0).
1100 * We add 1 so that we can use it for number of pins (= RTEs)
1101 */
1102 num_rte = ((ver >> 16) & 0xff) + 1;
1104 index = iosapic_alloc();
1105 iosapic_lists[index].addr = addr;
1106 iosapic_lists[index].gsi_base = gsi_base;
1107 iosapic_lists[index].num_rte = num_rte;
1108 #ifdef CONFIG_NUMA
1109 iosapic_lists[index].node = MAX_NUMNODES;
1110 #endif
1111 }
1112 spin_unlock_irqrestore(&iosapic_lock, flags);
1114 if ((gsi_base == 0) && pcat_compat) {
1115 /*
1116 * Map the legacy ISA devices into the IOSAPIC data. Some of these may
1117 * get reprogrammed later on with data from the ACPI Interrupt Source
1118 * Override table.
1119 */
1120 for (isa_irq = 0; isa_irq < 16; ++isa_irq)
1121 iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
1122 }
1123 return 0;
1124 }
1126 #ifdef CONFIG_HOTPLUG
1127 int
1128 iosapic_remove (unsigned int gsi_base)
1129 {
1130 int index, err = 0;
1131 unsigned long flags;
1133 spin_lock_irqsave(&iosapic_lock, flags);
1134 {
1135 index = find_iosapic(gsi_base);
1136 if (index < 0) {
1137 printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
1138 __FUNCTION__, gsi_base);
1139 goto out;
1140 }
1142 if (iosapic_lists[index].rtes_inuse) {
1143 err = -EBUSY;
1144 printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
1145 __FUNCTION__, gsi_base);
1146 goto out;
1147 }
1149 iounmap(iosapic_lists[index].addr);
1150 iosapic_free(index);
1151 }
1152 out:
1153 spin_unlock_irqrestore(&iosapic_lock, flags);
1154 return err;
1155 }
1156 #endif /* CONFIG_HOTPLUG */
1158 #ifdef CONFIG_NUMA
1159 void __devinit
1160 map_iosapic_to_node(unsigned int gsi_base, int node)
1161 {
1162 int index;
1164 index = find_iosapic(gsi_base);
1165 if (index < 0) {
1166 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
1167 __FUNCTION__, gsi_base);
1168 return;
1169 }
1170 iosapic_lists[index].node = node;
1171 return;
1172 }
1173 #endif
1175 static int __init iosapic_enable_kmalloc (void)
1176 {
1177 iosapic_kmalloc_ok = 1;
1178 return 0;
1179 }
1180 core_initcall (iosapic_enable_kmalloc);