ia64/xen-unstable

view xen/arch/ia64/linux-xen/iosapic.c @ 9747:de2dc4e7966a

[IA64] Add support for physdev_ops

Add support for physdev ops, so that IOSAPIC RTEs are now
managed by Xen. Dom0 now issues a hypercall to read and write
RTE entries. IRQ vector allocation is also owned by Xen now.

After this change the IOSAPIC is almost entirely owned by Xen;
the only exception is the IOSAPIC EOI, which dom0 still issues
directly. That is acceptable for now, since dom0 currently owns
all external physical devices. Later, the full event channel
mechanism will provide the necessary support for driver domains,
and at that point dom0 will naturally issue a physdev_op
(PHYSDEVOP_IRQ_UNMASK_NOTIFY) in place of the IOSAPIC EOI.
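
For illustration, a minimal sketch of the hypervisor-side dispatch for
such a hypercall, built on the iosapic_guest_read()/iosapic_guest_write()
helpers added below. The op structure layout and field names here are
assumptions for illustration, not the exact public interface in this tree:

    /* Hypothetical physdev-op payload for a dom0 RTE access; field names
     * are illustrative assumptions, not the actual public interface. */
    struct apic_op_example {
        unsigned long apic_physbase;  /* physical base address of the IOSAPIC */
        unsigned int  reg;            /* RTE register offset within the IOSAPIC */
        u32           value;          /* value written, or value read back */
    };

    /* Route a dom0 RTE access to the Xen-owned IOSAPIC helpers below. */
    static long example_apic_access(int is_write, struct apic_op_example *op)
    {
        if (is_write)
            return iosapic_guest_write(op->apic_physbase, op->reg, op->value);
        return iosapic_guest_read(op->apic_physbase, op->reg, &op->value);
    }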

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author awilliam@xenbuild.aw
date Fri Apr 21 09:03:19 2006 -0600 (2006-04-21)
parents eab6fd4949f0
children 003157eafd66
line source
/*
 * I/O SAPIC support.
 *
 * Copyright (C) 1999 Intel Corp.
 * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
 * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
 * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
 *     David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
 *
 * 00/04/19  D. Mosberger    Rewritten to mirror more closely the x86 I/O APIC code.
 *                           In particular, we now have separate handlers for edge
 *                           and level triggered interrupts.
 * 00/10/27  Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
 *                           PCI to vector mapping, shared PCI interrupts.
 * 00/10/27  D. Mosberger    Document things a bit more to make them more understandable.
 *                           Clean up much of the old IOSAPIC cruft.
 * 01/07/27  J.I. Lee        PCI irq routing, Platform/Legacy interrupts and fixes for
 *                           ACPI S5(SoftOff) support.
 * 02/01/23  J.I. Lee        iosapic pgm fixes for PCI irq routing from _PRT
 * 02/01/07  E. Focht <efocht@ess.nec.de>  Redirectable interrupt vectors in
 *                           iosapic_set_affinity(), initializations for
 *                           /proc/irq/#/smp_affinity
 * 02/04/02  P. Diefenbaugh  Cleaned up ACPI PCI IRQ routing.
 * 02/04/18  J.I. Lee        bug fix in iosapic_init_pci_irq
 * 02/04/30  J.I. Lee        bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
 *                           error
 * 02/07/29  T. Kochi        Allocate interrupt vectors dynamically
 * 02/08/04  T. Kochi        Cleaned up terminology (irq, global system interrupt, vector, etc.)
 * 02/09/20  D. Mosberger    Simplified by taking advantage of ACPI's pci_irq code.
 * 03/02/19  B. Helgaas      Make pcat_compat system-wide, not per-IOSAPIC.
 *                           Remove iosapic_address & gsi_base from external interfaces.
 *                           Rationalize __init/__devinit attributes.
 * 04/12/04  Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
 *                           Updated to work with irq migration necessary for CPU Hotplug
 */
/*
 * Here is what the interrupt logic between a PCI device and the kernel looks like:
 *
 * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD).  The
 *     device is uniquely identified by its bus- and slot-number (the function
 *     number does not matter here because all functions share the same interrupt
 *     lines).
 *
 * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC controller.
 *     Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
 *     triggered and use the same polarity).  Each interrupt line has a unique Global
 *     System Interrupt (GSI) number which can be calculated as the sum of the controller's
 *     base GSI number and the IOSAPIC pin number to which the line connects.
 * (3) The IOSAPIC uses its internal routing table entries (RTEs) to map the IOSAPIC pin
 *     to the IA-64 interrupt vector.  This interrupt vector is then sent to the CPU.
 *
 * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is used as
 *     architecture-independent interrupt handling mechanism in Linux.  As an
 *     IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
 *     mapping.  On smaller systems, we use one-to-one mapping between IA-64 vector and
 *     IRQ.  A platform can implement platform_irq_to_vector(irq) and
 *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
 *     Please see also include/asm-ia64/hw_irq.h for those APIs.
 *
 * To sum up, there are three levels of mappings involved:
 *
 *     PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
 *
 * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts.
 * Now we use "IRQ" only for Linux IRQ's.  ISA IRQ (isa_irq) is the only exception in this
 * source code.
 */
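/*
 * Worked example of the mappings above (the numbers are hypothetical, for
 * illustration only): a device pin wired to input 3 of an IOSAPIC whose
 * gsi_base is 16 gets GSI 16 + 3 = 19.  register_intr() records that GSI
 * against some IA-64 vector, and gsi_to_vector(19)/gsi_to_irq(19) later
 * recover that vector/IRQ by scanning the RTE lists kept below.
 */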
#include <linux/config.h>

#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#ifndef XEN
#include <linux/pci.h>
#endif
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/string.h>
#include <linux/bootmem.h>

#include <asm/delay.h>
#include <asm/hw_irq.h>
#include <asm/io.h>
#include <asm/iosapic.h>
#include <asm/machvec.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>


#undef DEBUG_INTERRUPT_ROUTING

#ifdef DEBUG_INTERRUPT_ROUTING
#define DBG(fmt...)    printk(fmt)
#else
#define DBG(fmt...)
#endif

#define NR_PREALLOCATE_RTE_ENTRIES    (PAGE_SIZE / sizeof(struct iosapic_rte_info))
#define RTE_PREALLOCATED    (1)

static DEFINE_SPINLOCK(iosapic_lock);

/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */

struct iosapic_rte_info {
    struct list_head rte_list;    /* node in list of RTEs sharing the same vector */
    char __iomem    *addr;        /* base address of IOSAPIC */
    unsigned int    gsi_base;     /* first GSI assigned to this IOSAPIC */
    char            rte_index;    /* IOSAPIC RTE index */
    int             refcnt;       /* reference counter */
    unsigned int    flags;        /* flags */
} ____cacheline_aligned;

static struct iosapic_intr_info {
    struct list_head rtes;        /* RTEs using this vector (empty => not an IOSAPIC interrupt) */
    int             count;        /* # of RTEs that share this vector */
    u32             low32;        /* current value of low word of Redirection table entry */
    unsigned int    dest;         /* destination CPU physical ID */
    unsigned char   dmode   : 3;  /* delivery mode (see iosapic.h) */
    unsigned char   polarity: 1;  /* interrupt polarity (see iosapic.h) */
    unsigned char   trigger : 1;  /* trigger mode (see iosapic.h) */
} iosapic_intr_info[IA64_NUM_VECTORS];

static struct iosapic {
    char __iomem    *addr;        /* base address of IOSAPIC */
    unsigned int    gsi_base;     /* first GSI assigned to this IOSAPIC */
    unsigned short  num_rte;      /* number of RTE in this IOSAPIC */
    int             rtes_inuse;   /* # of RTEs in use on this IOSAPIC */
#ifdef CONFIG_NUMA
    unsigned short  node;         /* numa node association via pxm */
#endif
} iosapic_lists[NR_IOSAPICS];

static unsigned char pcat_compat __devinitdata;    /* 8259 compatibility flag */

static int iosapic_kmalloc_ok;
static LIST_HEAD(free_rte_list);

/*
 * Find an IOSAPIC associated with a GSI
 */
static inline int
find_iosapic (unsigned int gsi)
{
    int i;

    for (i = 0; i < NR_IOSAPICS; i++) {
        if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
            return i;
    }

    return -1;
}

static inline int
_gsi_to_vector (unsigned int gsi)
{
    struct iosapic_intr_info *info;
    struct iosapic_rte_info *rte;

    for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
        list_for_each_entry(rte, &info->rtes, rte_list)
            if (rte->gsi_base + rte->rte_index == gsi)
                return info - iosapic_intr_info;
    return -1;
}

/*
 * Translate GSI number to the corresponding IA-64 interrupt vector.  If no
 * entry exists, return -1.
 */
inline int
gsi_to_vector (unsigned int gsi)
{
    return _gsi_to_vector(gsi);
}

int
gsi_to_irq (unsigned int gsi)
{
    unsigned long flags;
    int irq;
    /*
     * XXX fix me: this assumes an identity mapping between IA-64 vector and Linux irq
     * numbers...
     */
    spin_lock_irqsave(&iosapic_lock, flags);
    {
        irq = _gsi_to_vector(gsi);
    }
    spin_unlock_irqrestore(&iosapic_lock, flags);

    return irq;
}

static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
{
    struct iosapic_rte_info *rte;

    list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
        if (rte->gsi_base + rte->rte_index == gsi)
            return rte;
    return NULL;
}

static void
set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
{
    unsigned long pol, trigger, dmode;
    u32 low32, high32;
    char __iomem *addr;
    int rte_index;
    char redir;
    struct iosapic_rte_info *rte;

    DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);

    rte = gsi_vector_to_rte(gsi, vector);
    if (!rte)
        return;        /* not an IOSAPIC interrupt */

    rte_index = rte->rte_index;
    addr = rte->addr;
    pol     = iosapic_intr_info[vector].polarity;
    trigger = iosapic_intr_info[vector].trigger;
    dmode   = iosapic_intr_info[vector].dmode;

    redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;

#ifdef CONFIG_SMP
    {
        unsigned int irq;

        for (irq = 0; irq < NR_IRQS; ++irq)
            if (irq_to_vector(irq) == vector) {
                set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
                break;
            }
    }
#endif

    low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
             (trigger << IOSAPIC_TRIGGER_SHIFT) |
             (dmode << IOSAPIC_DELIVERY_SHIFT) |
             ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
             vector);

    /* dest contains both id and eid */
    high32 = (dest << IOSAPIC_DEST_SHIFT);

    iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
    iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
    iosapic_intr_info[vector].low32 = low32;
    iosapic_intr_info[vector].dest = dest;
}

static void
nop (unsigned int vector)
{
    /* do nothing... */
}

static void
mask_irq (unsigned int irq)
{
    unsigned long flags;
    char __iomem *addr;
    u32 low32;
    int rte_index;
    ia64_vector vec = irq_to_vector(irq);
    struct iosapic_rte_info *rte;

    if (list_empty(&iosapic_intr_info[vec].rtes))
        return;        /* not an IOSAPIC interrupt! */

    spin_lock_irqsave(&iosapic_lock, flags);
    {
        /* set only the mask bit */
        low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
        list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
            addr = rte->addr;
            rte_index = rte->rte_index;
            iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
        }
    }
    spin_unlock_irqrestore(&iosapic_lock, flags);
}

static void
unmask_irq (unsigned int irq)
{
    unsigned long flags;
    char __iomem *addr;
    u32 low32;
    int rte_index;
    ia64_vector vec = irq_to_vector(irq);
    struct iosapic_rte_info *rte;

    if (list_empty(&iosapic_intr_info[vec].rtes))
        return;        /* not an IOSAPIC interrupt! */

    spin_lock_irqsave(&iosapic_lock, flags);
    {
        low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
        list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
            addr = rte->addr;
            rte_index = rte->rte_index;
            iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
        }
    }
    spin_unlock_irqrestore(&iosapic_lock, flags);
}

static void
iosapic_set_affinity (unsigned int irq, cpumask_t mask)
{
#ifdef CONFIG_SMP
    unsigned long flags;
    u32 high32, low32;
    int dest, rte_index;
    char __iomem *addr;
    int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
    ia64_vector vec;
    struct iosapic_rte_info *rte;

    irq &= (~IA64_IRQ_REDIRECTED);
    vec = irq_to_vector(irq);

    if (cpus_empty(mask))
        return;

    dest = cpu_physical_id(first_cpu(mask));

    if (list_empty(&iosapic_intr_info[vec].rtes))
        return;        /* not an IOSAPIC interrupt */

    set_irq_affinity_info(irq, dest, redir);

    /* dest contains both id and eid */
    high32 = dest << IOSAPIC_DEST_SHIFT;

    spin_lock_irqsave(&iosapic_lock, flags);
    {
        low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);

        if (redir)
            /* change delivery mode to lowest priority */
            low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
        else
            /* change delivery mode to fixed */
            low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);

        iosapic_intr_info[vec].low32 = low32;
        iosapic_intr_info[vec].dest = dest;
        list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
            addr = rte->addr;
            rte_index = rte->rte_index;
            iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
            iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
        }
    }
    spin_unlock_irqrestore(&iosapic_lock, flags);
#endif
}

/*
 * Handlers for level-triggered interrupts.
 */

static unsigned int
iosapic_startup_level_irq (unsigned int irq)
{
    unmask_irq(irq);
    return 0;
}

static void
iosapic_end_level_irq (unsigned int irq)
{
    ia64_vector vec = irq_to_vector(irq);
    struct iosapic_rte_info *rte;

    move_irq(irq);
    list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
        iosapic_eoi(rte->addr, vec);
}

#define iosapic_shutdown_level_irq    mask_irq
#define iosapic_enable_level_irq      unmask_irq
#define iosapic_disable_level_irq     mask_irq
#define iosapic_ack_level_irq         nop

struct hw_interrupt_type irq_type_iosapic_level = {
    .typename =     "IO-SAPIC-level",
    .startup =      iosapic_startup_level_irq,
    .shutdown =     iosapic_shutdown_level_irq,
    .enable =       iosapic_enable_level_irq,
    .disable =      iosapic_disable_level_irq,
    .ack =          iosapic_ack_level_irq,
    .end =          iosapic_end_level_irq,
    .set_affinity = iosapic_set_affinity
};

/*
 * Handlers for edge-triggered interrupts.
 */

static unsigned int
iosapic_startup_edge_irq (unsigned int irq)
{
    unmask_irq(irq);
    /*
     * IOSAPIC simply drops interrupts pended while the
     * corresponding pin was masked, so we can't know if an
     * interrupt is pending already.  Let's hope not...
     */
    return 0;
}

static void
iosapic_ack_edge_irq (unsigned int irq)
{
    irq_desc_t *idesc = irq_descp(irq);

    move_irq(irq);
    /*
     * Once we have recorded IRQ_PENDING already, we can mask the
     * interrupt for real.  This prevents IRQ storms from unhandled
     * devices.
     */
    if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
        mask_irq(irq);
}

#define iosapic_enable_edge_irq     unmask_irq
#define iosapic_disable_edge_irq    nop
#define iosapic_end_edge_irq        nop

struct hw_interrupt_type irq_type_iosapic_edge = {
    .typename =     "IO-SAPIC-edge",
    .startup =      iosapic_startup_edge_irq,
    .shutdown =     iosapic_disable_edge_irq,
    .enable =       iosapic_enable_edge_irq,
    .disable =      iosapic_disable_edge_irq,
    .ack =          iosapic_ack_edge_irq,
    .end =          iosapic_end_edge_irq,
    .set_affinity = iosapic_set_affinity
};

unsigned int
iosapic_version (char __iomem *addr)
{
    /*
     * IOSAPIC Version Register return 32 bit structure like:
     * {
     *     unsigned int version   : 8;
     *     unsigned int reserved1 : 8;
     *     unsigned int max_redir : 8;
     *     unsigned int reserved2 : 8;
     * }
     */
    return iosapic_read(addr, IOSAPIC_VERSION);
}

static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
{
    int i, vector = -1, min_count = -1;
    struct iosapic_intr_info *info;

    /*
     * shared vectors for edge-triggered interrupts are not
     * supported yet
     */
    if (trigger == IOSAPIC_EDGE)
        return -1;

    for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
        info = &iosapic_intr_info[i];
        if (info->trigger == trigger && info->polarity == pol &&
            (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
            if (min_count == -1 || info->count < min_count) {
                vector = i;
                min_count = info->count;
            }
        }
    }

    return vector;
}

/*
 * if the given vector is already owned by other,
 * assign a new vector for the other and make the vector available
 */
static void __init
iosapic_reassign_vector (int vector)
{
    int new_vector;

    if (!list_empty(&iosapic_intr_info[vector].rtes)) {
        new_vector = assign_irq_vector(AUTO_ASSIGN);
        if (new_vector < 0)
            panic("%s: out of interrupt vectors!\n", __FUNCTION__);
        printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
        memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
               sizeof(struct iosapic_intr_info));
        INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
        list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
        memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
        iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
        INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
    }
}

static struct iosapic_rte_info *iosapic_alloc_rte (void)
{
    int i;
    struct iosapic_rte_info *rte;
    int preallocated = 0;

    if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
#ifdef XEN
        rte = xmalloc_bytes(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
#else
        rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
#endif
        if (!rte)
            return NULL;
        for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
            list_add(&rte->rte_list, &free_rte_list);
    }

    if (!list_empty(&free_rte_list)) {
        rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
        list_del(&rte->rte_list);
        preallocated++;
    } else {
        rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
        if (!rte)
            return NULL;
    }

    memset(rte, 0, sizeof(struct iosapic_rte_info));
    if (preallocated)
        rte->flags |= RTE_PREALLOCATED;

    return rte;
}

static void iosapic_free_rte (struct iosapic_rte_info *rte)
{
    if (rte->flags & RTE_PREALLOCATED)
        list_add_tail(&rte->rte_list, &free_rte_list);
    else
        kfree(rte);
}

static inline int vector_is_shared (int vector)
{
    return (iosapic_intr_info[vector].count > 1);
}

static int
register_intr (unsigned int gsi, int vector, unsigned char delivery,
               unsigned long polarity, unsigned long trigger)
{
    irq_desc_t *idesc;
    struct hw_interrupt_type *irq_type;
    int rte_index;
    int index;
    unsigned long gsi_base;
    void __iomem *iosapic_address;
    struct iosapic_rte_info *rte;

    index = find_iosapic(gsi);
    if (index < 0) {
        printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
        return -ENODEV;
    }

    iosapic_address = iosapic_lists[index].addr;
    gsi_base = iosapic_lists[index].gsi_base;

    rte = gsi_vector_to_rte(gsi, vector);
    if (!rte) {
        rte = iosapic_alloc_rte();
        if (!rte) {
            printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
            return -ENOMEM;
        }

        rte_index = gsi - gsi_base;
        rte->rte_index = rte_index;
        rte->addr      = iosapic_address;
        rte->gsi_base  = gsi_base;
        rte->refcnt++;
        list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
        iosapic_intr_info[vector].count++;
        iosapic_lists[index].rtes_inuse++;
    }
    else if (vector_is_shared(vector)) {
        struct iosapic_intr_info *info = &iosapic_intr_info[vector];
        if (info->trigger != trigger || info->polarity != polarity) {
            printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
            return -EINVAL;
        }
    }

    iosapic_intr_info[vector].polarity = polarity;
    iosapic_intr_info[vector].dmode    = delivery;
    iosapic_intr_info[vector].trigger  = trigger;

    if (trigger == IOSAPIC_EDGE)
        irq_type = &irq_type_iosapic_edge;
    else
        irq_type = &irq_type_iosapic_level;

    idesc = irq_descp(vector);
    if (idesc->handler != irq_type) {
        if (idesc->handler != &no_irq_type)
            printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
                   __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
        idesc->handler = irq_type;
    }
    return 0;
}

static unsigned int
get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
    static int cpu = -1;

    /*
     * In case of vector shared by multiple RTEs, all RTEs that
     * share the vector need to use the same destination CPU.
     */
    if (!list_empty(&iosapic_intr_info[vector].rtes))
        return iosapic_intr_info[vector].dest;

    /*
     * If the platform supports redirection via XTP, let it
     * distribute interrupts.
     */
    if (smp_int_redirect & SMP_IRQ_REDIRECTION)
        return cpu_physical_id(smp_processor_id());

    /*
     * Some interrupts (ACPI SCI, for instance) are registered
     * before the BSP is marked as online.
     */
    if (!cpu_online(smp_processor_id()))
        return cpu_physical_id(smp_processor_id());

#ifdef CONFIG_NUMA
    {
        int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
        cpumask_t cpu_mask;

        iosapic_index = find_iosapic(gsi);
        if (iosapic_index < 0 ||
            iosapic_lists[iosapic_index].node == MAX_NUMNODES)
            goto skip_numa_setup;

        cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);

        for_each_cpu_mask(numa_cpu, cpu_mask) {
            if (!cpu_online(numa_cpu))
                cpu_clear(numa_cpu, cpu_mask);
        }

        num_cpus = cpus_weight(cpu_mask);

        if (!num_cpus)
            goto skip_numa_setup;

        /* Use vector assignment to distribute across cpus in node */
        cpu_index = vector % num_cpus;

        for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
            numa_cpu = next_cpu(numa_cpu, cpu_mask);

        if (numa_cpu != NR_CPUS)
            return cpu_physical_id(numa_cpu);
    }
skip_numa_setup:
#endif
    /*
     * Otherwise, round-robin interrupt vectors across all the
     * processors.  (It'd be nice if we could be smarter in the
     * case of NUMA.)
     */
    do {
        if (++cpu >= NR_CPUS)
            cpu = 0;
    } while (!cpu_online(cpu));

    return cpu_physical_id(cpu);
#else
    return cpu_physical_id(smp_processor_id());
#endif
}

/*
 * ACPI can describe IOSAPIC interrupts via static tables and namespace
 * methods.  This provides an interface to register those interrupts and
 * program the IOSAPIC RTE.
 */
int
iosapic_register_intr (unsigned int gsi,
                       unsigned long polarity, unsigned long trigger)
{
    int vector, mask = 1, err;
    unsigned int dest;
    unsigned long flags;
    struct iosapic_rte_info *rte;
    u32 low32;
again:
    /*
     * If this GSI has already been registered (i.e., it's a
     * shared interrupt, or we lost a race to register it),
     * don't touch the RTE.
     */
    spin_lock_irqsave(&iosapic_lock, flags);
    {
        vector = gsi_to_vector(gsi);
        if (vector > 0) {
            rte = gsi_vector_to_rte(gsi, vector);
            rte->refcnt++;
            spin_unlock_irqrestore(&iosapic_lock, flags);
            return vector;
        }
    }
    spin_unlock_irqrestore(&iosapic_lock, flags);

    /* If vectors are running out, try to find a sharable vector */
    vector = assign_irq_vector(AUTO_ASSIGN);
    if (vector < 0) {
        vector = iosapic_find_sharable_vector(trigger, polarity);
        if (vector < 0)
            return -ENOSPC;
    }

    spin_lock_irqsave(&irq_descp(vector)->lock, flags);
    spin_lock(&iosapic_lock);
    {
        if (gsi_to_vector(gsi) > 0) {
            if (list_empty(&iosapic_intr_info[vector].rtes))
                free_irq_vector(vector);
            spin_unlock(&iosapic_lock);
            spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
            goto again;
        }

        dest = get_target_cpu(gsi, vector);
        err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
                            polarity, trigger);
        if (err < 0) {
            spin_unlock(&iosapic_lock);
            spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
            return err;
        }

        /*
         * If the vector is shared and already unmasked for
         * other interrupt sources, don't mask it.
         */
        low32 = iosapic_intr_info[vector].low32;
        if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
            mask = 0;
        set_rte(gsi, vector, dest, mask);
    }
    spin_unlock(&iosapic_lock);
    spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);

    printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
           gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
           (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
           cpu_logical_id(dest), dest, vector);

    return vector;
}

void
iosapic_unregister_intr (unsigned int gsi)
{
    unsigned long flags;
    int irq, vector, index;
    irq_desc_t *idesc;
    u32 low32;
    unsigned long trigger, polarity;
    unsigned int dest;
    struct iosapic_rte_info *rte;

    /*
     * If the irq associated with the gsi is not found,
     * iosapic_unregister_intr() is unbalanced. We need to check
     * this again after getting locks.
     */
    irq = gsi_to_irq(gsi);
    if (irq < 0) {
        printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
        WARN_ON(1);
        return;
    }
    vector = irq_to_vector(irq);

    idesc = irq_descp(irq);
    spin_lock_irqsave(&idesc->lock, flags);
    spin_lock(&iosapic_lock);
    {
        if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
            printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
            WARN_ON(1);
            goto out;
        }

        if (--rte->refcnt > 0)
            goto out;

        /* Mask the interrupt */
        low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
        iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);

        /* Remove the rte entry from the list */
        list_del(&rte->rte_list);
        iosapic_intr_info[vector].count--;
        iosapic_free_rte(rte);
        index = find_iosapic(gsi);
        iosapic_lists[index].rtes_inuse--;
        WARN_ON(iosapic_lists[index].rtes_inuse < 0);

        trigger  = iosapic_intr_info[vector].trigger;
        polarity = iosapic_intr_info[vector].polarity;
        dest     = iosapic_intr_info[vector].dest;
        printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
               gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
               (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
               cpu_logical_id(dest), dest, vector);

        if (list_empty(&iosapic_intr_info[vector].rtes)) {
            /* Sanity check */
            BUG_ON(iosapic_intr_info[vector].count);

            /* Clear the interrupt controller descriptor */
            idesc->handler = &no_irq_type;

            /* Clear the interrupt information */
            memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
            iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
            INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);

            if (idesc->action) {
                printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
                WARN_ON(1);
            }

            /* Free the interrupt vector */
            free_irq_vector(vector);
        }
    }
 out:
    spin_unlock(&iosapic_lock);
    spin_unlock_irqrestore(&idesc->lock, flags);
}

/*
 * ACPI calls this when it finds an entry for a platform interrupt.
 * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
 */
int __init
iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
                                int iosapic_vector, u16 eid, u16 id,
                                unsigned long polarity, unsigned long trigger)
{
    static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
    unsigned char delivery;
    int vector, mask = 0;
    unsigned int dest = ((id << 8) | eid) & 0xffff;

    switch (int_type) {
    case ACPI_INTERRUPT_PMI:
        vector = iosapic_vector;
        /*
         * since PMI vector is alloc'd by FW(ACPI) not by kernel,
         * we need to make sure the vector is available
         */
        iosapic_reassign_vector(vector);
        delivery = IOSAPIC_PMI;
        break;
    case ACPI_INTERRUPT_INIT:
        vector = assign_irq_vector(AUTO_ASSIGN);
        if (vector < 0)
            panic("%s: out of interrupt vectors!\n", __FUNCTION__);
        delivery = IOSAPIC_INIT;
        break;
    case ACPI_INTERRUPT_CPEI:
        vector = IA64_CPE_VECTOR;
        delivery = IOSAPIC_LOWEST_PRIORITY;
        mask = 1;
        break;
    default:
        printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
        return -1;
    }

    register_intr(gsi, vector, delivery, polarity, trigger);

    printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
           int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
           int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
           (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
           cpu_logical_id(dest), dest, vector);

    set_rte(gsi, vector, dest, mask);
    return vector;
}


/*
 * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
 * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
 */
void __init
iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
                          unsigned long polarity,
                          unsigned long trigger)
{
    int vector;
    unsigned int dest = cpu_physical_id(smp_processor_id());

    vector = isa_irq_to_vector(isa_irq);

    register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);

    DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
        isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
        polarity == IOSAPIC_POL_HIGH ? "high" : "low",
        cpu_logical_id(dest), dest, vector);

    set_rte(gsi, vector, dest, 1);
}

void __init
iosapic_system_init (int system_pcat_compat)
{
    int vector;

    for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
        iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
        INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);    /* mark as unused */
    }

    pcat_compat = system_pcat_compat;
    if (pcat_compat) {
        /*
         * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
         * enabled.
         */
        printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
        outb(0xff, 0xA1);
        outb(0xff, 0x21);
    }
}

static inline int
iosapic_alloc (void)
{
    int index;

    for (index = 0; index < NR_IOSAPICS; index++)
        if (!iosapic_lists[index].addr)
            return index;

    printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
    return -1;
}

static inline void
iosapic_free (int index)
{
    memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
}

static inline int
iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
{
    int index;
    unsigned int gsi_end, base, end;

    /* check gsi range */
    gsi_end = gsi_base + ((ver >> 16) & 0xff);
    for (index = 0; index < NR_IOSAPICS; index++) {
        if (!iosapic_lists[index].addr)
            continue;

        base = iosapic_lists[index].gsi_base;
        end  = base + iosapic_lists[index].num_rte - 1;

        if (gsi_base < base && gsi_end < base)
            continue;    /* OK */

        if (gsi_base > end && gsi_end > end)
            continue;    /* OK */

        return -EBUSY;
    }
    return 0;
}

int __devinit
iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
{
    int num_rte, err, index;
    unsigned int isa_irq, ver;
    char __iomem *addr;
    unsigned long flags;

    spin_lock_irqsave(&iosapic_lock, flags);
    {
        addr = ioremap(phys_addr, 0);
        ver = iosapic_version(addr);

        if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
            iounmap(addr);
            spin_unlock_irqrestore(&iosapic_lock, flags);
            return err;
        }

        /*
         * The MAX_REDIR register holds the highest input pin
         * number (starting from 0).
         * We add 1 so that we can use it for number of pins (= RTEs)
         */
        num_rte = ((ver >> 16) & 0xff) + 1;

        index = iosapic_alloc();
        iosapic_lists[index].addr = addr;
        iosapic_lists[index].gsi_base = gsi_base;
        iosapic_lists[index].num_rte = num_rte;
#ifdef CONFIG_NUMA
        iosapic_lists[index].node = MAX_NUMNODES;
#endif
    }
    spin_unlock_irqrestore(&iosapic_lock, flags);

    if ((gsi_base == 0) && pcat_compat) {
        /*
         * Map the legacy ISA devices into the IOSAPIC data.  Some of these may
         * get reprogrammed later on with data from the ACPI Interrupt Source
         * Override table.
         */
        for (isa_irq = 0; isa_irq < 16; ++isa_irq)
            iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
    }

    return 0;
}

#ifdef CONFIG_HOTPLUG
int
iosapic_remove (unsigned int gsi_base)
{
    int index, err = 0;
    unsigned long flags;

    spin_lock_irqsave(&iosapic_lock, flags);
    {
        index = find_iosapic(gsi_base);
        if (index < 0) {
            printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
                   __FUNCTION__, gsi_base);
            goto out;
        }

        if (iosapic_lists[index].rtes_inuse) {
            err = -EBUSY;
            printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
                   __FUNCTION__, gsi_base);
            goto out;
        }

        iounmap(iosapic_lists[index].addr);
        iosapic_free(index);
    }
 out:
    spin_unlock_irqrestore(&iosapic_lock, flags);
    return err;
}
#endif /* CONFIG_HOTPLUG */

#ifdef CONFIG_NUMA
void __devinit
map_iosapic_to_node(unsigned int gsi_base, int node)
{
    int index;

    index = find_iosapic(gsi_base);
    if (index < 0) {
        printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
               __FUNCTION__, gsi_base);
        return;
    }
    iosapic_lists[index].node = node;
    return;
}
#endif

static int __init iosapic_enable_kmalloc (void)
{
    iosapic_kmalloc_ok = 1;
    return 0;
}
core_initcall (iosapic_enable_kmalloc);

#ifdef XEN
/* nop for now */
void set_irq_affinity_info(unsigned int irq, int hwid, int redir) {}

static int iosapic_physbase_to_id(unsigned long physbase)
{
    int i;
    unsigned long addr = physbase | __IA64_UNCACHED_OFFSET;

    for (i = 0; i < NR_IOSAPICS; i++) {
        if ((unsigned long)(iosapic_lists[i].addr) == addr)
            return i;
    }

    return -1;
}

int iosapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
{
    int id;
    unsigned long flags;

    if ((id = (iosapic_physbase_to_id(physbase))) < 0)
        return id;

    spin_lock_irqsave(&iosapic_lock, flags);
    *pval = iosapic_read(iosapic_lists[id].addr, reg);
    spin_unlock_irqrestore(&iosapic_lock, flags);

    return 0;
}

int iosapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
{
    unsigned int id, gsi, vec, dest, high32;
    char rte_index;
    struct iosapic *ios;
    struct iosapic_intr_info *info;
    struct rte_entry rte;
    unsigned long flags;

    if ((id = (iosapic_physbase_to_id(physbase))) < 0)
        return -EINVAL;
    ios = &iosapic_lists[id];

    /* Only handle first half of RTE update */
    if ((reg < 0x10) || (reg & 1))
        return 0;

    rte.val = val;
    rte_index = IOSAPIC_RTEINDEX(reg);
    vec = rte.lo.vector;
#if 0
    /* Take PMI/NMI/INIT/EXTINT handled by xen */
    if (rte.delivery_mode > IOSAPIC_LOWEST_PRIORITY) {
        printk("Attempt to write IOSAPIC dest mode owned by xen!\n");
        printk("IOSAPIC/PIN = (%d/%d), lo = 0x%x\n",
               id, rte_index, val);
        return -EINVAL;
    }
#endif

    /* Sanity check. Vector should be allocated before this update */
    if ((rte_index > ios->num_rte) ||
        ((vec > IA64_FIRST_DEVICE_VECTOR) &&
         (vec < IA64_LAST_DEVICE_VECTOR) &&
         (!test_bit(vec - IA64_FIRST_DEVICE_VECTOR, ia64_vector_mask))))
        return -EINVAL;

    gsi = ios->gsi_base + rte_index;
    info = &iosapic_intr_info[vec];
    spin_lock_irqsave(&irq_descp(vec)->lock, flags);
    spin_lock(&iosapic_lock);
    if (!gsi_vector_to_rte(gsi, vec)) {
        register_intr(gsi, vec, IOSAPIC_LOWEST_PRIORITY,
                      rte.lo.polarity, rte.lo.trigger);
    } else if (vector_is_shared(vec)) {
        if ((info->trigger != rte.lo.trigger) ||
            (info->polarity != rte.lo.polarity)) {
            printk("WARN: can't override shared interrupt vec\n");
            printk("IOSAPIC/PIN = (%d/%d), ori = 0x%x, new = 0x%x\n",
                   id, rte_index, info->low32, rte.val);
            spin_unlock(&iosapic_lock);
            spin_unlock_irqrestore(&irq_descp(vec)->lock, flags);
            return -EINVAL;
        }

        /* If the vector is shared and already unmasked for other
         * interrupt sources, don't mask it.
         *
         * The same check may also apply to a single GSI pin, which
         * may be shared by devices belonging to different domains.
         * But let's see how to act on demand later.
         */
        if (!(info->low32 & IOSAPIC_MASK))
            rte.lo.mask = 0;
    }

    /* time to update physical RTE */
    dest = cpu_physical_id(smp_processor_id());
    high32 = (dest << IOSAPIC_DEST_SHIFT);
    iosapic_write(iosapic_lists[id].addr, reg + 1, high32);
    iosapic_write(iosapic_lists[id].addr, reg, rte.val);
    info->low32 = rte.val;
    info->dest = dest;
    spin_unlock(&iosapic_lock);
    spin_unlock_irqrestore(&irq_descp(vec)->lock, flags);
    return 0;
}
#endif /* XEN */