ia64/xen-unstable

changeset 4767:c87040213aeb

bitkeeper revision 1.1389.7.2 (42792827eIDpZENcmPicVv96afzrMQ)

io_apic.c:
Pull in code from original Linux i386 io_apic.c.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Wed May 04 19:53:11 2005 +0000 (2005-05-04)
parents 92d76230f6b2
children 3fd85cfa1932
files linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c
line diff
     1.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Wed May 04 19:28:17 2005 +0000
     1.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Wed May 04 19:53:11 2005 +0000
     1.3 @@ -48,7 +48,6 @@ int (*ioapic_renumber_irq)(int ioapic, i
     1.4  atomic_t irq_mis_count;
     1.5  
     1.6  unsigned long io_apic_irqs;
     1.7 -int skip_ioapic_setup;
     1.8  
     1.9  static DEFINE_SPINLOCK(ioapic_lock);
    1.10  
    1.11 @@ -89,24 +88,6 @@ int vector_irq[NR_VECTORS] = { [0 ... NR
    1.12  #define vector_to_irq(vector)	(vector)
    1.13  #endif
    1.14  
    1.15 -
    1.16 -#ifndef CONFIG_SMP
    1.17 -void fastcall send_IPI_self(int vector)
    1.18 -{
    1.19 -     return; 
    1.20 -}
    1.21 -#endif
    1.22 -
    1.23 -int irqbalance_disable(char *str)
    1.24 -{
    1.25 -     return 0; 
    1.26 -}
    1.27 -
    1.28 -void print_IO_APIC(void)
    1.29 -{
    1.30 -     return; 
    1.31 -}
    1.32 -
    1.33  /*
    1.34   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
    1.35   * shared ISA-space IRQs, so we have to support them. We are super
    1.36 @@ -131,6 +112,582 @@ static void add_pin_to_irq(unsigned int 
    1.37  }
    1.38  
    1.39  /*
    1.40 + * Reroute an IRQ to a different pin.
    1.41 + */
    1.42 +static void __init replace_pin_at_irq(unsigned int irq,
    1.43 +				      int oldapic, int oldpin,
    1.44 +				      int newapic, int newpin)
    1.45 +{
    1.46 +	struct irq_pin_list *entry = irq_2_pin + irq;
    1.47 +
    1.48 +	while (1) {
    1.49 +		if (entry->apic == oldapic && entry->pin == oldpin) {
    1.50 +			entry->apic = newapic;
    1.51 +			entry->pin = newpin;
    1.52 +		}
    1.53 +		if (!entry->next)
    1.54 +			break;
    1.55 +		entry = irq_2_pin + entry->next;
    1.56 +	}
    1.57 +}
    1.58 +
    1.59 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
    1.60 +{
    1.61 +	struct irq_pin_list *entry = irq_2_pin + irq;
    1.62 +	unsigned int pin, reg;
    1.63 +
    1.64 +	for (;;) {
    1.65 +		pin = entry->pin;
    1.66 +		if (pin == -1)
    1.67 +			break;
    1.68 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
    1.69 +		reg &= ~disable;
    1.70 +		reg |= enable;
    1.71 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
    1.72 +		if (!entry->next)
    1.73 +			break;
    1.74 +		entry = irq_2_pin + entry->next;
    1.75 +	}
    1.76 +}
    1.77 +
    1.78 +/* mask = 1 */
    1.79 +static void __mask_IO_APIC_irq (unsigned int irq)
    1.80 +{
    1.81 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
    1.82 +}
    1.83 +
    1.84 +/* mask = 0 */
    1.85 +static void __unmask_IO_APIC_irq (unsigned int irq)
    1.86 +{
    1.87 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
    1.88 +}
    1.89 +
    1.90 +/* mask = 1, trigger = 0 */
    1.91 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
    1.92 +{
    1.93 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
    1.94 +}
    1.95 +
    1.96 +/* mask = 0, trigger = 1 */
    1.97 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
    1.98 +{
    1.99 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
   1.100 +}
   1.101 +
   1.102 +static void mask_IO_APIC_irq (unsigned int irq)
   1.103 +{
   1.104 +	unsigned long flags;
   1.105 +
   1.106 +	spin_lock_irqsave(&ioapic_lock, flags);
   1.107 +	__mask_IO_APIC_irq(irq);
   1.108 +	spin_unlock_irqrestore(&ioapic_lock, flags);
   1.109 +}
   1.110 +
   1.111 +static void unmask_IO_APIC_irq (unsigned int irq)
   1.112 +{
   1.113 +	unsigned long flags;
   1.114 +
   1.115 +	spin_lock_irqsave(&ioapic_lock, flags);
   1.116 +	__unmask_IO_APIC_irq(irq);
   1.117 +	spin_unlock_irqrestore(&ioapic_lock, flags);
   1.118 +}
   1.119 +
   1.120 +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
   1.121 +{
   1.122 +	struct IO_APIC_route_entry entry;
   1.123 +	unsigned long flags;
   1.124 +	
   1.125 +	/* Check delivery_mode to be sure we're not clearing an SMI pin */
   1.126 +	spin_lock_irqsave(&ioapic_lock, flags);
   1.127 +	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
   1.128 +	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
   1.129 +	spin_unlock_irqrestore(&ioapic_lock, flags);
   1.130 +	if (entry.delivery_mode == dest_SMI)
   1.131 +		return;
   1.132 +
   1.133 +	/*
   1.134 +	 * Disable it in the IO-APIC irq-routing table:
   1.135 +	 */
   1.136 +	memset(&entry, 0, sizeof(entry));
   1.137 +	entry.mask = 1;
   1.138 +	spin_lock_irqsave(&ioapic_lock, flags);
   1.139 +	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
   1.140 +	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
   1.141 +	spin_unlock_irqrestore(&ioapic_lock, flags);
   1.142 +}
   1.143 +
   1.144 +static void clear_IO_APIC (void)
   1.145 +{
   1.146 +	int apic, pin;
   1.147 +
   1.148 +	for (apic = 0; apic < nr_ioapics; apic++)
   1.149 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
   1.150 +			clear_IO_APIC_pin(apic, pin);
   1.151 +}
   1.152 +
   1.153 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
   1.154 +{
   1.155 +	unsigned long flags;
   1.156 +	int pin;
   1.157 +	struct irq_pin_list *entry = irq_2_pin + irq;
   1.158 +	unsigned int apicid_value;
   1.159 +	
   1.160 +	apicid_value = cpu_mask_to_apicid(cpumask);
   1.161 +	/* Prepare to do the io_apic_write */
   1.162 +	apicid_value = apicid_value << 24;
   1.163 +	spin_lock_irqsave(&ioapic_lock, flags);
   1.164 +	for (;;) {
   1.165 +		pin = entry->pin;
   1.166 +		if (pin == -1)
   1.167 +			break;
   1.168 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
   1.169 +		if (!entry->next)
   1.170 +			break;
   1.171 +		entry = irq_2_pin + entry->next;
   1.172 +	}
   1.173 +	spin_unlock_irqrestore(&ioapic_lock, flags);
   1.174 +}
   1.175 +
   1.176 +#if defined(CONFIG_IRQBALANCE)
   1.177 +# include <asm/processor.h>	/* kernel_thread() */
   1.178 +# include <linux/kernel_stat.h>	/* kstat */
   1.179 +# include <linux/slab.h>		/* kmalloc() */
   1.180 +# include <linux/timer.h>	/* time_after() */
   1.181 + 
   1.182 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
   1.183 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
   1.184 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
   1.185 +# else
   1.186 +#  define TDprintk(x...) 
   1.187 +#  define Dprintk(x...) 
   1.188 +# endif
   1.189 +
   1.190 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
   1.191 +
   1.192 +#define IRQBALANCE_CHECK_ARCH -999
   1.193 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
   1.194 +static int physical_balance = 0;
   1.195 +
   1.196 +struct irq_cpu_info {
   1.197 +	unsigned long * last_irq;
   1.198 +	unsigned long * irq_delta;
   1.199 +	unsigned long irq;
   1.200 +} irq_cpu_data[NR_CPUS];
   1.201 +
   1.202 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
   1.203 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
   1.204 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
   1.205 +
   1.206 +#define IDLE_ENOUGH(cpu,now) \
   1.207 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
   1.208 +
   1.209 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
   1.210 +
   1.211 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
   1.212 +
   1.213 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
   1.214 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
   1.215 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
   1.216 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
   1.217 +
   1.218 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
   1.219 +
   1.220 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
   1.221 +			unsigned long now, int direction)
   1.222 +{
   1.223 +	int search_idle = 1;
   1.224 +	int cpu = curr_cpu;
   1.225 +
   1.226 +	goto inside;
   1.227 +
   1.228 +	do {
   1.229 +		if (unlikely(cpu == curr_cpu))
   1.230 +			search_idle = 0;
   1.231 +inside:
   1.232 +		if (direction == 1) {
   1.233 +			cpu++;
   1.234 +			if (cpu >= NR_CPUS)
   1.235 +				cpu = 0;
   1.236 +		} else {
   1.237 +			cpu--;
   1.238 +			if (cpu == -1)
   1.239 +				cpu = NR_CPUS-1;
   1.240 +		}
   1.241 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
   1.242 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
   1.243 +
   1.244 +	return cpu;
   1.245 +}
   1.246 +
   1.247 +static inline void balance_irq(int cpu, int irq)
   1.248 +{
   1.249 +	unsigned long now = jiffies;
   1.250 +	cpumask_t allowed_mask;
   1.251 +	unsigned int new_cpu;
   1.252 +		
   1.253 +	if (irqbalance_disabled)
   1.254 +		return; 
   1.255 +
   1.256 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
   1.257 +	new_cpu = move(cpu, allowed_mask, now, 1);
   1.258 +	if (cpu != new_cpu) {
   1.259 +		irq_desc_t *desc = irq_desc + irq;
   1.260 +		unsigned long flags;
   1.261 +
   1.262 +		spin_lock_irqsave(&desc->lock, flags);
   1.263 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
   1.264 +		spin_unlock_irqrestore(&desc->lock, flags);
   1.265 +	}
   1.266 +}
   1.267 +
   1.268 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
   1.269 +{
   1.270 +	int i, j;
   1.271 +	Dprintk("Rotating IRQs among CPUs.\n");
   1.272 +	for (i = 0; i < NR_CPUS; i++) {
   1.273 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
   1.274 +			if (!irq_desc[j].action)
   1.275 +				continue;
   1.276 +			/* Is it a significant load ?  */
   1.277 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
   1.278 +						useful_load_threshold)
   1.279 +				continue;
   1.280 +			balance_irq(i, j);
   1.281 +		}
   1.282 +	}
   1.283 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
   1.284 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
   1.285 +	return;
   1.286 +}
   1.287 +
   1.288 +static void do_irq_balance(void)
   1.289 +{
   1.290 +	int i, j;
   1.291 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
   1.292 +	unsigned long move_this_load = 0;
   1.293 +	int max_loaded = 0, min_loaded = 0;
   1.294 +	int load;
   1.295 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
   1.296 +	int selected_irq;
   1.297 +	int tmp_loaded, first_attempt = 1;
   1.298 +	unsigned long tmp_cpu_irq;
   1.299 +	unsigned long imbalance = 0;
   1.300 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
   1.301 +
   1.302 +	for (i = 0; i < NR_CPUS; i++) {
   1.303 +		int package_index;
   1.304 +		CPU_IRQ(i) = 0;
   1.305 +		if (!cpu_online(i))
   1.306 +			continue;
   1.307 +		package_index = CPU_TO_PACKAGEINDEX(i);
   1.308 +		for (j = 0; j < NR_IRQS; j++) {
   1.309 +			unsigned long value_now, delta;
   1.310 +			/* Is this an active IRQ? */
   1.311 +			if (!irq_desc[j].action)
   1.312 +				continue;
   1.313 +			if ( package_index == i )
   1.314 +				IRQ_DELTA(package_index,j) = 0;
   1.315 +			/* Determine the total count per processor per IRQ */
   1.316 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
   1.317 +
   1.318 +			/* Determine the activity per processor per IRQ */
   1.319 +			delta = value_now - LAST_CPU_IRQ(i,j);
   1.320 +
   1.321 +			/* Update last_cpu_irq[][] for the next time */
   1.322 +			LAST_CPU_IRQ(i,j) = value_now;
   1.323 +
   1.324 +			/* Ignore IRQs whose rate is less than the clock */
   1.325 +			if (delta < useful_load_threshold)
   1.326 +				continue;
   1.327 +			/* update the load for the processor or package total */
   1.328 +			IRQ_DELTA(package_index,j) += delta;
   1.329 +
   1.330 +			/* Keep track of the higher numbered sibling as well */
   1.331 +			if (i != package_index)
   1.332 +				CPU_IRQ(i) += delta;
   1.333 +			/*
   1.334 +			 * We have sibling A and sibling B in the package
   1.335 +			 *
   1.336 +			 * cpu_irq[A] = load for cpu A + load for cpu B
   1.337 +			 * cpu_irq[B] = load for cpu B
   1.338 +			 */
   1.339 +			CPU_IRQ(package_index) += delta;
   1.340 +		}
   1.341 +	}
   1.342 +	/* Find the least loaded processor package */
   1.343 +	for (i = 0; i < NR_CPUS; i++) {
   1.344 +		if (!cpu_online(i))
   1.345 +			continue;
   1.346 +		if (i != CPU_TO_PACKAGEINDEX(i))
   1.347 +			continue;
   1.348 +		if (min_cpu_irq > CPU_IRQ(i)) {
   1.349 +			min_cpu_irq = CPU_IRQ(i);
   1.350 +			min_loaded = i;
   1.351 +		}
   1.352 +	}
   1.353 +	max_cpu_irq = ULONG_MAX;
   1.354 +
   1.355 +tryanothercpu:
   1.356 +	/* Look for heaviest loaded processor.
   1.357 +	 * We may come back to get the next heaviest loaded processor.
   1.358 +	 * Skip processors with trivial loads.
   1.359 +	 */
   1.360 +	tmp_cpu_irq = 0;
   1.361 +	tmp_loaded = -1;
   1.362 +	for (i = 0; i < NR_CPUS; i++) {
   1.363 +		if (!cpu_online(i))
   1.364 +			continue;
   1.365 +		if (i != CPU_TO_PACKAGEINDEX(i))
   1.366 +			continue;
   1.367 +		if (max_cpu_irq <= CPU_IRQ(i)) 
   1.368 +			continue;
   1.369 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
   1.370 +			tmp_cpu_irq = CPU_IRQ(i);
   1.371 +			tmp_loaded = i;
   1.372 +		}
   1.373 +	}
   1.374 +
   1.375 +	if (tmp_loaded == -1) {
    1.376 + 	 /* In the case of a small number of heavy interrupt sources,
    1.377 +	  * some cpus may end up loaded too much. We use Ingo's original
    1.378 +	  * approach to rotate them around.
   1.379 +	  */
   1.380 +		if (!first_attempt && imbalance >= useful_load_threshold) {
   1.381 +			rotate_irqs_among_cpus(useful_load_threshold);
   1.382 +			return;
   1.383 +		}
   1.384 +		goto not_worth_the_effort;
   1.385 +	}
   1.386 +	
   1.387 +	first_attempt = 0;		/* heaviest search */
   1.388 +	max_cpu_irq = tmp_cpu_irq;	/* load */
   1.389 +	max_loaded = tmp_loaded;	/* processor */
   1.390 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
   1.391 +	
   1.392 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
   1.393 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
   1.394 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
   1.395 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
   1.396 +	Dprintk("load imbalance = %lu\n", imbalance);
   1.397 +
   1.398 +	/* if imbalance is less than approx 10% of max load, then
   1.399 +	 * observe diminishing returns action. - quit
   1.400 +	 */
   1.401 +	if (imbalance < (max_cpu_irq >> 3)) {
   1.402 +		Dprintk("Imbalance too trivial\n");
   1.403 +		goto not_worth_the_effort;
   1.404 +	}
   1.405 +
   1.406 +tryanotherirq:
   1.407 +	/* if we select an IRQ to move that can't go where we want, then
   1.408 +	 * see if there is another one to try.
   1.409 +	 */
   1.410 +	move_this_load = 0;
   1.411 +	selected_irq = -1;
   1.412 +	for (j = 0; j < NR_IRQS; j++) {
   1.413 +		/* Is this an active IRQ? */
   1.414 +		if (!irq_desc[j].action)
   1.415 +			continue;
   1.416 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
   1.417 +			continue;
   1.418 +		/* Try to find the IRQ that is closest to the imbalance
   1.419 +		 * without going over.
   1.420 +		 */
   1.421 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
   1.422 +			move_this_load = IRQ_DELTA(max_loaded,j);
   1.423 +			selected_irq = j;
   1.424 +		}
   1.425 +	}
   1.426 +	if (selected_irq == -1) {
   1.427 +		goto tryanothercpu;
   1.428 +	}
   1.429 +
   1.430 +	imbalance = move_this_load;
   1.431 +	
    1.432 +	/* For the physical_balance case, we accumulated both load
    1.433 +	 * values in one of the siblings' cpu_irq[] slots,
   1.434 +	 * to use the same code for physical and logical processors
   1.435 +	 * as much as possible. 
   1.436 +	 *
   1.437 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
   1.438 +	 * sibling A and sibling B in the slot for the lowest numbered
   1.439 +	 * sibling (A), _AND_ the load for sibling B in the slot for
   1.440 +	 * the higher numbered sibling.
   1.441 +	 *
   1.442 +	 * We seek the least loaded sibling by making the comparison
   1.443 +	 * (A+B)/2 vs B
   1.444 +	 */
   1.445 +	load = CPU_IRQ(min_loaded) >> 1;
   1.446 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
   1.447 +		if (load > CPU_IRQ(j)) {
   1.448 +			/* This won't change cpu_sibling_map[min_loaded] */
   1.449 +			load = CPU_IRQ(j);
   1.450 +			min_loaded = j;
   1.451 +		}
   1.452 +	}
   1.453 +
   1.454 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
   1.455 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
   1.456 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
   1.457 +
   1.458 +	if (!cpus_empty(tmp)) {
   1.459 +		irq_desc_t *desc = irq_desc + selected_irq;
   1.460 +		unsigned long flags;
   1.461 +
   1.462 +		Dprintk("irq = %d moved to cpu = %d\n",
   1.463 +				selected_irq, min_loaded);
   1.464 +		/* mark for change destination */
   1.465 +		spin_lock_irqsave(&desc->lock, flags);
   1.466 +		pending_irq_balance_cpumask[selected_irq] =
   1.467 +					cpumask_of_cpu(min_loaded);
   1.468 +		spin_unlock_irqrestore(&desc->lock, flags);
   1.469 +		/* Since we made a change, come back sooner to 
   1.470 +		 * check for more variation.
   1.471 +		 */
   1.472 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
   1.473 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
   1.474 +		return;
   1.475 +	}
   1.476 +	goto tryanotherirq;
   1.477 +
   1.478 +not_worth_the_effort:
   1.479 +	/*
   1.480 +	 * if we did not find an IRQ to move, then adjust the time interval
   1.481 +	 * upward
   1.482 +	 */
   1.483 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
   1.484 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
   1.485 +	Dprintk("IRQ worth rotating not found\n");
   1.486 +	return;
   1.487 +}
   1.488 +
   1.489 +static int balanced_irq(void *unused)
   1.490 +{
   1.491 +	int i;
   1.492 +	unsigned long prev_balance_time = jiffies;
   1.493 +	long time_remaining = balanced_irq_interval;
   1.494 +
   1.495 +	daemonize("kirqd");
   1.496 +	
   1.497 +	/* push everything to CPU 0 to give us a starting point.  */
   1.498 +	for (i = 0 ; i < NR_IRQS ; i++) {
   1.499 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
   1.500 +	}
   1.501 +
   1.502 +	for ( ; ; ) {
   1.503 +		set_current_state(TASK_INTERRUPTIBLE);
   1.504 +		time_remaining = schedule_timeout(time_remaining);
   1.505 +		try_to_freeze(PF_FREEZE);
   1.506 +		if (time_after(jiffies,
   1.507 +				prev_balance_time+balanced_irq_interval)) {
   1.508 +			do_irq_balance();
   1.509 +			prev_balance_time = jiffies;
   1.510 +			time_remaining = balanced_irq_interval;
   1.511 +		}
   1.512 +	}
   1.513 +	return 0;
   1.514 +}
   1.515 +
   1.516 +static int __init balanced_irq_init(void)
   1.517 +{
   1.518 +	int i;
   1.519 +	struct cpuinfo_x86 *c;
   1.520 +	cpumask_t tmp;
   1.521 +
   1.522 +	cpus_shift_right(tmp, cpu_online_map, 2);
   1.523 +        c = &boot_cpu_data;
   1.524 +	/* When not overwritten by the command line ask subarchitecture. */
   1.525 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
   1.526 +		irqbalance_disabled = NO_BALANCE_IRQ;
   1.527 +	if (irqbalance_disabled)
   1.528 +		return 0;
   1.529 +	
   1.530 +	 /* disable irqbalance completely if there is only one processor online */
   1.531 +	if (num_online_cpus() < 2) {
   1.532 +		irqbalance_disabled = 1;
   1.533 +		return 0;
   1.534 +	}
   1.535 +	/*
   1.536 +	 * Enable physical balance only if more than 1 physical processor
   1.537 +	 * is present
   1.538 +	 */
   1.539 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
   1.540 +		physical_balance = 1;
   1.541 +
   1.542 +	for (i = 0; i < NR_CPUS; i++) {
   1.543 +		if (!cpu_online(i))
   1.544 +			continue;
   1.545 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
   1.546 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
   1.547 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
   1.548 +			printk(KERN_ERR "balanced_irq_init: out of memory");
   1.549 +			goto failed;
   1.550 +		}
   1.551 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
   1.552 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
   1.553 +	}
   1.554 +	
   1.555 +	printk(KERN_INFO "Starting balanced_irq\n");
   1.556 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
   1.557 +		return 0;
   1.558 +	else 
   1.559 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
   1.560 +failed:
   1.561 +	for (i = 0; i < NR_CPUS; i++) {
   1.562 +		if(irq_cpu_data[i].irq_delta)
   1.563 +			kfree(irq_cpu_data[i].irq_delta);
   1.564 +		if(irq_cpu_data[i].last_irq)
   1.565 +			kfree(irq_cpu_data[i].last_irq);
   1.566 +	}
   1.567 +	return 0;
   1.568 +}
   1.569 +
   1.570 +int __init irqbalance_disable(char *str)
   1.571 +{
   1.572 +	irqbalance_disabled = 1;
   1.573 +	return 0;
   1.574 +}
   1.575 +
   1.576 +__setup("noirqbalance", irqbalance_disable);
   1.577 +
   1.578 +static inline void move_irq(int irq)
   1.579 +{
   1.580 +	/* note - we hold the desc->lock */
   1.581 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
   1.582 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
   1.583 +		cpus_clear(pending_irq_balance_cpumask[irq]);
   1.584 +	}
   1.585 +}
   1.586 +
   1.587 +late_initcall(balanced_irq_init);
   1.588 +
   1.589 +#else /* !CONFIG_IRQBALANCE */
   1.590 +static inline void move_irq(int irq) { }
   1.591 +#endif /* CONFIG_IRQBALANCE */
   1.592 +
   1.593 +#ifndef CONFIG_SMP
   1.594 +void fastcall send_IPI_self(int vector)
   1.595 +{
   1.596 +#if 1
   1.597 +	return;
   1.598 +#else
   1.599 +	unsigned int cfg;
   1.600 +
   1.601 +	/*
   1.602 +	 * Wait for idle.
   1.603 +	 */
   1.604 +	apic_wait_icr_idle();
   1.605 +	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
   1.606 +	/*
   1.607 +	 * Send the IPI. The write to APIC_ICR fires this off.
   1.608 +	 */
   1.609 +	apic_write_around(APIC_ICR, cfg);
   1.610 +#endif
   1.611 +}
   1.612 +#endif /* !CONFIG_SMP */
   1.613 +
   1.614 +
   1.615 +/*
   1.616   * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
   1.617   * specific CPU-side IRQs.
   1.618   */
   1.619 @@ -138,6 +695,86 @@ static void add_pin_to_irq(unsigned int 
   1.620  #define MAX_PIRQS 8
   1.621  int pirq_entries [MAX_PIRQS];
   1.622  int pirqs_enabled;
   1.623 +int skip_ioapic_setup;
   1.624 +
   1.625 +static int __init ioapic_setup(char *str)
   1.626 +{
   1.627 +	skip_ioapic_setup = 1;
   1.628 +	return 1;
   1.629 +}
   1.630 +
   1.631 +__setup("noapic", ioapic_setup);
   1.632 +
   1.633 +static int __init ioapic_pirq_setup(char *str)
   1.634 +{
   1.635 +	int i, max;
   1.636 +	int ints[MAX_PIRQS+1];
   1.637 +
   1.638 +	get_options(str, ARRAY_SIZE(ints), ints);
   1.639 +
   1.640 +	for (i = 0; i < MAX_PIRQS; i++)
   1.641 +		pirq_entries[i] = -1;
   1.642 +
   1.643 +	pirqs_enabled = 1;
   1.644 +	apic_printk(APIC_VERBOSE, KERN_INFO
   1.645 +			"PIRQ redirection, working around broken MP-BIOS.\n");
   1.646 +	max = MAX_PIRQS;
   1.647 +	if (ints[0] < MAX_PIRQS)
   1.648 +		max = ints[0];
   1.649 +
   1.650 +	for (i = 0; i < max; i++) {
   1.651 +		apic_printk(APIC_VERBOSE, KERN_DEBUG
   1.652 +				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
   1.653 +		/*
   1.654 +		 * PIRQs are mapped upside down, usually.
   1.655 +		 */
   1.656 +		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
   1.657 +	}
   1.658 +	return 1;
   1.659 +}
   1.660 +
   1.661 +__setup("pirq=", ioapic_pirq_setup);
   1.662 +
   1.663 +/*
   1.664 + * Find the IRQ entry number of a certain pin.
   1.665 + */
   1.666 +static int find_irq_entry(int apic, int pin, int type)
   1.667 +{
   1.668 +	int i;
   1.669 +
   1.670 +	for (i = 0; i < mp_irq_entries; i++)
   1.671 +		if (mp_irqs[i].mpc_irqtype == type &&
   1.672 +		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
   1.673 +		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
   1.674 +		    mp_irqs[i].mpc_dstirq == pin)
   1.675 +			return i;
   1.676 +
   1.677 +	return -1;
   1.678 +}
   1.679 +
   1.680 +/*
   1.681 + * Find the pin to which IRQ[irq] (ISA) is connected
   1.682 + */
   1.683 +static int find_isa_irq_pin(int irq, int type)
   1.684 +{
   1.685 +	int i;
   1.686 +
   1.687 +	for (i = 0; i < mp_irq_entries; i++) {
   1.688 +		int lbus = mp_irqs[i].mpc_srcbus;
   1.689 +
   1.690 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
   1.691 +		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
   1.692 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
   1.693 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
   1.694 +		    ) &&
   1.695 +		    (mp_irqs[i].mpc_irqtype == type) &&
   1.696 +		    (mp_irqs[i].mpc_srcbusirq == irq))
   1.697 +
   1.698 +			return mp_irqs[i].mpc_dstirq;
   1.699 +	}
   1.700 +	return -1;
   1.701 +}
   1.702 +
   1.703  /*
   1.704   * Find a specific PCI IRQ entry.
   1.705   * Not an __init, possibly needed by modules
   1.706 @@ -184,6 +821,234 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
   1.707  	return best_guess;
   1.708  }
   1.709  
   1.710 +/*
    1.711 + * This function currently is only a helper for the i386 smp boot process, where
    1.712 + * we need to reprogram the ioredtbls to cater for the cpus which have come online,
    1.713 + * so the mask in all cases should simply be TARGET_CPUS
   1.714 + */
   1.715 +void __init setup_ioapic_dest(void)
   1.716 +{
   1.717 +	int pin, ioapic, irq, irq_entry;
   1.718 +
   1.719 +	if (skip_ioapic_setup == 1)
   1.720 +		return;
   1.721 +
   1.722 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
   1.723 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
   1.724 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
   1.725 +			if (irq_entry == -1)
   1.726 +				continue;
   1.727 +			irq = pin_2_irq(irq_entry, ioapic, pin);
   1.728 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
   1.729 +		}
   1.730 +
   1.731 +	}
   1.732 +}
   1.733 +
   1.734 +/*
   1.735 + * EISA Edge/Level control register, ELCR
   1.736 + */
   1.737 +static int EISA_ELCR(unsigned int irq)
   1.738 +{
   1.739 +	if (irq < 16) {
   1.740 +		unsigned int port = 0x4d0 + (irq >> 3);
   1.741 +		return (inb(port) >> (irq & 7)) & 1;
   1.742 +	}
   1.743 +	apic_printk(APIC_VERBOSE, KERN_INFO
   1.744 +			"Broken MPtable reports ISA irq %d\n", irq);
   1.745 +	return 0;
   1.746 +}
   1.747 +
   1.748 +/* EISA interrupts are always polarity zero and can be edge or level
   1.749 + * trigger depending on the ELCR value.  If an interrupt is listed as
   1.750 + * EISA conforming in the MP table, that means its trigger type must
   1.751 + * be read in from the ELCR */
   1.752 +
   1.753 +#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
   1.754 +#define default_EISA_polarity(idx)	(0)
   1.755 +
   1.756 +/* ISA interrupts are always polarity zero edge triggered,
   1.757 + * when listed as conforming in the MP table. */
   1.758 +
   1.759 +#define default_ISA_trigger(idx)	(0)
   1.760 +#define default_ISA_polarity(idx)	(0)
   1.761 +
   1.762 +/* PCI interrupts are always polarity one level triggered,
   1.763 + * when listed as conforming in the MP table. */
   1.764 +
   1.765 +#define default_PCI_trigger(idx)	(1)
   1.766 +#define default_PCI_polarity(idx)	(1)
   1.767 +
   1.768 +/* MCA interrupts are always polarity zero level triggered,
   1.769 + * when listed as conforming in the MP table. */
   1.770 +
   1.771 +#define default_MCA_trigger(idx)	(1)
   1.772 +#define default_MCA_polarity(idx)	(0)
   1.773 +
   1.774 +/* NEC98 interrupts are always polarity zero edge triggered,
   1.775 + * when listed as conforming in the MP table. */
   1.776 +
   1.777 +#define default_NEC98_trigger(idx)     (0)
   1.778 +#define default_NEC98_polarity(idx)    (0)
   1.779 +
   1.780 +static int __init MPBIOS_polarity(int idx)
   1.781 +{
   1.782 +	int bus = mp_irqs[idx].mpc_srcbus;
   1.783 +	int polarity;
   1.784 +
   1.785 +	/*
   1.786 +	 * Determine IRQ line polarity (high active or low active):
   1.787 +	 */
   1.788 +	switch (mp_irqs[idx].mpc_irqflag & 3)
   1.789 +	{
   1.790 +		case 0: /* conforms, ie. bus-type dependent polarity */
   1.791 +		{
   1.792 +			switch (mp_bus_id_to_type[bus])
   1.793 +			{
   1.794 +				case MP_BUS_ISA: /* ISA pin */
   1.795 +				{
   1.796 +					polarity = default_ISA_polarity(idx);
   1.797 +					break;
   1.798 +				}
   1.799 +				case MP_BUS_EISA: /* EISA pin */
   1.800 +				{
   1.801 +					polarity = default_EISA_polarity(idx);
   1.802 +					break;
   1.803 +				}
   1.804 +				case MP_BUS_PCI: /* PCI pin */
   1.805 +				{
   1.806 +					polarity = default_PCI_polarity(idx);
   1.807 +					break;
   1.808 +				}
   1.809 +				case MP_BUS_MCA: /* MCA pin */
   1.810 +				{
   1.811 +					polarity = default_MCA_polarity(idx);
   1.812 +					break;
   1.813 +				}
   1.814 +				case MP_BUS_NEC98: /* NEC 98 pin */
   1.815 +				{
   1.816 +					polarity = default_NEC98_polarity(idx);
   1.817 +					break;
   1.818 +				}
   1.819 +				default:
   1.820 +				{
   1.821 +					printk(KERN_WARNING "broken BIOS!!\n");
   1.822 +					polarity = 1;
   1.823 +					break;
   1.824 +				}
   1.825 +			}
   1.826 +			break;
   1.827 +		}
   1.828 +		case 1: /* high active */
   1.829 +		{
   1.830 +			polarity = 0;
   1.831 +			break;
   1.832 +		}
   1.833 +		case 2: /* reserved */
   1.834 +		{
   1.835 +			printk(KERN_WARNING "broken BIOS!!\n");
   1.836 +			polarity = 1;
   1.837 +			break;
   1.838 +		}
   1.839 +		case 3: /* low active */
   1.840 +		{
   1.841 +			polarity = 1;
   1.842 +			break;
   1.843 +		}
   1.844 +		default: /* invalid */
   1.845 +		{
   1.846 +			printk(KERN_WARNING "broken BIOS!!\n");
   1.847 +			polarity = 1;
   1.848 +			break;
   1.849 +		}
   1.850 +	}
   1.851 +	return polarity;
   1.852 +}
   1.853 +
   1.854 +static int MPBIOS_trigger(int idx)
   1.855 +{
   1.856 +	int bus = mp_irqs[idx].mpc_srcbus;
   1.857 +	int trigger;
   1.858 +
   1.859 +	/*
   1.860 +	 * Determine IRQ trigger mode (edge or level sensitive):
   1.861 +	 */
   1.862 +	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
   1.863 +	{
   1.864 +		case 0: /* conforms, ie. bus-type dependent */
   1.865 +		{
   1.866 +			switch (mp_bus_id_to_type[bus])
   1.867 +			{
   1.868 +				case MP_BUS_ISA: /* ISA pin */
   1.869 +				{
   1.870 +					trigger = default_ISA_trigger(idx);
   1.871 +					break;
   1.872 +				}
   1.873 +				case MP_BUS_EISA: /* EISA pin */
   1.874 +				{
   1.875 +					trigger = default_EISA_trigger(idx);
   1.876 +					break;
   1.877 +				}
   1.878 +				case MP_BUS_PCI: /* PCI pin */
   1.879 +				{
   1.880 +					trigger = default_PCI_trigger(idx);
   1.881 +					break;
   1.882 +				}
   1.883 +				case MP_BUS_MCA: /* MCA pin */
   1.884 +				{
   1.885 +					trigger = default_MCA_trigger(idx);
   1.886 +					break;
   1.887 +				}
   1.888 +				case MP_BUS_NEC98: /* NEC 98 pin */
   1.889 +				{
   1.890 +					trigger = default_NEC98_trigger(idx);
   1.891 +					break;
   1.892 +				}
   1.893 +				default:
   1.894 +				{
   1.895 +					printk(KERN_WARNING "broken BIOS!!\n");
   1.896 +					trigger = 1;
   1.897 +					break;
   1.898 +				}
   1.899 +			}
   1.900 +			break;
   1.901 +		}
   1.902 +		case 1: /* edge */
   1.903 +		{
   1.904 +			trigger = 0;
   1.905 +			break;
   1.906 +		}
   1.907 +		case 2: /* reserved */
   1.908 +		{
   1.909 +			printk(KERN_WARNING "broken BIOS!!\n");
   1.910 +			trigger = 1;
   1.911 +			break;
   1.912 +		}
   1.913 +		case 3: /* level */
   1.914 +		{
   1.915 +			trigger = 1;
   1.916 +			break;
   1.917 +		}
   1.918 +		default: /* invalid */
   1.919 +		{
   1.920 +			printk(KERN_WARNING "broken BIOS!!\n");
   1.921 +			trigger = 0;
   1.922 +			break;
   1.923 +		}
   1.924 +	}
   1.925 +	return trigger;
   1.926 +}
   1.927 +
   1.928 +static inline int irq_polarity(int idx)
   1.929 +{
   1.930 +	return MPBIOS_polarity(idx);
   1.931 +}
   1.932 +
   1.933 +static inline int irq_trigger(int idx)
   1.934 +{
   1.935 +	return MPBIOS_trigger(idx);
   1.936 +}
   1.937 +
   1.938  static int pin_2_irq(int idx, int apic, int pin)
   1.939  {
   1.940  	int irq, i;
   1.941 @@ -250,26 +1115,41 @@ static int pin_2_irq(int idx, int apic, 
   1.942  	return irq;
   1.943  }
   1.944  
   1.945 +static inline int IO_APIC_irq_trigger(int irq)
   1.946 +{
   1.947 +	int apic, idx, pin;
   1.948 +
   1.949 +	for (apic = 0; apic < nr_ioapics; apic++) {
   1.950 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
   1.951 +			idx = find_irq_entry(apic,pin,mp_INT);
   1.952 +			if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
   1.953 +				return irq_trigger(idx);
   1.954 +		}
   1.955 +	}
   1.956 +	/*
   1.957 +	 * nonexistent IRQs are edge default
   1.958 +	 */
   1.959 +	return 0;
   1.960 +}
   1.961 +
   1.962  /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
   1.963  u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
   1.964  
   1.965  int assign_irq_vector(int irq)
   1.966  {
   1.967  	static int current_vector = FIRST_DEVICE_VECTOR;
   1.968 -        physdev_op_t op;
   1.969 -        int ret;
   1.970 +	physdev_op_t op;
   1.971  
   1.972  	BUG_ON(irq >= NR_IRQ_VECTORS);
   1.973  	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
   1.974  		return IO_APIC_VECTOR(irq);
   1.975  
   1.976 -        op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
   1.977 -        op.u.irq_op.irq = irq;
   1.978 -        ret = HYPERVISOR_physdev_op(&op);
   1.979 -        if (ret)
   1.980 -            return -ENOSPC;
   1.981 +	op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
   1.982 +	op.u.irq_op.irq = irq;
   1.983 +	if (HYPERVISOR_physdev_op(&op))
   1.984 +		return -ENOSPC;
   1.985 +	current_vector = op.u.irq_op.vector;
   1.986  
   1.987 -        current_vector = op.u.irq_op.vector;
   1.988  	vector_irq[current_vector] = irq;
   1.989  	if (irq != AUTO_ASSIGN)
   1.990  		IO_APIC_VECTOR(irq) = current_vector;
   1.991 @@ -277,12 +1157,1271 @@ int assign_irq_vector(int irq)
   1.992  	return current_vector;
   1.993  }
   1.994  
   1.995 +static struct hw_interrupt_type ioapic_level_type;
   1.996 +static struct hw_interrupt_type ioapic_edge_type;
   1.997 +
   1.998 +#define IOAPIC_AUTO	-1
   1.999 +#define IOAPIC_EDGE	0
  1.1000 +#define IOAPIC_LEVEL	1
  1.1001 +
  1.1002 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
  1.1003 +{
  1.1004 +	if (use_pci_vector() && !platform_legacy_irq(irq)) {
  1.1005 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  1.1006 +				trigger == IOAPIC_LEVEL)
  1.1007 +			irq_desc[vector].handler = &ioapic_level_type;
  1.1008 +		else
  1.1009 +			irq_desc[vector].handler = &ioapic_edge_type;
  1.1010 +#if 0
  1.1011 +		set_intr_gate(vector, interrupt[vector]);
  1.1012 +#endif
  1.1013 +	} else	{
  1.1014 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  1.1015 +				trigger == IOAPIC_LEVEL)
  1.1016 +			irq_desc[irq].handler = &ioapic_level_type;
  1.1017 +		else
  1.1018 +			irq_desc[irq].handler = &ioapic_edge_type;
  1.1019 +#if 0
  1.1020 +		set_intr_gate(vector, interrupt[irq]);
  1.1021 +#endif
  1.1022 +	}
  1.1023 +}
  1.1024 +
  1.1025 +void __init setup_IO_APIC_irqs(void)
  1.1026 +{
  1.1027 +	struct IO_APIC_route_entry entry;
  1.1028 +	int apic, pin, idx, irq, first_notcon = 1, vector;
  1.1029 +	unsigned long flags;
  1.1030 +
  1.1031 +	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  1.1032 +
  1.1033 +	for (apic = 0; apic < nr_ioapics; apic++) {
  1.1034 +	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
  1.1035 +
  1.1036 +		/*
  1.1037 +		 * add it to the IO-APIC irq-routing table:
  1.1038 +		 */
  1.1039 +		memset(&entry,0,sizeof(entry));
  1.1040 +
  1.1041 +		entry.delivery_mode = INT_DELIVERY_MODE;
  1.1042 +		entry.dest_mode = INT_DEST_MODE;
  1.1043 +		entry.mask = 0;				/* enable IRQ */
  1.1044 +		entry.dest.logical.logical_dest = 
  1.1045 +					cpu_mask_to_apicid(TARGET_CPUS);
  1.1046 +
  1.1047 +		idx = find_irq_entry(apic,pin,mp_INT);
  1.1048 +		if (idx == -1) {
  1.1049 +			if (first_notcon) {
  1.1050 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
  1.1051 +						" IO-APIC (apicid-pin) %d-%d",
  1.1052 +						mp_ioapics[apic].mpc_apicid,
  1.1053 +						pin);
  1.1054 +				first_notcon = 0;
  1.1055 +			} else
  1.1056 +				apic_printk(APIC_VERBOSE, ", %d-%d",
  1.1057 +					mp_ioapics[apic].mpc_apicid, pin);
  1.1058 +			continue;
  1.1059 +		}
  1.1060 +
  1.1061 +		entry.trigger = irq_trigger(idx);
  1.1062 +		entry.polarity = irq_polarity(idx);
  1.1063 +
  1.1064 +		if (irq_trigger(idx)) {
  1.1065 +			entry.trigger = 1;
  1.1066 +			entry.mask = 1;
  1.1067 +		}
  1.1068 +
  1.1069 +		irq = pin_2_irq(idx, apic, pin);
  1.1070 +		/*
  1.1071 +		 * skip adding the timer int on secondary nodes, which causes
  1.1072 +		 * a small but painful rift in the time-space continuum
  1.1073 +		 */
  1.1074 +		if (multi_timer_check(apic, irq))
  1.1075 +			continue;
  1.1076 +		else
  1.1077 +			add_pin_to_irq(irq, apic, pin);
  1.1078 +
  1.1079 +		if (!apic && !IO_APIC_IRQ(irq))
  1.1080 +			continue;
  1.1081 +
  1.1082 +		if (IO_APIC_IRQ(irq)) {
  1.1083 +			vector = assign_irq_vector(irq);
  1.1084 +			entry.vector = vector;
  1.1085 +			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
  1.1086 +		
  1.1087 +#if 0
  1.1088 +			if (!apic && (irq < 16))
  1.1089 +				disable_8259A_irq(irq);
  1.1090 +#endif
  1.1091 +		}
  1.1092 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.1093 +		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
  1.1094 +		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
  1.1095 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1096 +	}
  1.1097 +	}
  1.1098 +
  1.1099 +	if (!first_notcon)
  1.1100 +		apic_printk(APIC_VERBOSE, " not connected.\n");
  1.1101 +}
  1.1102 +
  1.1103 +/*
  1.1104 + * Set up the 8259A-master output pin:
  1.1105 + */
  1.1106 +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
  1.1107 +{
  1.1108 +	struct IO_APIC_route_entry entry;
  1.1109 +	unsigned long flags;
  1.1110 +
  1.1111 +	memset(&entry,0,sizeof(entry));
  1.1112 +
  1.1113 +#if 0
  1.1114 +	disable_8259A_irq(0);
  1.1115 +#endif
  1.1116 +
  1.1117 +	/* mask LVT0 */
  1.1118 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
  1.1119 +
  1.1120 +	/*
  1.1121 +	 * We use logical delivery to get the timer IRQ
  1.1122 +	 * to the first CPU.
  1.1123 +	 */
  1.1124 +	entry.dest_mode = INT_DEST_MODE;
  1.1125 +	entry.mask = 0;					/* unmask IRQ now */
  1.1126 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
  1.1127 +	entry.delivery_mode = INT_DELIVERY_MODE;
  1.1128 +	entry.polarity = 0;
  1.1129 +	entry.trigger = 0;
  1.1130 +	entry.vector = vector;
  1.1131 +
  1.1132 +	/*
  1.1133 +	 * The timer IRQ doesn't have to know that behind the
  1.1134 +	 * scene we have a 8259A-master in AEOI mode ...
  1.1135 +	 */
  1.1136 +	irq_desc[0].handler = &ioapic_edge_type;
  1.1137 +
  1.1138 +	/*
  1.1139 +	 * Add it to the IO-APIC irq-routing table:
  1.1140 +	 */
  1.1141 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.1142 +	io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
  1.1143 +	io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
  1.1144 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1145 +
  1.1146 +#if 0
  1.1147 +	enable_8259A_irq(0);
  1.1148 +#endif
  1.1149 +}
  1.1150 +
  1.1151 +static inline void UNEXPECTED_IO_APIC(void)
  1.1152 +{
  1.1153 +}
  1.1154 +
  1.1155 +void __init print_IO_APIC(void)
  1.1156 +{
  1.1157 +	int apic, i;
  1.1158 +	union IO_APIC_reg_00 reg_00;
  1.1159 +	union IO_APIC_reg_01 reg_01;
  1.1160 +	union IO_APIC_reg_02 reg_02;
  1.1161 +	union IO_APIC_reg_03 reg_03;
  1.1162 +	unsigned long flags;
  1.1163 +
  1.1164 +	if (apic_verbosity == APIC_QUIET)
  1.1165 +		return;
  1.1166 +
  1.1167 + 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
  1.1168 +	for (i = 0; i < nr_ioapics; i++)
  1.1169 +		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
  1.1170 +		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
  1.1171 +
  1.1172 +	/*
  1.1173 +	 * We are a bit conservative about what we expect.  We have to
  1.1174 +	 * know about every hardware change ASAP.
  1.1175 +	 */
  1.1176 +	printk(KERN_INFO "testing the IO APIC.......................\n");
  1.1177 +
  1.1178 +	for (apic = 0; apic < nr_ioapics; apic++) {
  1.1179 +
  1.1180 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.1181 +	reg_00.raw = io_apic_read(apic, 0);
  1.1182 +	reg_01.raw = io_apic_read(apic, 1);
  1.1183 +	if (reg_01.bits.version >= 0x10)
  1.1184 +		reg_02.raw = io_apic_read(apic, 2);
  1.1185 +	if (reg_01.bits.version >= 0x20)
  1.1186 +		reg_03.raw = io_apic_read(apic, 3);
  1.1187 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1188 +
  1.1189 +	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
  1.1190 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
  1.1191 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
  1.1192 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
  1.1193 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
  1.1194 +	if (reg_00.bits.ID >= get_physical_broadcast())
  1.1195 +		UNEXPECTED_IO_APIC();
  1.1196 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
  1.1197 +		UNEXPECTED_IO_APIC();
  1.1198 +
  1.1199 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
  1.1200 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
  1.1201 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
  1.1202 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
  1.1203 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
  1.1204 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
  1.1205 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
  1.1206 +		(reg_01.bits.entries != 0x2E) &&
  1.1207 +		(reg_01.bits.entries != 0x3F)
  1.1208 +	)
  1.1209 +		UNEXPECTED_IO_APIC();
  1.1210 +
  1.1211 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
  1.1212 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
  1.1213 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
  1.1214 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
  1.1215 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
  1.1216 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
  1.1217 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
  1.1218 +	)
  1.1219 +		UNEXPECTED_IO_APIC();
  1.1220 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
  1.1221 +		UNEXPECTED_IO_APIC();
  1.1222 +
  1.1223 +	/*
  1.1224 +	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
  1.1225 +	 * but the value of reg_02 is read as the previous read register
  1.1226 +	 * value, so ignore it if reg_02 == reg_01.
  1.1227 +	 */
  1.1228 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
  1.1229 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
  1.1230 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
  1.1231 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
  1.1232 +			UNEXPECTED_IO_APIC();
  1.1233 +	}
  1.1234 +
  1.1235 +	/*
  1.1236 +	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
  1.1237 +	 * or reg_03, but the value of reg_0[23] is read as the previous read
  1.1238 +	 * register value, so ignore it if reg_03 == reg_0[12].
  1.1239 +	 */
  1.1240 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
  1.1241 +	    reg_03.raw != reg_01.raw) {
  1.1242 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
  1.1243 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
  1.1244 +		if (reg_03.bits.__reserved_1)
  1.1245 +			UNEXPECTED_IO_APIC();
  1.1246 +	}
  1.1247 +
  1.1248 +	printk(KERN_DEBUG ".... IRQ redirection table:\n");
  1.1249 +
  1.1250 +	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
  1.1251 +			  " Stat Dest Deli Vect:   \n");
  1.1252 +
  1.1253 +	for (i = 0; i <= reg_01.bits.entries; i++) {
  1.1254 +		struct IO_APIC_route_entry entry;
  1.1255 +
  1.1256 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.1257 +		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
  1.1258 +		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
  1.1259 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1260 +
  1.1261 +		printk(KERN_DEBUG " %02x %03X %02X  ",
  1.1262 +			i,
  1.1263 +			entry.dest.logical.logical_dest,
  1.1264 +			entry.dest.physical.physical_dest
  1.1265 +		);
  1.1266 +
  1.1267 +		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
  1.1268 +			entry.mask,
  1.1269 +			entry.trigger,
  1.1270 +			entry.irr,
  1.1271 +			entry.polarity,
  1.1272 +			entry.delivery_status,
  1.1273 +			entry.dest_mode,
  1.1274 +			entry.delivery_mode,
  1.1275 +			entry.vector
  1.1276 +		);
  1.1277 +	}
  1.1278 +	}
  1.1279 +	if (use_pci_vector())
  1.1280 +		printk(KERN_INFO "Using vector-based indexing\n");
  1.1281 +	printk(KERN_DEBUG "IRQ to pin mappings:\n");
  1.1282 +	for (i = 0; i < NR_IRQS; i++) {
  1.1283 +		struct irq_pin_list *entry = irq_2_pin + i;
  1.1284 +		if (entry->pin < 0)
  1.1285 +			continue;
  1.1286 + 		if (use_pci_vector() && !platform_legacy_irq(i))
  1.1287 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
  1.1288 +		else
  1.1289 +			printk(KERN_DEBUG "IRQ%d ", i);
  1.1290 +		for (;;) {
  1.1291 +			printk("-> %d:%d", entry->apic, entry->pin);
  1.1292 +			if (!entry->next)
  1.1293 +				break;
  1.1294 +			entry = irq_2_pin + entry->next;
  1.1295 +		}
  1.1296 +		printk("\n");
  1.1297 +	}
  1.1298 +
  1.1299 +	printk(KERN_INFO ".................................... done.\n");
  1.1300 +
  1.1301 +	return;
  1.1302 +}
  1.1303 +
  1.1304 +static void print_APIC_bitfield (int base)
  1.1305 +{
  1.1306 +	unsigned int v;
  1.1307 +	int i, j;
  1.1308 +
  1.1309 +	if (apic_verbosity == APIC_QUIET)
  1.1310 +		return;
  1.1311 +
  1.1312 +	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
  1.1313 +	for (i = 0; i < 8; i++) {
  1.1314 +		v = apic_read(base + i*0x10);
  1.1315 +		for (j = 0; j < 32; j++) {
  1.1316 +			if (v & (1<<j))
  1.1317 +				printk("1");
  1.1318 +			else
  1.1319 +				printk("0");
  1.1320 +		}
  1.1321 +		printk("\n");
  1.1322 +	}
  1.1323 +}
  1.1324 +
  1.1325 +void /*__init*/ print_local_APIC(void * dummy)
  1.1326 +{
  1.1327 +#if 0
  1.1328 +	unsigned int v, ver, maxlvt;
  1.1329 +
  1.1330 +	if (apic_verbosity == APIC_QUIET)
  1.1331 +		return;
  1.1332 +
  1.1333 +	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
  1.1334 +		smp_processor_id(), hard_smp_processor_id());
  1.1335 +	v = apic_read(APIC_ID);
  1.1336 +	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
  1.1337 +	v = apic_read(APIC_LVR);
  1.1338 +	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
  1.1339 +	ver = GET_APIC_VERSION(v);
  1.1340 +	maxlvt = get_maxlvt();
  1.1341 +
  1.1342 +	v = apic_read(APIC_TASKPRI);
  1.1343 +	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
  1.1344 +
  1.1345 +	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
  1.1346 +		v = apic_read(APIC_ARBPRI);
  1.1347 +		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
  1.1348 +			v & APIC_ARBPRI_MASK);
  1.1349 +		v = apic_read(APIC_PROCPRI);
  1.1350 +		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
  1.1351 +	}
  1.1352 +
  1.1353 +	v = apic_read(APIC_EOI);
  1.1354 +	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
  1.1355 +	v = apic_read(APIC_RRR);
  1.1356 +	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
  1.1357 +	v = apic_read(APIC_LDR);
  1.1358 +	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
  1.1359 +	v = apic_read(APIC_DFR);
  1.1360 +	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
  1.1361 +	v = apic_read(APIC_SPIV);
  1.1362 +	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
  1.1363 +
  1.1364 +	printk(KERN_DEBUG "... APIC ISR field:\n");
  1.1365 +	print_APIC_bitfield(APIC_ISR);
  1.1366 +	printk(KERN_DEBUG "... APIC TMR field:\n");
  1.1367 +	print_APIC_bitfield(APIC_TMR);
  1.1368 +	printk(KERN_DEBUG "... APIC IRR field:\n");
  1.1369 +	print_APIC_bitfield(APIC_IRR);
  1.1370 +
  1.1371 +	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
  1.1372 +		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
  1.1373 +			apic_write(APIC_ESR, 0);
  1.1374 +		v = apic_read(APIC_ESR);
  1.1375 +		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
  1.1376 +	}
  1.1377 +
  1.1378 +	v = apic_read(APIC_ICR);
  1.1379 +	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
  1.1380 +	v = apic_read(APIC_ICR2);
  1.1381 +	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
  1.1382 +
  1.1383 +	v = apic_read(APIC_LVTT);
  1.1384 +	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
  1.1385 +
  1.1386 +	if (maxlvt > 3) {                       /* PC is LVT#4. */
  1.1387 +		v = apic_read(APIC_LVTPC);
  1.1388 +		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
  1.1389 +	}
  1.1390 +	v = apic_read(APIC_LVT0);
  1.1391 +	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
  1.1392 +	v = apic_read(APIC_LVT1);
  1.1393 +	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
  1.1394 +
  1.1395 +	if (maxlvt > 2) {			/* ERR is LVT#3. */
  1.1396 +		v = apic_read(APIC_LVTERR);
  1.1397 +		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
  1.1398 +	}
  1.1399 +
  1.1400 +	v = apic_read(APIC_TMICT);
  1.1401 +	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
  1.1402 +	v = apic_read(APIC_TMCCT);
  1.1403 +	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
  1.1404 +	v = apic_read(APIC_TDCR);
  1.1405 +	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
  1.1406 +	printk("\n");
  1.1407 +#endif
  1.1408 +}
  1.1409 +
  1.1410 +void print_all_local_APICs (void)
  1.1411 +{
  1.1412 +	on_each_cpu(print_local_APIC, NULL, 1, 1);
  1.1413 +}
  1.1414 +
  1.1415 +void /*__init*/ print_PIC(void)
  1.1416 +{
  1.1417 +	extern spinlock_t i8259A_lock;
  1.1418 +	unsigned int v;
  1.1419 +	unsigned long flags;
  1.1420 +
  1.1421 +	if (apic_verbosity == APIC_QUIET)
  1.1422 +		return;
  1.1423 +
  1.1424 +	printk(KERN_DEBUG "\nprinting PIC contents\n");
  1.1425 +
  1.1426 +	spin_lock_irqsave(&i8259A_lock, flags);
  1.1427 +
  1.1428 +	v = inb(0xa1) << 8 | inb(0x21);
  1.1429 +	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
  1.1430 +
  1.1431 +	v = inb(0xa0) << 8 | inb(0x20);
  1.1432 +	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
  1.1433 +
  1.1434 +	outb(0x0b,0xa0);
  1.1435 +	outb(0x0b,0x20);
  1.1436 +	v = inb(0xa0) << 8 | inb(0x20);
  1.1437 +	outb(0x0a,0xa0);
  1.1438 +	outb(0x0a,0x20);
  1.1439 +
  1.1440 +	spin_unlock_irqrestore(&i8259A_lock, flags);
  1.1441 +
  1.1442 +	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
  1.1443 +
  1.1444 +	v = inb(0x4d1) << 8 | inb(0x4d0);
  1.1445 +	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
  1.1446 +}
  1.1447 +
  1.1448 +static void __init enable_IO_APIC(void)
  1.1449 +{
  1.1450 +	union IO_APIC_reg_01 reg_01;
  1.1451 +	int i;
  1.1452 +	unsigned long flags;
  1.1453 +
  1.1454 +	for (i = 0; i < PIN_MAP_SIZE; i++) {
  1.1455 +		irq_2_pin[i].pin = -1;
  1.1456 +		irq_2_pin[i].next = 0;
  1.1457 +	}
  1.1458 +	if (!pirqs_enabled)
  1.1459 +		for (i = 0; i < MAX_PIRQS; i++)
  1.1460 +			pirq_entries[i] = -1;
  1.1461 +
  1.1462 +	/*
  1.1463 +	 * The number of IO-APIC IRQ registers (== #pins):
  1.1464 +	 */
  1.1465 +	for (i = 0; i < nr_ioapics; i++) {
  1.1466 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.1467 +		reg_01.raw = io_apic_read(i, 1);
  1.1468 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1469 +		nr_ioapic_registers[i] = reg_01.bits.entries+1;
  1.1470 +	}
  1.1471 +
  1.1472 +	/*
  1.1473 +	 * Do not trust the IO-APIC being empty at bootup
  1.1474 +	 */
  1.1475 +	clear_IO_APIC();
  1.1476 +}
  1.1477 +
  1.1478 +/*
  1.1479 + * Not an __init, needed by the reboot code
  1.1480 + */
  1.1481 +void disable_IO_APIC(void)
  1.1482 +{
  1.1483 +	/*
  1.1484 +	 * Clear the IO-APIC before rebooting:
  1.1485 +	 */
  1.1486 +	clear_IO_APIC();
  1.1487 +
  1.1488 +#if 0
  1.1489 +	disconnect_bsp_APIC();
  1.1490 +#endif
  1.1491 +}
  1.1492 +
  1.1493 +/*
  1.1494 + * function to set the IO-APIC physical IDs based on the
  1.1495 + * values stored in the MPC table.
  1.1496 + *
  1.1497 + * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  1.1498 + */
  1.1499 +
  1.1500 +#ifndef CONFIG_X86_NUMAQ
  1.1501 +static void __init setup_ioapic_ids_from_mpc(void)
  1.1502 +{
  1.1503 +	union IO_APIC_reg_00 reg_00;
  1.1504 +	physid_mask_t phys_id_present_map;
  1.1505 +	int apic;
  1.1506 +	int i;
  1.1507 +	unsigned char old_id;
  1.1508 +	unsigned long flags;
  1.1509 +
  1.1510 +	/*
  1.1511 +	 * This is broken; anything with a real cpu count has to
  1.1512 +	 * circumvent this idiocy regardless.
  1.1513 +	 */
  1.1514 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
  1.1515 +
  1.1516 +	/*
  1.1517 +	 * Set the IOAPIC ID to the value stored in the MPC table.
  1.1518 +	 */
  1.1519 +	for (apic = 0; apic < nr_ioapics; apic++) {
  1.1520 +
  1.1521 +		/* Read the register 0 value */
  1.1522 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.1523 +		reg_00.raw = io_apic_read(apic, 0);
  1.1524 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1525 +		
  1.1526 +		old_id = mp_ioapics[apic].mpc_apicid;
  1.1527 +
  1.1528 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
  1.1529 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
  1.1530 +				apic, mp_ioapics[apic].mpc_apicid);
  1.1531 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
  1.1532 +				reg_00.bits.ID);
  1.1533 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
  1.1534 +		}
  1.1535 +
  1.1536 +		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
  1.1537 +		 * no meaning without the serial APIC bus. */
  1.1538 +		if (NO_IOAPIC_CHECK)
  1.1539 +			continue;
  1.1540 +		/*
  1.1541 +		 * Sanity check, is the ID really free? Every APIC in a
  1.1542 +		 * system must have a unique ID or we get lots of nice
  1.1543 +		 * 'stuck on smp_invalidate_needed IPI wait' messages.
  1.1544 +		 */
  1.1545 +		if (check_apicid_used(phys_id_present_map,
  1.1546 +					mp_ioapics[apic].mpc_apicid)) {
  1.1547 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
  1.1548 +				apic, mp_ioapics[apic].mpc_apicid);
  1.1549 +			for (i = 0; i < get_physical_broadcast(); i++)
  1.1550 +				if (!physid_isset(i, phys_id_present_map))
  1.1551 +					break;
  1.1552 +			if (i >= get_physical_broadcast())
  1.1553 +				panic("Max APIC ID exceeded!\n");
  1.1554 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
  1.1555 +				i);
  1.1556 +			physid_set(i, phys_id_present_map);
  1.1557 +			mp_ioapics[apic].mpc_apicid = i;
  1.1558 +		} else {
  1.1559 +			physid_mask_t tmp;
  1.1560 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
  1.1561 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
  1.1562 +					"phys_id_present_map\n",
  1.1563 +					mp_ioapics[apic].mpc_apicid);
  1.1564 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
  1.1565 +		}
  1.1566 +
  1.1567 +
  1.1568 +		/*
  1.1569 +		 * We need to adjust the IRQ routing table
  1.1570 +		 * if the ID changed.
  1.1571 +		 */
  1.1572 +		if (old_id != mp_ioapics[apic].mpc_apicid)
  1.1573 +			for (i = 0; i < mp_irq_entries; i++)
  1.1574 +				if (mp_irqs[i].mpc_dstapic == old_id)
  1.1575 +					mp_irqs[i].mpc_dstapic
  1.1576 +						= mp_ioapics[apic].mpc_apicid;
  1.1577 +
  1.1578 +		/*
  1.1579 +		 * Read the right value from the MPC table and
  1.1580 +		 * write it into the ID register.
  1.1581 +	 	 */
  1.1582 +		apic_printk(APIC_VERBOSE, KERN_INFO
  1.1583 +			"...changing IO-APIC physical APIC ID to %d ...",
  1.1584 +			mp_ioapics[apic].mpc_apicid);
  1.1585 +
  1.1586 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
  1.1587 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.1588 +		io_apic_write(apic, 0, reg_00.raw);
  1.1589 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1590 +
  1.1591 +		/*
  1.1592 +		 * Sanity check
  1.1593 +		 */
  1.1594 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.1595 +		reg_00.raw = io_apic_read(apic, 0);
  1.1596 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1597 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
  1.1598 +			printk("could not set ID!\n");
  1.1599 +		else
  1.1600 +			apic_printk(APIC_VERBOSE, " ok.\n");
  1.1601 +	}
  1.1602 +}
  1.1603 +#else
  1.1604 +static void __init setup_ioapic_ids_from_mpc(void) { }
  1.1605 +#endif
  1.1606 +
  1.1607 +/*
  1.1608 + * There is a nasty bug in some older SMP boards, their mptable lies
  1.1609 + * about the timer IRQ. We do the following to work around the situation:
  1.1610 + *
  1.1611 + *	- timer IRQ defaults to IO-APIC IRQ
  1.1612 + *	- if this function detects that timer IRQs are defunct, then we fall
  1.1613 + *	  back to ISA timer IRQs
  1.1614 + */
  1.1615 +static int __init timer_irq_works(void)
  1.1616 +{
  1.1617 +	unsigned long t1 = jiffies;
  1.1618 +
  1.1619 +	local_irq_enable();
  1.1620 +	/* Let ten ticks pass... */
  1.1621 +	mdelay((10 * 1000) / HZ);
  1.1622 +
  1.1623 +	/*
  1.1624 +	 * Expect a few ticks at least, to be sure some possible
  1.1625 +	 * glue logic does not lock up after one or two first
  1.1626 +	 * ticks in a non-ExtINT mode.  Also the local APIC
  1.1627 +	 * might have cached one ExtINT interrupt.  Finally, at
  1.1628 +	 * least one tick may be lost due to delays.
  1.1629 +	 */
  1.1630 +	if (jiffies - t1 > 4)
  1.1631 +		return 1;
  1.1632 +
  1.1633 +	return 0;
  1.1634 +}
  1.1635 +
  1.1636 +/*
  1.1637 + * In the SMP+IOAPIC case it might happen that there are an unspecified
  1.1638 + * number of pending IRQ events unhandled. These cases are very rare,
  1.1639 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
  1.1640 + * better to do it this way as thus we do not have to be aware of
  1.1641 + * 'pending' interrupts in the IRQ path, except at this point.
  1.1642 + */
  1.1643 +/*
  1.1644 + * Edge triggered needs to resend any interrupt
  1.1645 + * that was delayed but this is now handled in the device
  1.1646 + * independent code.
  1.1647 + */
  1.1648 +
  1.1649 +/*
  1.1650 + * Starting up a edge-triggered IO-APIC interrupt is
  1.1651 + * nasty - we need to make sure that we get the edge.
  1.1652 + * If it is already asserted for some reason, we need
  1.1653 + * return 1 to indicate that is was pending.
  1.1654 + *
  1.1655 + * This is not complete - we should be able to fake
  1.1656 + * an edge even if it isn't on the 8259A...
  1.1657 + */
  1.1658 +static unsigned int startup_edge_ioapic_irq(unsigned int irq)
  1.1659 +{
  1.1660 +	int was_pending = 0;
  1.1661 +	unsigned long flags;
  1.1662 +
  1.1663 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.1664 +#if 0
  1.1665 +	if (irq < 16) {
  1.1666 +		disable_8259A_irq(irq);
  1.1667 +		if (i8259A_irq_pending(irq))
  1.1668 +			was_pending = 1;
  1.1669 +	}
  1.1670 +#endif
  1.1671 +	__unmask_IO_APIC_irq(irq);
  1.1672 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1673 +
  1.1674 +	return was_pending;
  1.1675 +}
  1.1676 +
  1.1677 +/*
  1.1678 + * Once we have recorded IRQ_PENDING already, we can mask the
  1.1679 + * interrupt for real. This prevents IRQ storms from unhandled
  1.1680 + * devices.
  1.1681 + */
  1.1682 +static void ack_edge_ioapic_irq(unsigned int irq)
  1.1683 +{
  1.1684 +	move_irq(irq);
  1.1685 +	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
  1.1686 +					== (IRQ_PENDING | IRQ_DISABLED))
  1.1687 +		mask_IO_APIC_irq(irq);
  1.1688 +	ack_APIC_irq();
  1.1689 +}
  1.1690 +
  1.1691 +/*
  1.1692 + * Level triggered interrupts can just be masked,
  1.1693 + * and shutting down and starting up the interrupt
  1.1694 + * is the same as enabling and disabling them -- except
  1.1695 + * with a startup need to return a "was pending" value.
  1.1696 + *
  1.1697 + * Level triggered interrupts are special because we
  1.1698 + * do not touch any IO-APIC register while handling
  1.1699 + * them. We ack the APIC in the end-IRQ handler, not
  1.1700 + * in the start-IRQ-handler. Protection against reentrance
  1.1701 + * from the same interrupt is still provided, both by the
  1.1702 + * generic IRQ layer and by the fact that an unacked local
  1.1703 + * APIC does not accept IRQs.
  1.1704 + */
  1.1705 +static unsigned int startup_level_ioapic_irq (unsigned int irq)
  1.1706 +{
  1.1707 +	unmask_IO_APIC_irq(irq);
  1.1708 +
  1.1709 +	return 0; /* don't check for pending */
  1.1710 +}
  1.1711 +
  1.1712 +static void end_level_ioapic_irq (unsigned int irq)
  1.1713 +{
  1.1714 +	unsigned long v;
  1.1715 +	int i;
  1.1716 +
  1.1717 +	move_irq(irq);
  1.1718 +/*
  1.1719 + * It appears there is an erratum which affects at least version 0x11
  1.1720 + * of I/O APIC (that's the 82093AA and cores integrated into various
  1.1721 + * chipsets).  Under certain conditions a level-triggered interrupt is
  1.1722 + * erroneously delivered as edge-triggered one but the respective IRR
  1.1723 + * bit gets set nevertheless.  As a result the I/O unit expects an EOI
  1.1724 + * message but it will never arrive and further interrupts are blocked
  1.1725 + * from the source.  The exact reason is so far unknown, but the
  1.1726 + * phenomenon was observed when two consecutive interrupt requests
  1.1727 + * from a given source get delivered to the same CPU and the source is
  1.1728 + * temporarily disabled in between.
  1.1729 + *
  1.1730 + * A workaround is to simulate an EOI message manually.  We achieve it
  1.1731 + * by setting the trigger mode to edge and then to level when the edge
  1.1732 + * trigger mode gets detected in the TMR of a local APIC for a
  1.1733 + * level-triggered interrupt.  We mask the source for the time of the
  1.1734 + * operation to prevent an edge-triggered interrupt escaping meanwhile.
  1.1735 + * The idea is from Manfred Spraul.  --macro
  1.1736 + */
  1.1737 +	i = IO_APIC_VECTOR(irq);
  1.1738 +
  1.1739 +	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
  1.1740 +
  1.1741 +	ack_APIC_irq();
  1.1742 +
  1.1743 +	if (!(v & (1 << (i & 0x1f)))) {
  1.1744 +		atomic_inc(&irq_mis_count);
  1.1745 +		spin_lock(&ioapic_lock);
  1.1746 +		__mask_and_edge_IO_APIC_irq(irq);
  1.1747 +		__unmask_and_level_IO_APIC_irq(irq);
  1.1748 +		spin_unlock(&ioapic_lock);
  1.1749 +	}
  1.1750 +}
  1.1751 +
  1.1752 +#ifdef CONFIG_PCI_MSI
  1.1753 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
  1.1754 +{
  1.1755 +	int irq = vector_to_irq(vector);
  1.1756 +
  1.1757 +	return startup_edge_ioapic_irq(irq);
  1.1758 +}
  1.1759 +
  1.1760 +static void ack_edge_ioapic_vector(unsigned int vector)
  1.1761 +{
  1.1762 +	int irq = vector_to_irq(vector);
  1.1763 +
  1.1764 +	ack_edge_ioapic_irq(irq);
  1.1765 +}
  1.1766 +
  1.1767 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
  1.1768 +{
  1.1769 +	int irq = vector_to_irq(vector);
  1.1770 +
  1.1771 +	return startup_level_ioapic_irq (irq);
  1.1772 +}
  1.1773 +
  1.1774 +static void end_level_ioapic_vector (unsigned int vector)
  1.1775 +{
  1.1776 +	int irq = vector_to_irq(vector);
  1.1777 +
  1.1778 +	end_level_ioapic_irq(irq);
  1.1779 +}
  1.1780 +
  1.1781 +static void mask_IO_APIC_vector (unsigned int vector)
  1.1782 +{
  1.1783 +	int irq = vector_to_irq(vector);
  1.1784 +
  1.1785 +	mask_IO_APIC_irq(irq);
  1.1786 +}
  1.1787 +
  1.1788 +static void unmask_IO_APIC_vector (unsigned int vector)
  1.1789 +{
  1.1790 +	int irq = vector_to_irq(vector);
  1.1791 +
  1.1792 +	unmask_IO_APIC_irq(irq);
  1.1793 +}
  1.1794 +
  1.1795 +static void set_ioapic_affinity_vector (unsigned int vector,
  1.1796 +					cpumask_t cpu_mask)
  1.1797 +{
  1.1798 +	int irq = vector_to_irq(vector);
  1.1799 +
  1.1800 +	set_ioapic_affinity_irq(irq, cpu_mask);
  1.1801 +}
  1.1802 +#endif
  1.1803 +
  1.1804 +/*
  1.1805 + * Level and edge triggered IO-APIC interrupts need different handling,
  1.1806 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
  1.1807 + * handled with the level-triggered descriptor, but that one has slightly
  1.1808 + * more overhead. Level-triggered interrupts cannot be handled with the
  1.1809 + * edge-triggered handler, without risking IRQ storms and other ugly
  1.1810 + * races.
  1.1811 + */
  1.1812 +static struct hw_interrupt_type ioapic_edge_type = {
  1.1813 +	.typename 	= "IO-APIC-edge",
  1.1814 +	.startup 	= startup_edge_ioapic,
  1.1815 +	.shutdown 	= shutdown_edge_ioapic,
  1.1816 +	.enable 	= enable_edge_ioapic,
  1.1817 +	.disable 	= disable_edge_ioapic,
  1.1818 +	.ack 		= ack_edge_ioapic,
  1.1819 +	.end 		= end_edge_ioapic,
  1.1820 +	.set_affinity 	= set_ioapic_affinity,
  1.1821 +};
  1.1822 +
  1.1823 +static struct hw_interrupt_type ioapic_level_type = {
  1.1824 +	.typename 	= "IO-APIC-level",
  1.1825 +	.startup 	= startup_level_ioapic,
  1.1826 +	.shutdown 	= shutdown_level_ioapic,
  1.1827 +	.enable 	= enable_level_ioapic,
  1.1828 +	.disable 	= disable_level_ioapic,
  1.1829 +	.ack 		= mask_and_ack_level_ioapic,
  1.1830 +	.end 		= end_level_ioapic,
  1.1831 +	.set_affinity 	= set_ioapic_affinity,
  1.1832 +};
  1.1833 +
  1.1834 +static inline void init_IO_APIC_traps(void)
  1.1835 +{
  1.1836 +	int irq;
  1.1837 +
  1.1838 +	/*
  1.1839 +	 * NOTE! The local APIC isn't very good at handling
  1.1840 +	 * multiple interrupts at the same interrupt level.
  1.1841 +	 * As the interrupt level is determined by taking the
  1.1842 +	 * vector number and shifting that right by 4, we
  1.1843 +	 * want to spread these out a bit so that they don't
  1.1844 +	 * all fall in the same interrupt level.
  1.1845 +	 *
  1.1846 +	 * Also, we've got to be careful not to trash gate
  1.1847 +	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
  1.1848 +	 */
  1.1849 +	for (irq = 0; irq < NR_IRQS ; irq++) {
  1.1850 +		int tmp = irq;
  1.1851 +		if (use_pci_vector()) {
  1.1852 +			if (!platform_legacy_irq(tmp))
  1.1853 +				if ((tmp = vector_to_irq(tmp)) == -1)
  1.1854 +					continue;
  1.1855 +		}
  1.1856 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
  1.1857 +			/*
  1.1858 +			 * Hmm.. We don't have an entry for this,
  1.1859 +			 * so default to an old-fashioned 8259
  1.1860 +			 * interrupt if we can..
  1.1861 +			 */
  1.1862 +#if 0
  1.1863 +			if (irq < 16)
  1.1864 +				make_8259A_irq(irq);
  1.1865 +			else
  1.1866 +#endif
  1.1867 +				/* Strange. Oh, well.. */
  1.1868 +				irq_desc[irq].handler = &no_irq_type;
  1.1869 +		}
  1.1870 +	}
  1.1871 +}
  1.1872 +
  1.1873 +static void enable_lapic_irq (unsigned int irq)
  1.1874 +{
  1.1875 +	unsigned long v;
  1.1876 +
  1.1877 +	v = apic_read(APIC_LVT0);
  1.1878 +	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
  1.1879 +}
  1.1880 +
  1.1881 +static void disable_lapic_irq (unsigned int irq)
  1.1882 +{
  1.1883 +	unsigned long v;
  1.1884 +
  1.1885 +	v = apic_read(APIC_LVT0);
  1.1886 +	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
  1.1887 +}
  1.1888 +
  1.1889 +static void ack_lapic_irq (unsigned int irq)
  1.1890 +{
  1.1891 +	ack_APIC_irq();
  1.1892 +}
  1.1893 +
  1.1894 +static void end_lapic_irq (unsigned int i) { /* nothing */ }
  1.1895 +
  1.1896 +static struct hw_interrupt_type lapic_irq_type = {
  1.1897 +	.typename 	= "local-APIC-edge",
  1.1898 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
  1.1899 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
  1.1900 +	.enable 	= enable_lapic_irq,
  1.1901 +	.disable 	= disable_lapic_irq,
  1.1902 +	.ack 		= ack_lapic_irq,
  1.1903 +	.end 		= end_lapic_irq
  1.1904 +};
  1.1905 +
  1.1906 +static void setup_nmi (void)
  1.1907 +{
  1.1908 +	/*
  1.1909 + 	 * Dirty trick to enable the NMI watchdog ...
  1.1910 +	 * We put the 8259A master into AEOI mode and
  1.1911 +	 * unmask on all local APICs LVT0 as NMI.
  1.1912 +	 *
  1.1913 +	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
  1.1914 +	 * is from Maciej W. Rozycki - so we do not have to EOI from
  1.1915 +	 * the NMI handler or the timer interrupt.
  1.1916 +	 */ 
  1.1917 +	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
  1.1918 +
  1.1919 +#if 0
  1.1920 +	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
  1.1921 +#endif
  1.1922 +
  1.1923 +	apic_printk(APIC_VERBOSE, " done.\n");
  1.1924 +}
  1.1925 +
  1.1926 +/*
  1.1927 + * This looks a bit hackish but it's about the only one way of sending
  1.1928 + * a few INTA cycles to 8259As and any associated glue logic.  ICR does
  1.1929 + * not support the ExtINT mode, unfortunately.  We need to send these
  1.1930 + * cycles as some i82489DX-based boards have glue logic that keeps the
  1.1931 + * 8259A interrupt line asserted until INTA.  --macro
  1.1932 + */
  1.1933 +static inline void unlock_ExtINT_logic(void)
  1.1934 +{
  1.1935 +	int pin, i;
  1.1936 +	struct IO_APIC_route_entry entry0, entry1;
  1.1937 +	unsigned char save_control, save_freq_select;
  1.1938 +	unsigned long flags;
  1.1939 +
  1.1940 +	pin = find_isa_irq_pin(8, mp_INT);
  1.1941 +	if (pin == -1)
  1.1942 +		return;
  1.1943 +
  1.1944 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.1945 +	*(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
  1.1946 +	*(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
  1.1947 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1948 +	clear_IO_APIC_pin(0, pin);
  1.1949 +
  1.1950 +	memset(&entry1, 0, sizeof(entry1));
  1.1951 +
  1.1952 +	entry1.dest_mode = 0;			/* physical delivery */
  1.1953 +	entry1.mask = 0;			/* unmask IRQ now */
  1.1954 +	entry1.dest.physical.physical_dest = hard_smp_processor_id();
  1.1955 +	entry1.delivery_mode = dest_ExtINT;
  1.1956 +	entry1.polarity = entry0.polarity;
  1.1957 +	entry1.trigger = 0;
  1.1958 +	entry1.vector = 0;
  1.1959 +
  1.1960 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.1961 +	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
  1.1962 +	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
  1.1963 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1964 +
  1.1965 +	save_control = CMOS_READ(RTC_CONTROL);
  1.1966 +	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
  1.1967 +	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
  1.1968 +		   RTC_FREQ_SELECT);
  1.1969 +	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
  1.1970 +
  1.1971 +	i = 100;
  1.1972 +	while (i-- > 0) {
  1.1973 +		mdelay(10);
  1.1974 +		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
  1.1975 +			i -= 10;
  1.1976 +	}
  1.1977 +
  1.1978 +	CMOS_WRITE(save_control, RTC_CONTROL);
  1.1979 +	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
  1.1980 +	clear_IO_APIC_pin(0, pin);
  1.1981 +
  1.1982 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.1983 +	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
  1.1984 +	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
  1.1985 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.1986 +}
  1.1987 +
  1.1988 +/*
  1.1989 + * This code may look a bit paranoid, but it's supposed to cooperate with
  1.1990 + * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  1.1991 + * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  1.1992 + * fanatically on his truly buggy board.
  1.1993 + */
  1.1994 +static inline void check_timer(void)
  1.1995 +{
  1.1996 +#if 0
  1.1997 +	int pin1, pin2;
  1.1998 +	int vector;
  1.1999 +
  1.2000 +	/*
  1.2001 +	 * get/set the timer IRQ vector:
  1.2002 +	 */
  1.2003 +	disable_8259A_irq(0);
  1.2004 +	vector = assign_irq_vector(0);
  1.2005 +	set_intr_gate(vector, interrupt[0]);
  1.2006 +
  1.2007 +	/*
  1.2008 +	 * Subtle, code in do_timer_interrupt() expects an AEOI
  1.2009 +	 * mode for the 8259A whenever interrupts are routed
  1.2010 +	 * through I/O APICs.  Also IRQ0 has to be enabled in
  1.2011 +	 * the 8259A which implies the virtual wire has to be
  1.2012 +	 * disabled in the local APIC.
  1.2013 +	 */
  1.2014 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
  1.2015 +	init_8259A(1);
  1.2016 +	timer_ack = 1;
  1.2017 +	enable_8259A_irq(0);
  1.2018 +
  1.2019 +	pin1 = find_isa_irq_pin(0, mp_INT);
  1.2020 +	pin2 = find_isa_irq_pin(0, mp_ExtINT);
  1.2021 +
  1.2022 +	printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
  1.2023 +
  1.2024 +	if (pin1 != -1) {
  1.2025 +		/*
  1.2026 +		 * Ok, does IRQ0 through the IOAPIC work?
  1.2027 +		 */
  1.2028 +		unmask_IO_APIC_irq(0);
  1.2029 +		if (timer_irq_works()) {
  1.2030 +			if (nmi_watchdog == NMI_IO_APIC) {
  1.2031 +				disable_8259A_irq(0);
  1.2032 +				setup_nmi();
  1.2033 +				enable_8259A_irq(0);
  1.2034 +				check_nmi_watchdog();
  1.2035 +			}
  1.2036 +			return;
  1.2037 +		}
  1.2038 +		clear_IO_APIC_pin(0, pin1);
  1.2039 +		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
  1.2040 +	}
  1.2041 +
  1.2042 +	printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
  1.2043 +	if (pin2 != -1) {
  1.2044 +		printk("\n..... (found pin %d) ...", pin2);
  1.2045 +		/*
  1.2046 +		 * legacy devices should be connected to IO APIC #0
  1.2047 +		 */
  1.2048 +		setup_ExtINT_IRQ0_pin(pin2, vector);
  1.2049 +		if (timer_irq_works()) {
  1.2050 +			printk("works.\n");
  1.2051 +			if (pin1 != -1)
  1.2052 +				replace_pin_at_irq(0, 0, pin1, 0, pin2);
  1.2053 +			else
  1.2054 +				add_pin_to_irq(0, 0, pin2);
  1.2055 +			if (nmi_watchdog == NMI_IO_APIC) {
  1.2056 +				setup_nmi();
  1.2057 +				check_nmi_watchdog();
  1.2058 +			}
  1.2059 +			return;
  1.2060 +		}
  1.2061 +		/*
  1.2062 +		 * Cleanup, just in case ...
  1.2063 +		 */
  1.2064 +		clear_IO_APIC_pin(0, pin2);
  1.2065 +	}
  1.2066 +	printk(" failed.\n");
  1.2067 +
  1.2068 +	if (nmi_watchdog == NMI_IO_APIC) {
  1.2069 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
  1.2070 +		nmi_watchdog = 0;
  1.2071 +	}
  1.2072 +
  1.2073 +	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
  1.2074 +
  1.2075 +	disable_8259A_irq(0);
  1.2076 +	irq_desc[0].handler = &lapic_irq_type;
  1.2077 +	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
  1.2078 +	enable_8259A_irq(0);
  1.2079 +
  1.2080 +	if (timer_irq_works()) {
  1.2081 +		printk(" works.\n");
  1.2082 +		return;
  1.2083 +	}
  1.2084 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
  1.2085 +	printk(" failed.\n");
  1.2086 +
  1.2087 +	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
  1.2088 +
  1.2089 +	timer_ack = 0;
  1.2090 +	init_8259A(0);
  1.2091 +	make_8259A_irq(0);
  1.2092 +	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
  1.2093 +
  1.2094 +	unlock_ExtINT_logic();
  1.2095 +
  1.2096 +	if (timer_irq_works()) {
  1.2097 +		printk(" works.\n");
  1.2098 +		return;
  1.2099 +	}
  1.2100 +	printk(" failed :(.\n");
  1.2101 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
  1.2102 +		"report.  Then try booting with the 'noapic' option");
  1.2103 +#endif
  1.2104 +}
  1.2105 +
  1.2106 +/*
  1.2107 + *
  1.2108 + * IRQ's that are handled by the PIC in the MPS IOAPIC case.
  1.2109 + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
  1.2110 + *   Linux doesn't really care, as it's not actually used
  1.2111 + *   for any interrupt handling anyway.
  1.2112 + */
  1.2113 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
  1.2114 +
  1.2115 +void __init setup_IO_APIC(void)
  1.2116 +{
  1.2117 +	enable_IO_APIC();
  1.2118 +
  1.2119 +	if (acpi_ioapic)
  1.2120 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
  1.2121 +	else
  1.2122 +		io_apic_irqs = ~PIC_IRQS;
  1.2123 +
  1.2124 +	printk("ENABLING IO-APIC IRQs\n");
  1.2125 +
  1.2126 +	/*
  1.2127 +	 * Set up IO-APIC IRQ routing.
  1.2128 +	 */
  1.2129 +	if (!acpi_ioapic)
  1.2130 +		setup_ioapic_ids_from_mpc();
  1.2131 +#if 0
  1.2132 +	sync_Arb_IDs();
  1.2133 +#endif
  1.2134 +	setup_IO_APIC_irqs();
  1.2135 +	init_IO_APIC_traps();
  1.2136 +	check_timer();
  1.2137 +	if (!acpi_ioapic)
  1.2138 +		print_IO_APIC();
  1.2139 +}
  1.2140 +
  1.2141 +/*
  1.2142 + *	Called after all the initialization is done. If we didn't find any
  1.2143 + *	APIC bugs then we can allow the modify fast path
  1.2144 + */
  1.2145 + 
  1.2146 +static int __init io_apic_bug_finalize(void)
  1.2147 +{
  1.2148 +	if(sis_apic_bug == -1)
  1.2149 +		sis_apic_bug = 0;
  1.2150 +	return 0;
  1.2151 +}
  1.2152 +
  1.2153 +late_initcall(io_apic_bug_finalize);
  1.2154 +
  1.2155 +struct sysfs_ioapic_data {
  1.2156 +	struct sys_device dev;
  1.2157 +	struct IO_APIC_route_entry entry[0];
  1.2158 +};
  1.2159 +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
  1.2160 +
  1.2161 +static int ioapic_suspend(struct sys_device *dev, u32 state)
  1.2162 +{
  1.2163 +	struct IO_APIC_route_entry *entry;
  1.2164 +	struct sysfs_ioapic_data *data;
  1.2165 +	unsigned long flags;
  1.2166 +	int i;
  1.2167 +	
  1.2168 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
  1.2169 +	entry = data->entry;
  1.2170 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.2171 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
  1.2172 +		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
  1.2173 +		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
  1.2174 +	}
  1.2175 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.2176 +
  1.2177 +	return 0;
  1.2178 +}
  1.2179 +
  1.2180 +static int ioapic_resume(struct sys_device *dev)
  1.2181 +{
  1.2182 +	struct IO_APIC_route_entry *entry;
  1.2183 +	struct sysfs_ioapic_data *data;
  1.2184 +	unsigned long flags;
  1.2185 +	union IO_APIC_reg_00 reg_00;
  1.2186 +	int i;
  1.2187 +	
  1.2188 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
  1.2189 +	entry = data->entry;
  1.2190 +
  1.2191 +	spin_lock_irqsave(&ioapic_lock, flags);
  1.2192 +	reg_00.raw = io_apic_read(dev->id, 0);
  1.2193 +	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
  1.2194 +		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
  1.2195 +		io_apic_write(dev->id, 0, reg_00.raw);
  1.2196 +	}
  1.2197 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
  1.2198 +		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
  1.2199 +		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
  1.2200 +	}
  1.2201 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  1.2202 +
  1.2203 +	return 0;
  1.2204 +}
  1.2205 +
  1.2206 +static struct sysdev_class ioapic_sysdev_class = {
  1.2207 +	set_kset_name("ioapic"),
  1.2208 +	.suspend = ioapic_suspend,
  1.2209 +	.resume = ioapic_resume,
  1.2210 +};
  1.2211 +
  1.2212 +static int __init ioapic_init_sysfs(void)
  1.2213 +{
  1.2214 +	struct sys_device * dev;
  1.2215 +	int i, size, error = 0;
  1.2216 +
  1.2217 +	error = sysdev_class_register(&ioapic_sysdev_class);
  1.2218 +	if (error)
  1.2219 +		return error;
  1.2220 +
  1.2221 +	for (i = 0; i < nr_ioapics; i++ ) {
  1.2222 +		size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
  1.2223 +			* sizeof(struct IO_APIC_route_entry);
  1.2224 +		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
  1.2225 +		if (!mp_ioapic_data[i]) {
  1.2226 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
  1.2227 +			continue;
  1.2228 +		}
  1.2229 +		memset(mp_ioapic_data[i], 0, size);
  1.2230 +		dev = &mp_ioapic_data[i]->dev;
  1.2231 +		dev->id = i; 
  1.2232 +		dev->cls = &ioapic_sysdev_class;
  1.2233 +		error = sysdev_register(dev);
  1.2234 +		if (error) {
  1.2235 +			kfree(mp_ioapic_data[i]);
  1.2236 +			mp_ioapic_data[i] = NULL;
  1.2237 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
  1.2238 +			continue;
  1.2239 +		}
  1.2240 +	}
  1.2241 +
  1.2242 +	return 0;
  1.2243 +}
  1.2244 +
  1.2245 +device_initcall(ioapic_init_sysfs);
  1.2246 +
  1.2247 +/* --------------------------------------------------------------------------
  1.2248 +                          ACPI-based IOAPIC Configuration
  1.2249 +   -------------------------------------------------------------------------- */
  1.2250 +
  1.2251  #ifdef CONFIG_ACPI_BOOT
  1.2252 +
  1.2253  int __init io_apic_get_unique_id (int ioapic, int apic_id)
  1.2254  {
  1.2255  	union IO_APIC_reg_00 reg_00;
  1.2256  	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
  1.2257 +	physid_mask_t tmp;
  1.2258  	unsigned long flags;
  1.2259 +	int i = 0;
  1.2260  
  1.2261  	/*
  1.2262  	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
  1.2263 @@ -306,6 +2445,44 @@ int __init io_apic_get_unique_id (int io
  1.2264  		apic_id = reg_00.bits.ID;
  1.2265  	}
  1.2266  
  1.2267 +#if 0
  1.2268 +	/*
  1.2269 +	 * Every APIC in a system must have a unique ID or we get lots of nice 
  1.2270 +	 * 'stuck on smp_invalidate_needed IPI wait' messages.
  1.2271 +	 */
  1.2272 +	if (check_apicid_used(apic_id_map, apic_id)) {
  1.2273 +
  1.2274 +		for (i = 0; i < get_physical_broadcast(); i++) {
  1.2275 +			if (!check_apicid_used(apic_id_map, i))
  1.2276 +				break;
  1.2277 +		}
  1.2278 +
  1.2279 +		if (i == get_physical_broadcast())
  1.2280 +			panic("Max apic_id exceeded!\n");
  1.2281 +
  1.2282 +		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
  1.2283 +			"trying %d\n", ioapic, apic_id, i);
  1.2284 +
  1.2285 +		apic_id = i;
  1.2286 +	} 
  1.2287 +
  1.2288 +	tmp = apicid_to_cpu_present(apic_id);
  1.2289 +	physids_or(apic_id_map, apic_id_map, tmp);
  1.2290 +
  1.2291 +	if (reg_00.bits.ID != apic_id) {
  1.2292 +		reg_00.bits.ID = apic_id;
  1.2293 +
  1.2294 +		spin_lock_irqsave(&ioapic_lock, flags);
  1.2295 +		io_apic_write(ioapic, 0, reg_00.raw);
  1.2296 +		reg_00.raw = io_apic_read(ioapic, 0);
  1.2297 +		spin_unlock_irqrestore(&ioapic_lock, flags);
  1.2298 +
  1.2299 +		/* Sanity check */
  1.2300 +		if (reg_00.bits.ID != apic_id)
  1.2301 +			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
  1.2302 +	}
  1.2303 +#endif
  1.2304 +
  1.2305  	apic_printk(APIC_VERBOSE, KERN_INFO
  1.2306  			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
  1.2307  
  1.2308 @@ -338,6 +2515,7 @@ int __init io_apic_get_redir_entries (in
  1.2309  	return reg_01.bits.entries;
  1.2310  }
  1.2311  
  1.2312 +
  1.2313  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
  1.2314  {
  1.2315  	struct IO_APIC_route_entry entry;
  1.2316 @@ -391,4 +2569,5 @@ int io_apic_set_pci_routing (int ioapic,
  1.2317  
  1.2318  	return 0;
  1.2319  }
  1.2320 +
  1.2321  #endif /*CONFIG_ACPI_BOOT*/