ia64/xen-unstable

changeset 4837:e2f39ab492f6

bitkeeper revision 1.1389.15.4 (4280e2e1TW-3Y8iE13utT8fyuaozWA)

Cset exclude: kaf24@firebug.cl.cam.ac.uk|ChangeSet|20050510144837|42684
author kaf24@firebug.cl.cam.ac.uk
date Tue May 10 16:35:45 2005 +0000 (2005-05-10)
parents 2ba74b440f13
children 9143c15ed7c3
files xen/arch/ia64/irq.c xen/arch/x86/acpi/boot.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/physdev.c xen/include/asm-x86/io_apic.h xen/include/asm-x86/mach-default/irq_vectors_limits.h xen/include/xen/irq.h
line diff
     1.1 --- a/xen/arch/ia64/irq.c	Tue May 10 14:49:26 2005 +0000
     1.2 +++ b/xen/arch/ia64/irq.c	Tue May 10 16:35:45 2005 +0000
     1.3 @@ -1468,6 +1468,29 @@ int pirq_guest_unbind(struct domain *d, 
     1.4      spin_unlock_irqrestore(&desc->lock, flags);    
     1.5      return 0;
     1.6  }
     1.7 +
     1.8 +int pirq_guest_bindable(int irq, int will_share)
     1.9 +{
    1.10 +    irq_desc_t         *desc = &irq_desc[irq];
    1.11 +    irq_guest_action_t *action;
    1.12 +    unsigned long       flags;
    1.13 +    int                 okay;
    1.14 +
    1.15 +    spin_lock_irqsave(&desc->lock, flags);
    1.16 +
    1.17 +    action = (irq_guest_action_t *)desc->action;
    1.18 +
    1.19 +    /*
    1.20 +     * To be bindable the IRQ must either be not currently bound (1), or
    1.21 +     * it must be shareable (2) and not at its share limit (3).
    1.22 +     */
    1.23 +    okay = ((!(desc->status & IRQ_GUEST) && (action == NULL)) || /* 1 */
    1.24 +            (action->shareable && will_share &&                  /* 2 */
    1.25 +             (action->nr_guests != IRQ_MAX_GUESTS)));            /* 3 */
    1.26 +
    1.27 +    spin_unlock_irqrestore(&desc->lock, flags);
    1.28 +    return okay;
    1.29 +}
    1.30  #endif
    1.31  
    1.32  #ifdef XEN
     2.1 --- a/xen/arch/x86/acpi/boot.c	Tue May 10 14:49:26 2005 +0000
     2.2 +++ b/xen/arch/x86/acpi/boot.c	Tue May 10 16:35:45 2005 +0000
     2.3 @@ -447,6 +447,44 @@ acpi_pic_sci_set_trigger(unsigned int ir
     2.4  
     2.5  #endif /* CONFIG_ACPI_BUS */
     2.6  
     2.7 +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
     2.8 +{
     2.9 +#ifdef CONFIG_X86_IO_APIC
    2.10 +	if (use_pci_vector() && !platform_legacy_irq(gsi))
    2.11 + 		*irq = IO_APIC_VECTOR(gsi);
    2.12 +	else
    2.13 +#endif
    2.14 +		*irq = gsi;
    2.15 +	return 0;
    2.16 +}
    2.17 +
    2.18 +unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
    2.19 +{
    2.20 +	unsigned int irq;
    2.21 +	unsigned int plat_gsi = gsi;
    2.22 +
    2.23 +#ifdef CONFIG_PCI
    2.24 +	/*
    2.25 +	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
    2.26 +	 */
    2.27 +	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
    2.28 +		extern void eisa_set_level_irq(unsigned int irq);
    2.29 +
    2.30 +		if (edge_level == ACPI_LEVEL_SENSITIVE)
    2.31 +				eisa_set_level_irq(gsi);
    2.32 +	}
    2.33 +#endif
    2.34 +
    2.35 +#ifdef CONFIG_X86_IO_APIC
    2.36 +	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
    2.37 +		plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
    2.38 +	}
    2.39 +#endif
    2.40 +	acpi_gsi_to_irq(plat_gsi, &irq);
    2.41 +	return irq;
    2.42 +}
    2.43 +EXPORT_SYMBOL(acpi_register_gsi);
    2.44 +
    2.45  /*
    2.46   *  ACPI based hotplug support for CPU
    2.47   */
    2.48 @@ -818,6 +856,10 @@ acpi_boot_table_init(void)
    2.49  		return error;
    2.50  	}
    2.51  
    2.52 +#if 0 /*def __i386__*/
    2.53 +	check_acpi_pci();
    2.54 +#endif
    2.55 +
    2.56  	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
    2.57  
    2.58  	/*
     3.1 --- a/xen/arch/x86/io_apic.c	Tue May 10 14:49:26 2005 +0000
     3.2 +++ b/xen/arch/x86/io_apic.c	Tue May 10 16:35:45 2005 +0000
     3.3 @@ -64,8 +64,12 @@ static struct irq_pin_list {
     3.4  } irq_2_pin[PIN_MAP_SIZE];
     3.5  
     3.6  int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
     3.7 +#ifdef CONFIG_PCI_MSI
     3.8  #define vector_to_irq(vector) 	\
     3.9  	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
    3.10 +#else
    3.11 +#define vector_to_irq(vector)	(vector)
    3.12 +#endif
    3.13  
    3.14  /*
    3.15   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
    3.16 @@ -141,16 +145,16 @@ static void __unmask_IO_APIC_irq (unsign
    3.17  	__modify_IO_APIC_irq(irq, 0, 0x00010000);
    3.18  }
    3.19  
    3.20 -/* trigger = 0 */
    3.21 -static void __edge_IO_APIC_irq (unsigned int irq)
    3.22 +/* mask = 1, trigger = 0 */
    3.23 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
    3.24  {
    3.25 -	__modify_IO_APIC_irq(irq, 0, 0x00008000);
    3.26 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
    3.27  }
    3.28  
    3.29 -/* trigger = 1 */
    3.30 -static void __level_IO_APIC_irq (unsigned int irq)
    3.31 +/* mask = 0, trigger = 1 */
    3.32 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
    3.33  {
    3.34 -	__modify_IO_APIC_irq(irq, 0x00008000, 0);
    3.35 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
    3.36  }
    3.37  
    3.38  static void mask_IO_APIC_irq (unsigned int irq)
    3.39 @@ -227,6 +231,423 @@ static void set_ioapic_affinity_irq(unsi
    3.40  	spin_unlock_irqrestore(&ioapic_lock, flags);
    3.41  }
    3.42  
    3.43 +#if defined(CONFIG_IRQBALANCE)
    3.44 +# include <asm/processor.h>	/* kernel_thread() */
    3.45 +# include <xen/kernel_stat.h>	/* kstat */
    3.46 +# include <xen/slab.h>		/* kmalloc() */
    3.47 +# include <xen/timer.h>	/* time_after() */
    3.48 + 
    3.49 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
    3.50 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
    3.51 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
    3.52 +# else
    3.53 +#  define TDprintk(x...) 
    3.54 +#  define Dprintk(x...) 
    3.55 +# endif
    3.56 +
    3.57 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
    3.58 +
    3.59 +#define IRQBALANCE_CHECK_ARCH -999
    3.60 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
    3.61 +static int physical_balance = 0;
    3.62 +
    3.63 +struct irq_cpu_info {
    3.64 +	unsigned long * last_irq;
    3.65 +	unsigned long * irq_delta;
    3.66 +	unsigned long irq;
    3.67 +} irq_cpu_data[NR_CPUS];
    3.68 +
    3.69 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
    3.70 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
    3.71 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
    3.72 +
    3.73 +#define IDLE_ENOUGH(cpu,now) \
    3.74 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
    3.75 +
    3.76 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
    3.77 +
    3.78 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
    3.79 +
    3.80 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
    3.81 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
    3.82 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
    3.83 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
    3.84 +
    3.85 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
    3.86 +
    3.87 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
    3.88 +			unsigned long now, int direction)
    3.89 +{
    3.90 +	int search_idle = 1;
    3.91 +	int cpu = curr_cpu;
    3.92 +
    3.93 +	goto inside;
    3.94 +
    3.95 +	do {
    3.96 +		if (unlikely(cpu == curr_cpu))
    3.97 +			search_idle = 0;
    3.98 +inside:
    3.99 +		if (direction == 1) {
   3.100 +			cpu++;
   3.101 +			if (cpu >= NR_CPUS)
   3.102 +				cpu = 0;
   3.103 +		} else {
   3.104 +			cpu--;
   3.105 +			if (cpu == -1)
   3.106 +				cpu = NR_CPUS-1;
   3.107 +		}
   3.108 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
   3.109 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
   3.110 +
   3.111 +	return cpu;
   3.112 +}
   3.113 +
   3.114 +static inline void balance_irq(int cpu, int irq)
   3.115 +{
   3.116 +	unsigned long now = jiffies;
   3.117 +	cpumask_t allowed_mask;
   3.118 +	unsigned int new_cpu;
   3.119 +		
   3.120 +	if (irqbalance_disabled)
   3.121 +		return; 
   3.122 +
   3.123 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
   3.124 +	new_cpu = move(cpu, allowed_mask, now, 1);
   3.125 +	if (cpu != new_cpu) {
   3.126 +		irq_desc_t *desc = irq_desc + irq;
   3.127 +		unsigned long flags;
   3.128 +
   3.129 +		spin_lock_irqsave(&desc->lock, flags);
   3.130 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
   3.131 +		spin_unlock_irqrestore(&desc->lock, flags);
   3.132 +	}
   3.133 +}
   3.134 +
   3.135 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
   3.136 +{
   3.137 +	int i, j;
   3.138 +	Dprintk("Rotating IRQs among CPUs.\n");
   3.139 +	for (i = 0; i < NR_CPUS; i++) {
   3.140 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
   3.141 +			if (!irq_desc[j].action)
   3.142 +				continue;
   3.143 +			/* Is it a significant load ?  */
   3.144 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
   3.145 +						useful_load_threshold)
   3.146 +				continue;
   3.147 +			balance_irq(i, j);
   3.148 +		}
   3.149 +	}
   3.150 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
   3.151 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
   3.152 +	return;
   3.153 +}
   3.154 +
   3.155 +static void do_irq_balance(void)
   3.156 +{
   3.157 +	int i, j;
   3.158 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
   3.159 +	unsigned long move_this_load = 0;
   3.160 +	int max_loaded = 0, min_loaded = 0;
   3.161 +	int load;
   3.162 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
   3.163 +	int selected_irq;
   3.164 +	int tmp_loaded, first_attempt = 1;
   3.165 +	unsigned long tmp_cpu_irq;
   3.166 +	unsigned long imbalance = 0;
   3.167 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
   3.168 +
   3.169 +	for (i = 0; i < NR_CPUS; i++) {
   3.170 +		int package_index;
   3.171 +		CPU_IRQ(i) = 0;
   3.172 +		if (!cpu_online(i))
   3.173 +			continue;
   3.174 +		package_index = CPU_TO_PACKAGEINDEX(i);
   3.175 +		for (j = 0; j < NR_IRQS; j++) {
   3.176 +			unsigned long value_now, delta;
   3.177 +			/* Is this an active IRQ? */
   3.178 +			if (!irq_desc[j].action)
   3.179 +				continue;
   3.180 +			if ( package_index == i )
   3.181 +				IRQ_DELTA(package_index,j) = 0;
   3.182 +			/* Determine the total count per processor per IRQ */
   3.183 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
   3.184 +
   3.185 +			/* Determine the activity per processor per IRQ */
   3.186 +			delta = value_now - LAST_CPU_IRQ(i,j);
   3.187 +
   3.188 +			/* Update last_cpu_irq[][] for the next time */
   3.189 +			LAST_CPU_IRQ(i,j) = value_now;
   3.190 +
   3.191 +			/* Ignore IRQs whose rate is less than the clock */
   3.192 +			if (delta < useful_load_threshold)
   3.193 +				continue;
   3.194 +			/* update the load for the processor or package total */
   3.195 +			IRQ_DELTA(package_index,j) += delta;
   3.196 +
   3.197 +			/* Keep track of the higher numbered sibling as well */
   3.198 +			if (i != package_index)
   3.199 +				CPU_IRQ(i) += delta;
   3.200 +			/*
   3.201 +			 * We have sibling A and sibling B in the package
   3.202 +			 *
   3.203 +			 * cpu_irq[A] = load for cpu A + load for cpu B
   3.204 +			 * cpu_irq[B] = load for cpu B
   3.205 +			 */
   3.206 +			CPU_IRQ(package_index) += delta;
   3.207 +		}
   3.208 +	}
   3.209 +	/* Find the least loaded processor package */
   3.210 +	for (i = 0; i < NR_CPUS; i++) {
   3.211 +		if (!cpu_online(i))
   3.212 +			continue;
   3.213 +		if (i != CPU_TO_PACKAGEINDEX(i))
   3.214 +			continue;
   3.215 +		if (min_cpu_irq > CPU_IRQ(i)) {
   3.216 +			min_cpu_irq = CPU_IRQ(i);
   3.217 +			min_loaded = i;
   3.218 +		}
   3.219 +	}
   3.220 +	max_cpu_irq = ULONG_MAX;
   3.221 +
   3.222 +tryanothercpu:
   3.223 +	/* Look for heaviest loaded processor.
   3.224 +	 * We may come back to get the next heaviest loaded processor.
   3.225 +	 * Skip processors with trivial loads.
   3.226 +	 */
   3.227 +	tmp_cpu_irq = 0;
   3.228 +	tmp_loaded = -1;
   3.229 +	for (i = 0; i < NR_CPUS; i++) {
   3.230 +		if (!cpu_online(i))
   3.231 +			continue;
   3.232 +		if (i != CPU_TO_PACKAGEINDEX(i))
   3.233 +			continue;
   3.234 +		if (max_cpu_irq <= CPU_IRQ(i)) 
   3.235 +			continue;
   3.236 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
   3.237 +			tmp_cpu_irq = CPU_IRQ(i);
   3.238 +			tmp_loaded = i;
   3.239 +		}
   3.240 +	}
   3.241 +
   3.242 +	if (tmp_loaded == -1) {
   3.243 + 	 /* In the case of a small number of heavy interrupt sources, 
   3.244 +	  * some of the cpus may be loaded too much. We use Ingo's original 
   3.245 +	  * approach to rotate them around.
   3.246 +	  */
   3.247 +		if (!first_attempt && imbalance >= useful_load_threshold) {
   3.248 +			rotate_irqs_among_cpus(useful_load_threshold);
   3.249 +			return;
   3.250 +		}
   3.251 +		goto not_worth_the_effort;
   3.252 +	}
   3.253 +	
   3.254 +	first_attempt = 0;		/* heaviest search */
   3.255 +	max_cpu_irq = tmp_cpu_irq;	/* load */
   3.256 +	max_loaded = tmp_loaded;	/* processor */
   3.257 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
   3.258 +	
   3.259 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
   3.260 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
   3.261 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
   3.262 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
   3.263 +	Dprintk("load imbalance = %lu\n", imbalance);
   3.264 +
   3.265 +	/* if imbalance is less than approx 10% of max load, then
   3.266 +	 * observe diminishing returns action. - quit
   3.267 +	 */
   3.268 +	if (imbalance < (max_cpu_irq >> 3)) {
   3.269 +		Dprintk("Imbalance too trivial\n");
   3.270 +		goto not_worth_the_effort;
   3.271 +	}
   3.272 +
   3.273 +tryanotherirq:
   3.274 +	/* if we select an IRQ to move that can't go where we want, then
   3.275 +	 * see if there is another one to try.
   3.276 +	 */
   3.277 +	move_this_load = 0;
   3.278 +	selected_irq = -1;
   3.279 +	for (j = 0; j < NR_IRQS; j++) {
   3.280 +		/* Is this an active IRQ? */
   3.281 +		if (!irq_desc[j].action)
   3.282 +			continue;
   3.283 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
   3.284 +			continue;
   3.285 +		/* Try to find the IRQ that is closest to the imbalance
   3.286 +		 * without going over.
   3.287 +		 */
   3.288 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
   3.289 +			move_this_load = IRQ_DELTA(max_loaded,j);
   3.290 +			selected_irq = j;
   3.291 +		}
   3.292 +	}
   3.293 +	if (selected_irq == -1) {
   3.294 +		goto tryanothercpu;
   3.295 +	}
   3.296 +
   3.297 +	imbalance = move_this_load;
   3.298 +	
   3.299 +	/* For physical_balance case, we accumulated both load
   3.300 +	 * values in one of the siblings' cpu_irq[] slots,
   3.301 +	 * to use the same code for physical and logical processors
   3.302 +	 * as much as possible. 
   3.303 +	 *
   3.304 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
   3.305 +	 * sibling A and sibling B in the slot for the lowest numbered
   3.306 +	 * sibling (A), _AND_ the load for sibling B in the slot for
   3.307 +	 * the higher numbered sibling.
   3.308 +	 *
   3.309 +	 * We seek the least loaded sibling by making the comparison
   3.310 +	 * (A+B)/2 vs B
   3.311 +	 */
   3.312 +	load = CPU_IRQ(min_loaded) >> 1;
   3.313 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
   3.314 +		if (load > CPU_IRQ(j)) {
   3.315 +			/* This won't change cpu_sibling_map[min_loaded] */
   3.316 +			load = CPU_IRQ(j);
   3.317 +			min_loaded = j;
   3.318 +		}
   3.319 +	}
   3.320 +
   3.321 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
   3.322 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
   3.323 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
   3.324 +
   3.325 +	if (!cpus_empty(tmp)) {
   3.326 +		irq_desc_t *desc = irq_desc + selected_irq;
   3.327 +		unsigned long flags;
   3.328 +
   3.329 +		Dprintk("irq = %d moved to cpu = %d\n",
   3.330 +				selected_irq, min_loaded);
   3.331 +		/* mark for change destination */
   3.332 +		spin_lock_irqsave(&desc->lock, flags);
   3.333 +		pending_irq_balance_cpumask[selected_irq] =
   3.334 +					cpumask_of_cpu(min_loaded);
   3.335 +		spin_unlock_irqrestore(&desc->lock, flags);
   3.336 +		/* Since we made a change, come back sooner to 
   3.337 +		 * check for more variation.
   3.338 +		 */
   3.339 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
   3.340 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
   3.341 +		return;
   3.342 +	}
   3.343 +	goto tryanotherirq;
   3.344 +
   3.345 +not_worth_the_effort:
   3.346 +	/*
   3.347 +	 * if we did not find an IRQ to move, then adjust the time interval
   3.348 +	 * upward
   3.349 +	 */
   3.350 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
   3.351 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
   3.352 +	Dprintk("IRQ worth rotating not found\n");
   3.353 +	return;
   3.354 +}
   3.355 +
   3.356 +static int balanced_irq(void *unused)
   3.357 +{
   3.358 +	int i;
   3.359 +	unsigned long prev_balance_time = jiffies;
   3.360 +	long time_remaining = balanced_irq_interval;
   3.361 +
   3.362 +	daemonize("kirqd");
   3.363 +	
   3.364 +	/* push everything to CPU 0 to give us a starting point.  */
   3.365 +	for (i = 0 ; i < NR_IRQS ; i++) {
   3.366 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
   3.367 +	}
   3.368 +
   3.369 +	for ( ; ; ) {
   3.370 +		set_current_state(TASK_INTERRUPTIBLE);
   3.371 +		time_remaining = schedule_timeout(time_remaining);
   3.372 +		try_to_freeze(PF_FREEZE);
   3.373 +		if (time_after(jiffies,
   3.374 +				prev_balance_time+balanced_irq_interval)) {
   3.375 +			do_irq_balance();
   3.376 +			prev_balance_time = jiffies;
   3.377 +			time_remaining = balanced_irq_interval;
   3.378 +		}
   3.379 +	}
   3.380 +	return 0;
   3.381 +}
   3.382 +
   3.383 +static int __init balanced_irq_init(void)
   3.384 +{
   3.385 +	int i;
   3.386 +	struct cpuinfo_x86 *c;
   3.387 +	cpumask_t tmp;
   3.388 +
   3.389 +	cpus_shift_right(tmp, cpu_online_map, 2);
   3.390 +        c = &boot_cpu_data;
   3.391 +	/* When not overwritten by the command line ask subarchitecture. */
   3.392 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
   3.393 +		irqbalance_disabled = NO_BALANCE_IRQ;
   3.394 +	if (irqbalance_disabled)
   3.395 +		return 0;
   3.396 +	
   3.397 +	 /* disable irqbalance completely if there is only one processor online */
   3.398 +	if (num_online_cpus() < 2) {
   3.399 +		irqbalance_disabled = 1;
   3.400 +		return 0;
   3.401 +	}
   3.402 +	/*
   3.403 +	 * Enable physical balance only if more than 1 physical processor
   3.404 +	 * is present
   3.405 +	 */
   3.406 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
   3.407 +		physical_balance = 1;
   3.408 +
   3.409 +	for (i = 0; i < NR_CPUS; i++) {
   3.410 +		if (!cpu_online(i))
   3.411 +			continue;
   3.412 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
   3.413 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
   3.414 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
   3.415 +			printk(KERN_ERR "balanced_irq_init: out of memory");
   3.416 +			goto failed;
   3.417 +		}
   3.418 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
   3.419 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
   3.420 +	}
   3.421 +	
   3.422 +	printk(KERN_INFO "Starting balanced_irq\n");
   3.423 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
   3.424 +		return 0;
   3.425 +	else 
   3.426 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
   3.427 +failed:
   3.428 +	for (i = 0; i < NR_CPUS; i++) {
   3.429 +		if(irq_cpu_data[i].irq_delta)
   3.430 +			kfree(irq_cpu_data[i].irq_delta);
   3.431 +		if(irq_cpu_data[i].last_irq)
   3.432 +			kfree(irq_cpu_data[i].last_irq);
   3.433 +	}
   3.434 +	return 0;
   3.435 +}
   3.436 +
   3.437 +int __init irqbalance_disable(char *str)
   3.438 +{
   3.439 +	irqbalance_disabled = 1;
   3.440 +	return 0;
   3.441 +}
   3.442 +
   3.443 +__setup("noirqbalance", irqbalance_disable);
   3.444 +
   3.445 +static inline void move_irq(int irq)
   3.446 +{
   3.447 +	/* note - we hold the desc->lock */
   3.448 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
   3.449 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
   3.450 +		cpus_clear(pending_irq_balance_cpumask[irq]);
   3.451 +	}
   3.452 +}
   3.453 +
   3.454 +late_initcall(balanced_irq_init);
   3.455 +
   3.456 +#else /* !CONFIG_IRQBALANCE */
   3.457 +static inline void move_irq(int irq) { }
   3.458 +#endif /* CONFIG_IRQBALANCE */
   3.459 +
   3.460  #ifndef CONFIG_SMP
   3.461  void fastcall send_IPI_self(int vector)
   3.462  {
   3.463 @@ -1188,6 +1609,7 @@ static unsigned int startup_edge_ioapic_
   3.464   */
   3.465  static void ack_edge_ioapic_irq(unsigned int irq)
   3.466  {
   3.467 +	move_irq(irq);
   3.468  	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
   3.469  					== (IRQ_PENDING | IRQ_DISABLED))
   3.470  		mask_IO_APIC_irq(irq);
   3.471 @@ -1215,13 +1637,12 @@ static unsigned int startup_level_ioapic
   3.472  	return 0; /* don't check for pending */
   3.473  }
   3.474  
   3.475 -static void mask_and_ack_level_ioapic_irq (unsigned int irq)
   3.476 +static void end_level_ioapic_irq (unsigned int irq)
   3.477  {
   3.478  	unsigned long v;
   3.479  	int i;
   3.480  
   3.481 -	mask_IO_APIC_irq(irq);
   3.482 -
   3.483 +	move_irq(irq);
   3.484  /*
   3.485   * It appears there is an erratum which affects at least version 0x11
   3.486   * of I/O APIC (that's the 82093AA and cores integrated into various
   3.487 @@ -1250,17 +1671,13 @@ static void mask_and_ack_level_ioapic_ir
   3.488  	if (!(v & (1 << (i & 0x1f)))) {
   3.489  		atomic_inc(&irq_mis_count);
   3.490  		spin_lock(&ioapic_lock);
   3.491 -		__edge_IO_APIC_irq(irq);
   3.492 -		__level_IO_APIC_irq(irq);
   3.493 +		__mask_and_edge_IO_APIC_irq(irq);
   3.494 +		__unmask_and_level_IO_APIC_irq(irq);
   3.495  		spin_unlock(&ioapic_lock);
   3.496  	}
   3.497  }
   3.498  
   3.499 -static void end_level_ioapic_irq (unsigned int irq)
   3.500 -{
   3.501 -	unmask_IO_APIC_irq(irq);
   3.502 -}
   3.503 -
   3.504 +#ifdef CONFIG_PCI_MSI
   3.505  static unsigned int startup_edge_ioapic_vector(unsigned int vector)
   3.506  {
   3.507  	int irq = vector_to_irq(vector);
   3.508 @@ -1282,13 +1699,6 @@ static unsigned int startup_level_ioapic
   3.509  	return startup_level_ioapic_irq (irq);
   3.510  }
   3.511  
   3.512 -static void mask_and_ack_level_ioapic_vector (unsigned int vector)
   3.513 -{
   3.514 -	int irq = vector_to_irq(vector);
   3.515 -
   3.516 -	mask_and_ack_level_ioapic_irq(irq);
   3.517 -}
   3.518 -
   3.519  static void end_level_ioapic_vector (unsigned int vector)
   3.520  {
   3.521  	int irq = vector_to_irq(vector);
   3.522 @@ -1317,11 +1727,7 @@ static void set_ioapic_affinity_vector (
   3.523  
   3.524  	set_ioapic_affinity_irq(irq, cpu_mask);
   3.525  }
   3.526 -
   3.527 -static void noop_ioapic_vector(unsigned int vector)
   3.528 -{
   3.529 -	/* nothing */
   3.530 -}
   3.531 +#endif
   3.532  
   3.533  /*
   3.534   * Level and edge triggered IO-APIC interrupts need different handling,
   3.535 @@ -1333,24 +1739,24 @@ static void noop_ioapic_vector(unsigned 
   3.536   */
   3.537  static struct hw_interrupt_type ioapic_edge_type = {
   3.538  	.typename 	= "IO-APIC-edge",
   3.539 -	.startup 	= startup_edge_ioapic_vector,
   3.540 -	.shutdown 	= noop_ioapic_vector,
   3.541 -	.enable 	= unmask_IO_APIC_vector,
   3.542 -	.disable 	= noop_ioapic_vector,
   3.543 -	.ack 		= ack_edge_ioapic_vector,
   3.544 -	.end 		= noop_ioapic_vector,
   3.545 -	.set_affinity 	= set_ioapic_affinity_vector,
   3.546 +	.startup 	= startup_edge_ioapic,
   3.547 +	.shutdown 	= shutdown_edge_ioapic,
   3.548 +	.enable 	= enable_edge_ioapic,
   3.549 +	.disable 	= disable_edge_ioapic,
   3.550 +	.ack 		= ack_edge_ioapic,
   3.551 +	.end 		= end_edge_ioapic,
   3.552 +	.set_affinity 	= set_ioapic_affinity,
   3.553  };
   3.554  
   3.555  static struct hw_interrupt_type ioapic_level_type = {
   3.556  	.typename 	= "IO-APIC-level",
   3.557 -	.startup 	= startup_level_ioapic_vector,
   3.558 -	.shutdown 	= mask_IO_APIC_vector,
   3.559 -	.enable 	= unmask_IO_APIC_vector,
   3.560 -	.disable 	= mask_IO_APIC_vector,
   3.561 -	.ack 		= mask_and_ack_level_ioapic_vector,
   3.562 -	.end 		= end_level_ioapic_vector,
   3.563 -	.set_affinity 	= set_ioapic_affinity_vector,
   3.564 +	.startup 	= startup_level_ioapic,
   3.565 +	.shutdown 	= shutdown_level_ioapic,
   3.566 +	.enable 	= enable_level_ioapic,
   3.567 +	.disable 	= disable_level_ioapic,
   3.568 +	.ack 		= mask_and_ack_level_ioapic,
   3.569 +	.end 		= end_level_ioapic,
   3.570 +	.set_affinity 	= set_ioapic_affinity,
   3.571  };
   3.572  
   3.573  static inline void init_IO_APIC_traps(void)
   3.574 @@ -1850,7 +2256,8 @@ int ioapic_guest_write(int apicid, int a
   3.575              return 0;
   3.576  
   3.577          /* Set the correct irq-handling type. */
   3.578 -        ioapic_register_intr(irq, rte.vector, rte.trigger);
   3.579 +        irq_desc[irq].handler = rte.trigger ? 
   3.580 +            &ioapic_level_type: &ioapic_edge_type;
   3.581  
   3.582          /* Record the pin<->irq mapping. */
   3.583          for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
     4.1 --- a/xen/arch/x86/irq.c	Tue May 10 14:49:26 2005 +0000
     4.2 +++ b/xen/arch/x86/irq.c	Tue May 10 16:35:45 2005 +0000
     4.3 @@ -186,30 +186,26 @@ typedef struct {
     4.4      struct exec_domain *guest[IRQ_MAX_GUESTS];
     4.5  } irq_guest_action_t;
     4.6  
     4.7 -extern int vector_irq[];
     4.8 -
     4.9  static void __do_IRQ_guest(int irq)
    4.10  {
    4.11      irq_desc_t         *desc = &irq_desc[irq];
    4.12      irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
    4.13      struct exec_domain *ed;
    4.14 -    int                 i, pirq;
    4.15 -
    4.16 -    pirq = platform_legacy_irq(irq) ? irq : vector_irq[irq];
    4.17 +    int                 i;
    4.18  
    4.19      for ( i = 0; i < action->nr_guests; i++ )
    4.20      {
    4.21          ed = action->guest[i];
    4.22 -        if ( !test_and_set_bit(pirq, &ed->domain->pirq_mask) )
    4.23 +        if ( !test_and_set_bit(irq, &ed->domain->pirq_mask) )
    4.24              action->in_flight++;
    4.25 -        send_guest_pirq(ed, pirq);
    4.26 +        send_guest_pirq(ed, irq);
    4.27      }
    4.28  }
    4.29  
    4.30  int pirq_guest_unmask(struct domain *d)
    4.31  {
    4.32      irq_desc_t    *desc;
    4.33 -    unsigned int   i, j, pirq, vector;
    4.34 +    unsigned int   i, j, pirq;
    4.35      u32            m;
    4.36      shared_info_t *s = d->shared_info;
    4.37  
    4.38 @@ -221,13 +217,12 @@ int pirq_guest_unmask(struct domain *d)
    4.39              j = find_first_set_bit(m);
    4.40              m &= ~(1 << j);
    4.41              pirq = (i << 5) + j;
    4.42 -            vector = platform_legacy_irq(pirq) ? pirq : IO_APIC_VECTOR(pirq);
    4.43 -            desc = &irq_desc[vector];
    4.44 +            desc = &irq_desc[pirq];
    4.45              spin_lock_irq(&desc->lock);
    4.46              if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
    4.47                   test_and_clear_bit(pirq, &d->pirq_mask) &&
    4.48                   (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
    4.49 -                desc->handler->end(vector);
    4.50 +                desc->handler->end(pirq);
    4.51              spin_unlock_irq(&desc->lock);
    4.52          }
    4.53      }
    4.54 @@ -238,17 +233,14 @@ int pirq_guest_unmask(struct domain *d)
    4.55  int pirq_guest_bind(struct exec_domain *ed, int irq, int will_share)
    4.56  {
    4.57      struct domain      *d = ed->domain;
    4.58 -    irq_desc_t         *desc;
    4.59 +    irq_desc_t         *desc = &irq_desc[irq];
    4.60      irq_guest_action_t *action;
    4.61      unsigned long       flags;
    4.62 -    int                 rc = 0, vector;
    4.63 +    int                 rc = 0;
    4.64  
    4.65      if ( !IS_CAPABLE_PHYSDEV(d) )
    4.66          return -EPERM;
    4.67  
    4.68 -    vector = platform_legacy_irq(irq) ? irq : IO_APIC_VECTOR(irq);
    4.69 -    desc = &irq_desc[vector];
    4.70 -
    4.71      spin_lock_irqsave(&desc->lock, flags);
    4.72  
    4.73      action = (irq_guest_action_t *)desc->action;
    4.74 @@ -278,12 +270,12 @@ int pirq_guest_bind(struct exec_domain *
    4.75          desc->depth = 0;
    4.76          desc->status |= IRQ_GUEST;
    4.77          desc->status &= ~IRQ_DISABLED;
    4.78 -        desc->handler->startup(vector);
    4.79 +        desc->handler->startup(irq);
    4.80  
    4.81          /* Attempt to bind the interrupt target to the correct CPU. */
    4.82          if ( desc->handler->set_affinity != NULL )
    4.83              desc->handler->set_affinity(
    4.84 -                vector, apicid_to_phys_cpu_present(ed->processor));
    4.85 +                irq, apicid_to_phys_cpu_present(ed->processor));
    4.86      }
    4.87      else if ( !will_share || !action->shareable )
    4.88      {
    4.89 @@ -309,13 +301,10 @@ int pirq_guest_bind(struct exec_domain *
    4.90  
    4.91  int pirq_guest_unbind(struct domain *d, int irq)
    4.92  {
    4.93 -    irq_desc_t         *desc;
    4.94 +    irq_desc_t         *desc = &irq_desc[irq];
    4.95      irq_guest_action_t *action;
    4.96      unsigned long       flags;
    4.97 -    int                 i, vector;
    4.98 -
    4.99 -    vector = platform_legacy_irq(irq) ? irq : IO_APIC_VECTOR(irq);
   4.100 -    desc = &irq_desc[vector];
   4.101 +    int                 i;
   4.102  
   4.103      spin_lock_irqsave(&desc->lock, flags);
   4.104  
   4.105 @@ -323,7 +312,7 @@ int pirq_guest_unbind(struct domain *d, 
   4.106  
   4.107      if ( test_and_clear_bit(irq, &d->pirq_mask) &&
   4.108           (--action->in_flight == 0) )
   4.109 -        desc->handler->end(vector);
   4.110 +        desc->handler->end(irq);
   4.111  
   4.112      if ( action->nr_guests == 1 )
   4.113      {
   4.114 @@ -332,7 +321,7 @@ int pirq_guest_unbind(struct domain *d, 
   4.115          desc->depth   = 1;
   4.116          desc->status |= IRQ_DISABLED;
   4.117          desc->status &= ~IRQ_GUEST;
   4.118 -        desc->handler->shutdown(vector);
   4.119 +        desc->handler->shutdown(irq);
   4.120      }
   4.121      else
   4.122      {
   4.123 @@ -346,3 +335,26 @@ int pirq_guest_unbind(struct domain *d, 
   4.124      spin_unlock_irqrestore(&desc->lock, flags);    
   4.125      return 0;
   4.126  }
   4.127 +
   4.128 +/* Can a guest bind 'irq' now? will_share: the caller accepts sharing it. */
   4.129 +int pirq_guest_bindable(int irq, int will_share)
   4.130 +{
   4.131 +    irq_desc_t         *desc = &irq_desc[irq];
   4.132 +    irq_guest_action_t *action;
   4.133 +    unsigned long       flags;
   4.134 +    int                 okay;
   4.135 +
   4.136 +    spin_lock_irqsave(&desc->lock, flags);
   4.137 +    action = (irq_guest_action_t *)desc->action;
   4.138 +
   4.139 +    if ( !(desc->status & IRQ_GUEST) )
   4.140 +        /* Not guest-bound: free only if nothing is installed; never deref. */
   4.141 +        okay = (action == NULL);
   4.142 +    else
   4.143 +        /* Guest-bound: shareable, caller shares, and below the share limit. */
   4.144 +        okay = ((action != NULL) && action->shareable && will_share &&
   4.145 +                (action->nr_guests != IRQ_MAX_GUESTS));
   4.146 +
   4.147 +    spin_unlock_irqrestore(&desc->lock, flags);
   4.148 +    return okay;
   4.149 +}
     5.1 --- a/xen/arch/x86/physdev.c	Tue May 10 14:49:26 2005 +0000
     5.2 +++ b/xen/arch/x86/physdev.c	Tue May 10 16:35:45 2005 +0000
     5.3 @@ -41,7 +41,7 @@ long do_physdev_op(physdev_op_t *uop)
     5.4  {
     5.5      physdev_op_t op;
     5.6      long         ret;
     5.7 -    int          irq, vector;
     5.8 +    int          irq;
     5.9  
    5.10      if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
    5.11          return -EFAULT;
    5.12 @@ -87,13 +87,8 @@ long do_physdev_op(physdev_op_t *uop)
    5.13          if ( (irq = op.u.irq_op.irq) >= NR_IRQS )
    5.14              return -EINVAL;
    5.15          
    5.16 -        op.u.irq_op.vector = vector = assign_irq_vector(irq);
    5.17 -
    5.18 -        if ( use_pci_vector() && !platform_legacy_irq(irq) )
    5.19 -            set_intr_gate(vector, interrupt[vector]);
    5.20 -        else
    5.21 -            set_intr_gate(vector, interrupt[irq]);
    5.22 -
    5.23 +        op.u.irq_op.vector = assign_irq_vector(irq);
    5.24 +        set_intr_gate(op.u.irq_op.vector, interrupt[irq]);
    5.25          ret = 0;
    5.26          break;
    5.27  
     6.1 --- a/xen/include/asm-x86/io_apic.h	Tue May 10 14:49:26 2005 +0000
     6.2 +++ b/xen/include/asm-x86/io_apic.h	Tue May 10 16:35:45 2005 +0000
     6.3 @@ -14,7 +14,45 @@
     6.4  
     6.5  #ifdef CONFIG_X86_IO_APIC
     6.6  
     6.7 +#ifdef CONFIG_PCI_MSI /* generic *_ioapic names -> vector-indexed variants */
     6.8  static inline int use_pci_vector(void)	{return 1;}
     6.9 +static inline void disable_edge_ioapic_vector(unsigned int vector) { }
    6.10 +static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { }
    6.11 +static inline void end_edge_ioapic_vector (unsigned int vector) { }
    6.12 +#define startup_level_ioapic	startup_level_ioapic_vector
    6.13 +#define shutdown_level_ioapic	mask_IO_APIC_vector
    6.14 +#define enable_level_ioapic	unmask_IO_APIC_vector
    6.15 +#define disable_level_ioapic	mask_IO_APIC_vector
    6.16 +#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_vector
    6.17 +#define end_level_ioapic	end_level_ioapic_vector
    6.18 +#define set_ioapic_affinity	set_ioapic_affinity_vector
    6.19 +
    6.20 +#define startup_edge_ioapic 	startup_edge_ioapic_vector
    6.21 +#define shutdown_edge_ioapic 	disable_edge_ioapic_vector
    6.22 +#define enable_edge_ioapic 	unmask_IO_APIC_vector
    6.23 +#define disable_edge_ioapic 	disable_edge_ioapic_vector
    6.24 +#define ack_edge_ioapic 	ack_edge_ioapic_vector
    6.25 +#define end_edge_ioapic 	end_edge_ioapic_vector
    6.26 +#else /* !CONFIG_PCI_MSI: generic *_ioapic names -> irq-indexed variants */
    6.27 +static inline int use_pci_vector(void)	{return 0;}
    6.28 +static inline void disable_edge_ioapic_irq(unsigned int irq) { }
    6.29 +static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { }
    6.30 +static inline void end_edge_ioapic_irq (unsigned int irq) { }
    6.31 +#define startup_level_ioapic	startup_level_ioapic_irq
    6.32 +#define shutdown_level_ioapic	mask_IO_APIC_irq
    6.33 +#define enable_level_ioapic	unmask_IO_APIC_irq
    6.34 +#define disable_level_ioapic	mask_IO_APIC_irq
    6.35 +#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_irq
    6.36 +#define end_level_ioapic	end_level_ioapic_irq
    6.37 +#define set_ioapic_affinity	set_ioapic_affinity_irq
    6.38 +
    6.39 +#define startup_edge_ioapic 	startup_edge_ioapic_irq
    6.40 +#define shutdown_edge_ioapic 	disable_edge_ioapic_irq
    6.41 +#define enable_edge_ioapic 	unmask_IO_APIC_irq
    6.42 +#define disable_edge_ioapic 	disable_edge_ioapic_irq
    6.43 +#define ack_edge_ioapic 	ack_edge_ioapic_irq
    6.44 +#define end_edge_ioapic 	end_edge_ioapic_irq
    6.45 +#endif /* CONFIG_PCI_MSI */
    6.46  
    6.47  #define IO_APIC_BASE(idx) \
    6.48  		((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
     7.1 --- a/xen/include/asm-x86/mach-default/irq_vectors_limits.h	Tue May 10 14:49:26 2005 +0000
     7.2 +++ b/xen/include/asm-x86/mach-default/irq_vectors_limits.h	Tue May 10 16:35:45 2005 +0000
     7.3 @@ -1,8 +1,21 @@
     7.4  #ifndef _ASM_IRQ_VECTORS_LIMITS_H
     7.5  #define _ASM_IRQ_VECTORS_LIMITS_H
     7.6  
     7.7 -/* MSI limits */
     7.8 +#ifdef CONFIG_PCI_MSI /* MSI limits */
     7.9  #define NR_IRQS FIRST_SYSTEM_VECTOR
    7.10  #define NR_IRQ_VECTORS NR_IRQS
    7.11 +#else /* !CONFIG_PCI_MSI */
    7.12 +#ifdef CONFIG_X86_IO_APIC
    7.13 +#define NR_IRQS 224
    7.14 +# if (224 >= 32 * NR_CPUS) /* NR_IRQ_VECTORS = max(NR_IRQS, 32 * NR_CPUS) */
    7.15 +# define NR_IRQ_VECTORS NR_IRQS
    7.16 +# else
    7.17 +# define NR_IRQ_VECTORS (32 * NR_CPUS)
    7.18 +# endif
    7.19 +#else /* !CONFIG_X86_IO_APIC */
    7.20 +#define NR_IRQS 16
    7.21 +#define NR_IRQ_VECTORS NR_IRQS
    7.22 +#endif /* CONFIG_X86_IO_APIC */
    7.23 +#endif /* CONFIG_PCI_MSI */
    7.24  
    7.25  #endif /* _ASM_IRQ_VECTORS_LIMITS_H */
     8.1 --- a/xen/include/xen/irq.h	Tue May 10 14:49:26 2005 +0000
     8.2 +++ b/xen/include/xen/irq.h	Tue May 10 16:35:45 2005 +0000
     8.3 @@ -71,5 +71,6 @@ struct exec_domain;
     8.4  extern int pirq_guest_unmask(struct domain *p);
     8.5  extern int pirq_guest_bind(struct exec_domain *p, int irq, int will_share);
     8.6  extern int pirq_guest_unbind(struct domain *p, int irq);
     8.7 +extern int pirq_guest_bindable(int irq, int will_share);
     8.8  
     8.9  #endif /* __XEN_IRQ_H__ */