ia64/xen-unstable

changeset 5363:731cd57862e5

bitkeeper revision 1.1691 (42a5bd892a-21ifB8kNwgNvEid-K_Q)

Remove IRQ balancer from Xen. It is unused, and balancing will be done
by the guests themselves.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Jun 07 15:30:17 2005 +0000 (2005-06-07)
parents 9d9e48be101d
children ab753acf69d1 13c7fd8311ba
files xen/arch/x86/io_apic.c
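
With this change, interrupt balancing moves out of the hypervisor entirely; a
guest kernel that wants its IRQs spread across CPUs must arrange it itself. As
an illustration only (not part of this changeset): a Linux guest typically
rebalances from userspace by writing a CPU mask to /proc/irq/<n>/smp_affinity,
the same interface the irqbalance daemon uses. A minimal sketch, where the IRQ
number and mask are hypothetical:

    /*
     * Illustration only -- not code from this changeset. Writing a hex
     * CPU mask to /proc/irq/<n>/smp_affinity asks the guest kernel to
     * retarget that interrupt; irqbalance does exactly this.
     */
    #include <stdio.h>

    int main(void)
    {
        const int irq = 19;              /* hypothetical IRQ number */
        const unsigned int mask = 0x2;   /* CPU 1 only */
        char path[64];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/irq/%d/smp_affinity", irq);
        if ((f = fopen(path, "w")) == NULL) {
            perror(path);
            return 1;
        }
        fprintf(f, "%x\n", mask);        /* kernel reprograms the IRQ */
        return fclose(f) ? 1 : 0;
    }
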
line diff
     1.1 --- a/xen/arch/x86/io_apic.c	Tue Jun 07 14:02:39 2005 +0000
     1.2 +++ b/xen/arch/x86/io_apic.c	Tue Jun 07 15:30:17 2005 +0000
     1.3 @@ -232,440 +232,6 @@ static void set_ioapic_affinity_irq(unsi
     1.4  	spin_unlock_irqrestore(&ioapic_lock, flags);
     1.5  }
     1.6  
     1.7 -#if defined(CONFIG_IRQBALANCE)
     1.8 -# include <asm/processor.h>	/* kernel_thread() */
     1.9 -# include <xen/kernel_stat.h>	/* kstat */
    1.10 -# include <xen/xmalloc.h>	/* kmalloc() */
    1.11 -# include <xen/timer.h>	/* time_after() */
    1.12 - 
    1.13 -# ifdef CONFIG_BALANCED_IRQ_DEBUG
    1.14 -#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
    1.15 -#  define Dprintk(x...) do { TDprintk(x); } while (0)
    1.16 -# else
    1.17 -#  define TDprintk(x...) 
    1.18 -#  define Dprintk(x...) 
    1.19 -# endif
    1.20 -
    1.21 -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
    1.22 -
    1.23 -#define IRQBALANCE_CHECK_ARCH -999
    1.24 -static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
    1.25 -static int physical_balance = 0;
    1.26 -
    1.27 -struct irq_cpu_info {
    1.28 -	unsigned long * last_irq;
    1.29 -	unsigned long * irq_delta;
    1.30 -	unsigned long irq;
    1.31 -} irq_cpu_data[NR_CPUS];
    1.32 -
    1.33 -#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
    1.34 -#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
    1.35 -#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
    1.36 -
    1.37 -#define IDLE_ENOUGH(cpu,now) \
    1.38 -		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
    1.39 -
    1.40 -#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
    1.41 -
    1.42 -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
    1.43 -
    1.44 -#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
    1.45 -#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
    1.46 -#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
    1.47 -#define BALANCED_IRQ_LESS_DELTA		(HZ)
    1.48 -
    1.49 -long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
    1.50 -
    1.51 -static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
    1.52 -			unsigned long now, int direction)
    1.53 -{
    1.54 -	int search_idle = 1;
    1.55 -	int cpu = curr_cpu;
    1.56 -
    1.57 -	goto inside;
    1.58 -
    1.59 -	do {
    1.60 -		if (unlikely(cpu == curr_cpu))
    1.61 -			search_idle = 0;
    1.62 -inside:
    1.63 -		if (direction == 1) {
    1.64 -			cpu++;
    1.65 -			if (cpu >= NR_CPUS)
    1.66 -				cpu = 0;
    1.67 -		} else {
    1.68 -			cpu--;
    1.69 -			if (cpu == -1)
    1.70 -				cpu = NR_CPUS-1;
    1.71 -		}
    1.72 -	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
    1.73 -			(search_idle && !IDLE_ENOUGH(cpu,now)));
    1.74 -
    1.75 -	return cpu;
    1.76 -}
    1.77 -
    1.78 -static inline void balance_irq(int cpu, int irq)
    1.79 -{
    1.80 -	unsigned long now = jiffies;
    1.81 -	cpumask_t allowed_mask;
    1.82 -	unsigned int new_cpu;
    1.83 -		
    1.84 -	if (irqbalance_disabled)
    1.85 -		return; 
    1.86 -
    1.87 -	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
    1.88 -	new_cpu = move(cpu, allowed_mask, now, 1);
    1.89 -	if (cpu != new_cpu) {
    1.90 -		irq_desc_t *desc = irq_desc + irq;
    1.91 -		unsigned long flags;
    1.92 -
    1.93 -		spin_lock_irqsave(&desc->lock, flags);
    1.94 -		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
    1.95 -		spin_unlock_irqrestore(&desc->lock, flags);
    1.96 -	}
    1.97 -}
    1.98 -
    1.99 -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
   1.100 -{
   1.101 -	int i, j;
   1.102 -	Dprintk("Rotating IRQs among CPUs.\n");
   1.103 -	for (i = 0; i < NR_CPUS; i++) {
   1.104 -		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
   1.105 -			if (!irq_desc[j].action)
   1.106 -				continue;
   1.107 -			/* Is it a significant load ?  */
   1.108 -			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
   1.109 -						useful_load_threshold)
   1.110 -				continue;
   1.111 -			balance_irq(i, j);
   1.112 -		}
   1.113 -	}
   1.114 -	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
   1.115 -		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
   1.116 -	return;
   1.117 -}
   1.118 -
   1.119 -static void do_irq_balance(void)
   1.120 -{
   1.121 -	int i, j;
   1.122 -	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
   1.123 -	unsigned long move_this_load = 0;
   1.124 -	int max_loaded = 0, min_loaded = 0;
   1.125 -	int load;
   1.126 -	unsigned long useful_load_threshold = balanced_irq_interval + 10;
   1.127 -	int selected_irq;
   1.128 -	int tmp_loaded, first_attempt = 1;
   1.129 -	unsigned long tmp_cpu_irq;
   1.130 -	unsigned long imbalance = 0;
   1.131 -	cpumask_t allowed_mask, target_cpu_mask, tmp;
   1.132 -
   1.133 -	for (i = 0; i < NR_CPUS; i++) {
   1.134 -		int package_index;
   1.135 -		CPU_IRQ(i) = 0;
   1.136 -		if (!cpu_online(i))
   1.137 -			continue;
   1.138 -		package_index = CPU_TO_PACKAGEINDEX(i);
   1.139 -		for (j = 0; j < NR_IRQS; j++) {
   1.140 -			unsigned long value_now, delta;
   1.141 -			/* Is this an active IRQ? */
   1.142 -			if (!irq_desc[j].action)
   1.143 -				continue;
   1.144 -			if ( package_index == i )
   1.145 -				IRQ_DELTA(package_index,j) = 0;
   1.146 -			/* Determine the total count per processor per IRQ */
   1.147 -			value_now = (unsigned long) kstat_cpu(i).irqs[j];
   1.148 -
   1.149 -			/* Determine the activity per processor per IRQ */
   1.150 -			delta = value_now - LAST_CPU_IRQ(i,j);
   1.151 -
   1.152 -			/* Update last_cpu_irq[][] for the next time */
   1.153 -			LAST_CPU_IRQ(i,j) = value_now;
   1.154 -
   1.155 -			/* Ignore IRQs whose rate is less than the clock */
   1.156 -			if (delta < useful_load_threshold)
   1.157 -				continue;
   1.158 -			/* update the load for the processor or package total */
   1.159 -			IRQ_DELTA(package_index,j) += delta;
   1.160 -
   1.161 -			/* Keep track of the higher numbered sibling as well */
   1.162 -			if (i != package_index)
   1.163 -				CPU_IRQ(i) += delta;
   1.164 -			/*
   1.165 -			 * We have sibling A and sibling B in the package
   1.166 -			 *
   1.167 -			 * cpu_irq[A] = load for cpu A + load for cpu B
   1.168 -			 * cpu_irq[B] = load for cpu B
   1.169 -			 */
   1.170 -			CPU_IRQ(package_index) += delta;
   1.171 -		}
   1.172 -	}
   1.173 -	/* Find the least loaded processor package */
   1.174 -	for (i = 0; i < NR_CPUS; i++) {
   1.175 -		if (!cpu_online(i))
   1.176 -			continue;
   1.177 -		if (i != CPU_TO_PACKAGEINDEX(i))
   1.178 -			continue;
   1.179 -		if (min_cpu_irq > CPU_IRQ(i)) {
   1.180 -			min_cpu_irq = CPU_IRQ(i);
   1.181 -			min_loaded = i;
   1.182 -		}
   1.183 -	}
   1.184 -	max_cpu_irq = ULONG_MAX;
   1.185 -
   1.186 -tryanothercpu:
   1.187 -	/* Look for heaviest loaded processor.
   1.188 -	 * We may come back to get the next heaviest loaded processor.
   1.189 -	 * Skip processors with trivial loads.
   1.190 -	 */
   1.191 -	tmp_cpu_irq = 0;
   1.192 -	tmp_loaded = -1;
   1.193 -	for (i = 0; i < NR_CPUS; i++) {
   1.194 -		if (!cpu_online(i))
   1.195 -			continue;
   1.196 -		if (i != CPU_TO_PACKAGEINDEX(i))
   1.197 -			continue;
   1.198 -		if (max_cpu_irq <= CPU_IRQ(i)) 
   1.199 -			continue;
   1.200 -		if (tmp_cpu_irq < CPU_IRQ(i)) {
   1.201 -			tmp_cpu_irq = CPU_IRQ(i);
   1.202 -			tmp_loaded = i;
   1.203 -		}
   1.204 -	}
   1.205 -
   1.206 -	if (tmp_loaded == -1) {
   1.207 - 	 /* In the case of small number of heavy interrupt sources, 
   1.208 -	  * loading some of the cpus too much. We use Ingo's original 
   1.209 -	  * approach to rotate them around.
   1.210 -	  */
   1.211 -		if (!first_attempt && imbalance >= useful_load_threshold) {
   1.212 -			rotate_irqs_among_cpus(useful_load_threshold);
   1.213 -			return;
   1.214 -		}
   1.215 -		goto not_worth_the_effort;
   1.216 -	}
   1.217 -	
   1.218 -	first_attempt = 0;		/* heaviest search */
   1.219 -	max_cpu_irq = tmp_cpu_irq;	/* load */
   1.220 -	max_loaded = tmp_loaded;	/* processor */
   1.221 -	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
   1.222 -	
   1.223 -	Dprintk("max_loaded cpu = %d\n", max_loaded);
   1.224 -	Dprintk("min_loaded cpu = %d\n", min_loaded);
   1.225 -	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
   1.226 -	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
   1.227 -	Dprintk("load imbalance = %lu\n", imbalance);
   1.228 -
   1.229 -	/* if imbalance is less than approx 10% of max load, then
   1.230 -	 * observe diminishing returns action. - quit
   1.231 -	 */
   1.232 -	if (imbalance < (max_cpu_irq >> 3)) {
   1.233 -		Dprintk("Imbalance too trivial\n");
   1.234 -		goto not_worth_the_effort;
   1.235 -	}
   1.236 -
   1.237 -tryanotherirq:
   1.238 -	/* if we select an IRQ to move that can't go where we want, then
   1.239 -	 * see if there is another one to try.
   1.240 -	 */
   1.241 -	move_this_load = 0;
   1.242 -	selected_irq = -1;
   1.243 -	for (j = 0; j < NR_IRQS; j++) {
   1.244 -		/* Is this an active IRQ? */
   1.245 -		if (!irq_desc[j].action)
   1.246 -			continue;
   1.247 -		if (imbalance <= IRQ_DELTA(max_loaded,j))
   1.248 -			continue;
   1.249 -		/* Try to find the IRQ that is closest to the imbalance
   1.250 -		 * without going over.
   1.251 -		 */
   1.252 -		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
   1.253 -			move_this_load = IRQ_DELTA(max_loaded,j);
   1.254 -			selected_irq = j;
   1.255 -		}
   1.256 -	}
   1.257 -	if (selected_irq == -1) {
   1.258 -		goto tryanothercpu;
   1.259 -	}
   1.260 -
   1.261 -	imbalance = move_this_load;
   1.262 -	
   1.263 -	/* For physical_balance case, we accumlated both load
   1.264 -	 * values in the one of the siblings cpu_irq[],
   1.265 -	 * to use the same code for physical and logical processors
   1.266 -	 * as much as possible. 
   1.267 -	 *
   1.268 -	 * NOTE: the cpu_irq[] array holds the sum of the load for
   1.269 -	 * sibling A and sibling B in the slot for the lowest numbered
   1.270 -	 * sibling (A), _AND_ the load for sibling B in the slot for
   1.271 -	 * the higher numbered sibling.
   1.272 -	 *
   1.273 -	 * We seek the least loaded sibling by making the comparison
   1.274 -	 * (A+B)/2 vs B
   1.275 -	 */
   1.276 -	load = CPU_IRQ(min_loaded) >> 1;
   1.277 -	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
   1.278 -		if (load > CPU_IRQ(j)) {
   1.279 -			/* This won't change cpu_sibling_map[min_loaded] */
   1.280 -			load = CPU_IRQ(j);
   1.281 -			min_loaded = j;
   1.282 -		}
   1.283 -	}
   1.284 -
   1.285 -	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
   1.286 -	target_cpu_mask = cpumask_of_cpu(min_loaded);
   1.287 -	cpus_and(tmp, target_cpu_mask, allowed_mask);
   1.288 -
   1.289 -	if (!cpus_empty(tmp)) {
   1.290 -		irq_desc_t *desc = irq_desc + selected_irq;
   1.291 -		unsigned long flags;
   1.292 -
   1.293 -		Dprintk("irq = %d moved to cpu = %d\n",
   1.294 -				selected_irq, min_loaded);
   1.295 -		/* mark for change destination */
   1.296 -		spin_lock_irqsave(&desc->lock, flags);
   1.297 -		pending_irq_balance_cpumask[selected_irq] =
   1.298 -					cpumask_of_cpu(min_loaded);
   1.299 -		spin_unlock_irqrestore(&desc->lock, flags);
   1.300 -		/* Since we made a change, come back sooner to 
   1.301 -		 * check for more variation.
   1.302 -		 */
   1.303 -		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
   1.304 -			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
   1.305 -		return;
   1.306 -	}
   1.307 -	goto tryanotherirq;
   1.308 -
   1.309 -not_worth_the_effort:
   1.310 -	/*
   1.311 -	 * if we did not find an IRQ to move, then adjust the time interval
   1.312 -	 * upward
   1.313 -	 */
   1.314 -	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
   1.315 -		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
   1.316 -	Dprintk("IRQ worth rotating not found\n");
   1.317 -	return;
   1.318 -}
   1.319 -
   1.320 -static int balanced_irq(void *unused)
   1.321 -{
   1.322 -	int i;
   1.323 -	unsigned long prev_balance_time = jiffies;
   1.324 -	long time_remaining = balanced_irq_interval;
   1.325 -
   1.326 -	daemonize("kirqd");
   1.327 -	
   1.328 -	/* push everything to CPU 0 to give us a starting point.  */
   1.329 -	for (i = 0 ; i < NR_IRQS ; i++) {
   1.330 -		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
   1.331 -	}
   1.332 -
   1.333 -	for ( ; ; ) {
   1.334 -		set_current_state(TASK_INTERRUPTIBLE);
   1.335 -		time_remaining = schedule_timeout(time_remaining);
   1.336 -		try_to_freeze(PF_FREEZE);
   1.337 -		if (time_after(jiffies,
   1.338 -				prev_balance_time+balanced_irq_interval)) {
   1.339 -			do_irq_balance();
   1.340 -			prev_balance_time = jiffies;
   1.341 -			time_remaining = balanced_irq_interval;
   1.342 -		}
   1.343 -	}
   1.344 -	return 0;
   1.345 -}
   1.346 -
   1.347 -static int __init balanced_irq_init(void)
   1.348 -{
   1.349 -	int i;
   1.350 -	struct cpuinfo_x86 *c;
   1.351 -	cpumask_t tmp;
   1.352 -
   1.353 -	cpus_shift_right(tmp, cpu_online_map, 2);
   1.354 -        c = &boot_cpu_data;
   1.355 -	/* When not overwritten by the command line ask subarchitecture. */
   1.356 -	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
   1.357 -		irqbalance_disabled = NO_BALANCE_IRQ;
   1.358 -	if (irqbalance_disabled)
   1.359 -		return 0;
   1.360 -	
   1.361 -	 /* disable irqbalance completely if there is only one processor online */
   1.362 -	if (num_online_cpus() < 2) {
   1.363 -		irqbalance_disabled = 1;
   1.364 -		return 0;
   1.365 -	}
   1.366 -	/*
   1.367 -	 * Enable physical balance only if more than 1 physical processor
   1.368 -	 * is present
   1.369 -	 */
   1.370 -	if (smp_num_siblings > 1 && !cpus_empty(tmp))
   1.371 -		physical_balance = 1;
   1.372 -
   1.373 -	for (i = 0; i < NR_CPUS; i++) {
   1.374 -		if (!cpu_online(i))
   1.375 -			continue;
   1.376 -		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
   1.377 -		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
   1.378 -		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
   1.379 -			printk(KERN_ERR "balanced_irq_init: out of memory");
   1.380 -			goto failed;
   1.381 -		}
   1.382 -		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
   1.383 -		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
   1.384 -	}
   1.385 -	
   1.386 -	printk(KERN_INFO "Starting balanced_irq\n");
   1.387 -	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
   1.388 -		return 0;
   1.389 -	else 
   1.390 -		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
   1.391 -failed:
   1.392 -	for (i = 0; i < NR_CPUS; i++) {
   1.393 -		if(irq_cpu_data[i].irq_delta)
   1.394 -			kfree(irq_cpu_data[i].irq_delta);
   1.395 -		if(irq_cpu_data[i].last_irq)
   1.396 -			kfree(irq_cpu_data[i].last_irq);
   1.397 -	}
   1.398 -	return 0;
   1.399 -}
   1.400 -
   1.401 -int __init irqbalance_disable(char *str)
   1.402 -{
   1.403 -	irqbalance_disabled = 1;
   1.404 -	return 0;
   1.405 -}
   1.406 -
   1.407 -__setup("noirqbalance", irqbalance_disable);
   1.408 -
   1.409 -static inline void move_irq(int irq)
   1.410 -{
   1.411 -	/* note - we hold the desc->lock */
   1.412 -	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
   1.413 -		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
   1.414 -		cpus_clear(pending_irq_balance_cpumask[irq]);
   1.415 -	}
   1.416 -}
   1.417 -
   1.418 -late_initcall(balanced_irq_init);
   1.419 -
   1.420 -#else /* !CONFIG_IRQBALANCE */
   1.421 -static inline void move_irq(int irq) { }
   1.422 -#endif /* CONFIG_IRQBALANCE */
   1.423 -
   1.424 -#ifndef CONFIG_SMP
   1.425 -void fastcall send_IPI_self(int vector)
   1.426 -{
   1.427 -	unsigned int cfg;
   1.428 -
   1.429 -	/*
   1.430 -	 * Wait for idle.
   1.431 -	 */
   1.432 -	apic_wait_icr_idle();
   1.433 -	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
   1.434 -	/*
   1.435 -	 * Send the IPI. The write to APIC_ICR fires this off.
   1.436 -	 */
   1.437 -	apic_write_around(APIC_ICR, cfg);
   1.438 -}
   1.439 -#endif /* !CONFIG_SMP */
   1.440 -
   1.441  /*
   1.442   * Find the IRQ entry number of a certain pin.
   1.443   */
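
The hunk above deletes the entire CONFIG_IRQBALANCE implementation. Two
details of the removed algorithm are worth noting. First, the bail-out test
imbalance < (max_cpu_irq >> 3) compares against the maximum load divided by
8, i.e. 12.5%, despite the comment's "approx 10%". Second, the kirqd thread
polled on an adaptive interval: each migration pulled the interval down
toward MIN_BALANCED_IRQ_INTERVAL (HZ/2) to catch follow-on drift, while each
idle pass pushed it back up toward MAX_BALANCED_IRQ_INTERVAL (5*HZ). A
self-contained sketch of that back-off pattern, reusing the deleted
constants (HZ assumed to be 100 purely for illustration):

    /* Sketch of the removed balancer's adaptive poll interval -- not
     * code from this changeset. Constants mirror the deleted #defines.
     */
    #include <stdio.h>

    #define HZ                          100   /* assumed for illustration */
    #define MAX_BALANCED_IRQ_INTERVAL   (5*HZ)
    #define MIN_BALANCED_IRQ_INTERVAL   (HZ/2)
    #define BALANCED_IRQ_MORE_DELTA     (HZ/10)
    #define BALANCED_IRQ_LESS_DELTA     (HZ)

    static long interval = MAX_BALANCED_IRQ_INTERVAL;

    /* Called once per balancing pass: shrink the interval after a
     * migration, grow it while the system stays balanced. */
    static void update_interval(int moved_an_irq)
    {
        if (moved_an_irq) {
            interval -= BALANCED_IRQ_LESS_DELTA;
            if (interval < MIN_BALANCED_IRQ_INTERVAL)
                interval = MIN_BALANCED_IRQ_INTERVAL;
        } else {
            interval += BALANCED_IRQ_MORE_DELTA;
            if (interval > MAX_BALANCED_IRQ_INTERVAL)
                interval = MAX_BALANCED_IRQ_INTERVAL;
        }
    }

    int main(void)
    {
        int moved[] = { 1, 0, 0, 0, 0 };  /* one migration, then idle */
        for (int i = 0; i < 5; i++) {
            update_interval(moved[i]);
            printf("pass %d: interval = %ld ticks\n", i, interval);
        }
        return 0;
    }
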
   1.444 @@ -1610,7 +1176,6 @@ static unsigned int startup_edge_ioapic_
   1.445   */
   1.446  static void ack_edge_ioapic_irq(unsigned int irq)
   1.447  {
   1.448 -	move_irq(irq);
   1.449  	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
   1.450  					== (IRQ_PENDING | IRQ_DISABLED))
   1.451  		mask_IO_APIC_irq(irq);
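
This hunk and the next remove the move_irq() calls from the edge- and
level-triggered acknowledge paths. In the deleted scheme, do_irq_balance()
never reprogrammed an IRQ directly: it only recorded the desired destination
in pending_irq_balance_cpumask[], and move_irq() applied it here, under
desc->lock at ack time, when retargeting the IO-APIC redirection entry is
safe. A self-contained sketch of that deferred-retarget idiom, where the
plain integer masks and apply_affinity() stand in for cpumask_t and
set_ioapic_affinity_irq() in the removed code:

    /* Sketch of the deferred-retarget idiom behind the deleted
     * move_irq(); simplified stand-ins, not code from this tree.
     */
    #include <stdio.h>

    #define NR_IRQS 16

    static unsigned int pending_mask[NR_IRQS];   /* 0 == nothing pending */

    /* Balancer side: only record the wanted destination mask. */
    static void request_move(int irq, unsigned int cpu_mask)
    {
        pending_mask[irq] = cpu_mask;
    }

    /* Stand-in for set_ioapic_affinity_irq(). */
    static void apply_affinity(int irq, unsigned int cpu_mask)
    {
        printf("irq %d retargeted to mask %#x\n", irq, cpu_mask);
    }

    /* Ack side: apply any pending mask at the safe point (in the real
     * code, under desc->lock in the IRQ acknowledge path). */
    static void move_irq(int irq)
    {
        if (pending_mask[irq]) {
            apply_affinity(irq, pending_mask[irq]);
            pending_mask[irq] = 0;
        }
    }

    int main(void)
    {
        request_move(5, 0x4);   /* balancer asks for CPU 2 */
        move_irq(5);            /* applied on the next ack */
        move_irq(5);            /* nothing pending: no-op */
        return 0;
    }
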
   1.452 @@ -1643,8 +1208,6 @@ static void mask_and_ack_level_ioapic_ir
   1.453  	unsigned long v;
   1.454  	int i;
   1.455  
   1.456 -	move_irq(irq);
   1.457 -
   1.458  	mask_IO_APIC_irq(irq);
   1.459  /*
   1.460   * It appears there is an erratum which affects at least version 0x11