ia64/xen-unstable

changeset 9023:c375c2109452

Update the Linux timer IRQ handler to understand the new stolen/blocked cycle
counts exported by Xen. This is based heavily on a patch from Rik van Riel, but
extended to distinguish idle/blocked cycles from stolen cycles. (A standalone
sketch of the resulting tick arithmetic follows the todo list below.)
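
For reference, the runstate counters consumed by the new handler look roughly
like this. This is paraphrased from Xen's public vcpu interface; the
authoritative definition is xen/interface/vcpu.h in the tree:

	/* VCPU is currently running on a physical CPU. */
	#define RUNSTATE_running  0
	/* VCPU is runnable, but not currently scheduled on any physical CPU. */
	#define RUNSTATE_runnable 1
	/* VCPU is blocked (i.e. idle), and therefore not runnable. */
	#define RUNSTATE_blocked  2
	/* VCPU is not currently running on any physical CPU. */
	#define RUNSTATE_offline  3

	struct vcpu_runstate_info {
		/* VCPU's current state (RUNSTATE_*). */
		int      state;
		/* When was current state entered (system time, ns)? */
		uint64_t state_entry_time;
		/* Time spent in each RUNSTATE_* (ns). */
		uint64_t time[4];
	};

The handler only cares about time[]: runnable + offline is treated as stolen,
and blocked is treated as idle.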

There is still work to do:
1. Xen should export the time values in shared memory, to save a hypercall
on every timer interrupt (even though that is at worst only every 10ms).
2. As on s390, Xen's cputime_t should be measured at a finer granularity than
jiffies. Nanoseconds would be a better unit.
3. Break out the internals of the account_steal_time() interface so that we
don't need to abuse it so wretchedly (see the illustrative sketch after the
diff below).
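
To make the accounting in the new timer_interrupt() concrete, here is a
minimal standalone sketch of the stolen-tick arithmetic. It is plain C (GNU
dialect) with a userspace stand-in for the kernel's do_div(), which divides
its first argument in place and evaluates to the remainder; the runstate
numbers are invented inputs, not real hypercall output:

	#include <stdio.h>
	#include <stdint.h>

	#define HZ          100
	#define NS_PER_TICK (1000000000LL / HZ)

	/* Userspace stand-in for the kernel's do_div(): divide n in place,
	 * evaluate to the remainder. */
	#define do_div(n, base) ({		\
		int64_t _rem = (n) % (base);	\
		(n) /= (base);			\
		_rem; })

	int main(void)
	{
		/* Invented inputs: ns spent runnable/offline since boot, the
		 * amount already accounted, and the unaccounted local delta. */
		int64_t runnable_ns      = 35000000;	/* 35ms */
		int64_t offline_ns       =  5000000;	/*  5ms */
		int64_t processed_stolen = 10000000;	/* 10ms already done */
		int64_t delta_cpu        = 40000000;	/* 40ms unaccounted */

		int64_t stolen = runnable_ns + offline_ns - processed_stolen;
		if (stolen < 0)		/* clock jitter */
			stolen = 0;

		/* Stolen time comes out of the local delta, clamped at zero. */
		delta_cpu -= stolen;
		if (delta_cpu < 0) {
			stolen += delta_cpu;
			delta_cpu = 0;
		}

		/* Round down to whole ticks; the sub-tick remainder is not
		 * added to processed_stolen, so it carries to the next IRQ. */
		do_div(stolen, NS_PER_TICK);
		processed_stolen += stolen * NS_PER_TICK;

		printf("stolen ticks=%lld, remaining delta=%lld ns\n",
		       (long long)stolen, (long long)delta_cpu);
		return 0;
	}

The blocked-time path in the patch is identical in shape, with
RUNSTATE_blocked feeding the idle/wait bucket instead of the steal bucket.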

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Sat Feb 25 20:07:28 2006 +0100 (2006-02-25)
parents 2303fb4682e7
children d0b7281556f2
files linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 17:58:37 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 20:07:28 2006 +0100
@@ -48,6 +48,8 @@
 #include <linux/mca.h>
 #include <linux/sysctl.h>
 #include <linux/percpu.h>
+#include <linux/kernel_stat.h>
+#include <linux/posix-timers.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -70,6 +72,7 @@
 #include <asm/arch_hooks.h>
 
 #include <xen/evtchn.h>
+#include <xen/interface/vcpu.h>
 
 #if defined (__i386__)
 #include <asm/i8259.h>
@@ -123,6 +126,10 @@ static u32 shadow_tv_version;
 static u64 processed_system_time;   /* System time (ns) at last processing. */
 static DEFINE_PER_CPU(u64, processed_system_time);
 
+/* How much CPU time was spent blocked and how much was 'stolen'? */
+static DEFINE_PER_CPU(u64, processed_stolen_time);
+static DEFINE_PER_CPU(u64, processed_blocked_time);
+
 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
 #define NS_PER_TICK (1000000000LL/HZ)
 
@@ -567,9 +574,10 @@ EXPORT_SYMBOL(profile_pc);
 
 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
-	s64 delta, delta_cpu;
+	s64 delta, delta_cpu, stolen, blocked;
 	int i, cpu = smp_processor_id();
 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+	struct vcpu_runstate_info runstate;
 
 	write_seqlock(&xtime_lock);
 
@@ -611,21 +619,81 @@ irqreturn_t timer_interrupt(int irq, voi
 
 	write_sequnlock(&xtime_lock);
 
-	/*
-         * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
-         * if there is risk of deadlock if we do (since update_process_times
-         * may do scheduler rebalancing work and thus acquire runqueue locks).
-         */
-	while (delta_cpu >= NS_PER_TICK) {
-		delta_cpu -= NS_PER_TICK;
-		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
-		update_process_times(user_mode(regs));
-		profile_tick(CPU_PROFILING, regs);
+	/* Obtain stolen/blocked cycles, if the hypervisor supports it. */
+	if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
+			       cpu, &runstate) == 0) {
+		/*
+		 * Account stolen ticks.
+		 * HACK: Passing NULL to account_steal_time()
+		 * ensures that the ticks are accounted as stolen.
+		 */
+		stolen = runstate.time[RUNSTATE_runnable] +
+			runstate.time[RUNSTATE_offline] -
+			per_cpu(processed_stolen_time, cpu);
+		if (unlikely(stolen < 0)) /* clock jitter */
+			stolen = 0;
+		delta_cpu -= stolen;
+		if (unlikely(delta_cpu < 0)) {
+			stolen += delta_cpu;
+			delta_cpu = 0;
+		}
+		do_div(stolen, NS_PER_TICK);
+		per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
+		account_steal_time(NULL, (cputime_t)stolen);
+
+		/*
+		 * Account blocked ticks.
+		 * HACK: Passing idle_task to account_steal_time()
+		 * ensures that the ticks are accounted as idle/wait.
+		 */
+		blocked = runstate.time[RUNSTATE_blocked] -
+			per_cpu(processed_blocked_time, cpu);
+		if (unlikely(blocked < 0)) /* clock jitter */
+			blocked = 0;
+		delta_cpu -= blocked;
+		if (unlikely(delta_cpu < 0)) {
+			blocked += delta_cpu;
+			delta_cpu = 0;
+		}
+		do_div(blocked, NS_PER_TICK);
+		per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
+		account_steal_time(idle_task(cpu), (cputime_t)blocked);
+
+		per_cpu(processed_system_time, cpu) +=
+			(stolen + blocked) * NS_PER_TICK;
 	}
 
+	if (delta_cpu > 0) {
+		do_div(delta_cpu, NS_PER_TICK);
+		if (user_mode(regs))
+			account_user_time(current, (cputime_t)delta_cpu);
+		else
+			account_system_time(current, HARDIRQ_OFFSET,
+					    (cputime_t)delta_cpu);
+		per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
+	}
+
+	run_local_timers();
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_mode(regs));
+	scheduler_tick();
+	run_posix_cpu_timers(current);
+
 	return IRQ_HANDLED;
 }
 
+static void init_missing_ticks_accounting(int cpu)
+{
+	struct vcpu_runstate_info runstate = { 0 };
+
+	HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate);
+
+	per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
+	per_cpu(processed_stolen_time, cpu) =
+		runstate.time[RUNSTATE_runnable] +
+		runstate.time[RUNSTATE_offline];
+}
+
 /* not static: needed by APM */
 unsigned long get_cmos_time(void)
 {
@@ -814,6 +882,7 @@ void __init time_init(void)
 
 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
 	per_cpu(processed_system_time, 0) = processed_system_time;
+	init_missing_ticks_accounting(0);
 
 	update_wallclock();
 
@@ -891,6 +960,7 @@ void time_resume(void)
 
 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
 	per_cpu(processed_system_time, 0) = processed_system_time;
+	init_missing_ticks_accounting(0);
 
 	update_wallclock();
 }
@@ -909,6 +979,7 @@ void local_setup_timer(unsigned int cpu)
 		/* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
 		per_cpu(processed_system_time, cpu) = 
 			per_cpu(shadow_time, 0).system_timestamp;
+		init_missing_ticks_accounting(cpu);
 	} while (read_seqretry(&xtime_lock, seq));
 
 	sprintf(timer_name[cpu], "timer%d", cpu);
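
Finally, on todo item 3 above: one possible shape for the broken-out
interface, sketched here as compilable userspace stubs. All names below are
hypothetical illustrations, not an existing kernel API; the point is only
that the two account_steal_time() hacks in timer_interrupt() deserve
explicitly-named entry points instead of being overloaded via the task
argument (NULL vs. idle_task):

	#include <stdio.h>

	typedef unsigned long cputime_t;

	/* Stand-ins for the kernel's per-cpu cpustat accounting. */
	static void cpustat_add_steal(cputime_t t) { printf("steal += %lu\n", t); }
	static void cpustat_add_idle(cputime_t t)  { printf("idle  += %lu\n", t); }

	/* Hypothetical split: stolen and blocked ticks each get their own
	 * function, so the caller states its intent directly. */
	static void account_stolen_ticks(cputime_t ticks)
	{
		cpustat_add_steal(ticks);
	}

	static void account_blocked_ticks(cputime_t ticks)
	{
		cpustat_add_idle(ticks);
	}

	int main(void)
	{
		account_stolen_ticks(3);	/* ticks spent RUNSTATE_runnable/offline */
		account_blocked_ticks(5);	/* ticks spent RUNSTATE_blocked */
		return 0;
	}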