ia64/xen-unstable

changeset 9024:d0b7281556f2

New VCPUOP_register_runstate_memory_area hypercall. Avoids
need for a hypercall in the guest timer interrupt handler.

Cleaned up stolen/blocked tick handling in Linux.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Sat Feb 25 21:28:27 2006 +0100 (2006-02-25)
parents c375c2109452
children 0d7c3c47ad20
files linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c xen/arch/x86/domain.c xen/common/domain.c xen/include/public/vcpu.h xen/include/xen/sched.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 20:07:28 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 21:28:27 2006 +0100
     1.3 @@ -130,6 +130,9 @@ static DEFINE_PER_CPU(u64, processed_sys
     1.4  static DEFINE_PER_CPU(u64, processed_stolen_time);
     1.5  static DEFINE_PER_CPU(u64, processed_blocked_time);
     1.6  
     1.7 +/* Current runstate of each CPU (updated automatically by the hypervisor). */
     1.8 +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
     1.9 +
    1.10  /* Must be signed, as it's compared with s64 quantities which can be -ve. */
    1.11  #define NS_PER_TICK (1000000000LL/HZ)
    1.12  
    1.13 @@ -575,19 +578,36 @@ EXPORT_SYMBOL(profile_pc);
    1.14  irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
    1.15  {
    1.16  	s64 delta, delta_cpu, stolen, blocked;
    1.17 +	u64 sched_time;
    1.18  	int i, cpu = smp_processor_id();
    1.19  	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
    1.20 -	struct vcpu_runstate_info runstate;
    1.21 +	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
    1.22  
    1.23  	write_seqlock(&xtime_lock);
    1.24  
    1.25  	do {
    1.26  		get_time_values_from_xen();
    1.27  
     1.28 +		/* Obtain a consistent snapshot of elapsed wallclock time (ns). */
    1.29  		delta = delta_cpu = 
    1.30  			shadow->system_timestamp + get_nsec_offset(shadow);
    1.31  		delta     -= processed_system_time;
    1.32  		delta_cpu -= per_cpu(processed_system_time, cpu);
    1.33 +
     1.34 +		/*
     1.35 +		 * Obtain a consistent snapshot of stolen/blocked time (ns). We
     1.36 +		 * can use state_entry_time to detect if we get preempted here.
     1.37 +		 */
    1.38 +		do {
    1.39 +			sched_time = runstate->state_entry_time;
    1.40 +			barrier();
    1.41 +			stolen = runstate->time[RUNSTATE_runnable] +
    1.42 +				runstate->time[RUNSTATE_offline] -
    1.43 +				per_cpu(processed_stolen_time, cpu);
    1.44 +			blocked = runstate->time[RUNSTATE_blocked] -
    1.45 +				per_cpu(processed_blocked_time, cpu);
    1.46 +			barrier();
    1.47 +		} while (sched_time != runstate->state_entry_time);
    1.48  	}
    1.49  	while (!time_values_up_to_date(cpu));
    1.50  
    1.51 @@ -619,60 +639,44 @@ irqreturn_t timer_interrupt(int irq, voi
    1.52  
    1.53  	write_sequnlock(&xtime_lock);
    1.54  
    1.55 -	/* Obtain stolen/blocked cycles, if the hypervisor supports it. */
    1.56 -	if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
    1.57 -			       cpu, &runstate) == 0) {
    1.58 -		/*
    1.59 -		 * Account stolen ticks.
    1.60 -		 * HACK: Passing NULL to account_steal_time()
    1.61 -		 * ensures that the ticks are accounted as stolen.
    1.62 -		 */
    1.63 -		stolen = runstate.time[RUNSTATE_runnable] +
    1.64 -			runstate.time[RUNSTATE_offline] -
    1.65 -			per_cpu(processed_stolen_time, cpu);
    1.66 -		if (unlikely(stolen < 0)) /* clock jitter */
    1.67 -			stolen = 0;
    1.68 +	/*
    1.69 +	 * Account stolen ticks.
    1.70 +	 * HACK: Passing NULL to account_steal_time()
    1.71 +	 * ensures that the ticks are accounted as stolen.
    1.72 +	 */
    1.73 +	if (stolen > 0) {
    1.74  		delta_cpu -= stolen;
    1.75 -		if (unlikely(delta_cpu < 0)) {
    1.76 -			stolen += delta_cpu;
    1.77 -			delta_cpu = 0;
    1.78 -		}
    1.79  		do_div(stolen, NS_PER_TICK);
    1.80  		per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
    1.81 +		per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
    1.82  		account_steal_time(NULL, (cputime_t)stolen);
    1.83 +	}
    1.84  
    1.85 -		/*
    1.86 -		 * Account blocked ticks.
    1.87 -		 * HACK: Passing idle_task to account_steal_time()
    1.88 -		 * ensures that the ticks are accounted as idle/wait.
    1.89 -		 */
    1.90 -		blocked = runstate.time[RUNSTATE_blocked] -
    1.91 -			per_cpu(processed_blocked_time, cpu);
    1.92 -		if (unlikely(blocked < 0)) /* clock jitter */
    1.93 -			blocked = 0;
    1.94 +	/*
    1.95 +	 * Account blocked ticks.
    1.96 +	 * HACK: Passing idle_task to account_steal_time()
    1.97 +	 * ensures that the ticks are accounted as idle/wait.
    1.98 +	 */
    1.99 +	if (blocked > 0) {
   1.100  		delta_cpu -= blocked;
   1.101 -		if (unlikely(delta_cpu < 0)) {
   1.102 -			blocked += delta_cpu;
   1.103 -			delta_cpu = 0;
   1.104 -		}
   1.105  		do_div(blocked, NS_PER_TICK);
   1.106  		per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
   1.107 +		per_cpu(processed_system_time, cpu)  += blocked * NS_PER_TICK;
   1.108  		account_steal_time(idle_task(cpu), (cputime_t)blocked);
   1.109 -
   1.110 -		per_cpu(processed_system_time, cpu) +=
   1.111 -			(stolen + blocked) * NS_PER_TICK;
   1.112  	}
   1.113  
   1.114 +	/* Account user/system ticks. */
   1.115  	if (delta_cpu > 0) {
   1.116  		do_div(delta_cpu, NS_PER_TICK);
   1.117 +		per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
   1.118  		if (user_mode(regs))
   1.119  			account_user_time(current, (cputime_t)delta_cpu);
   1.120  		else
   1.121  			account_system_time(current, HARDIRQ_OFFSET,
   1.122  					    (cputime_t)delta_cpu);
   1.123 -		per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
   1.124  	}
   1.125  
   1.126 +	/* Local timer processing (see update_process_times()). */
   1.127  	run_local_timers();
   1.128  	if (rcu_pending(cpu))
   1.129  		rcu_check_callbacks(cpu, user_mode(regs));
   1.130 @@ -684,14 +688,19 @@ irqreturn_t timer_interrupt(int irq, voi
   1.131  
   1.132  static void init_missing_ticks_accounting(int cpu)
   1.133  {
   1.134 -	struct vcpu_runstate_info runstate = { 0 };
   1.135 +	struct vcpu_register_runstate_memory_area area;
   1.136 +	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
   1.137  
   1.138 -	HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate);
   1.139 +	memset(runstate, 0, sizeof(*runstate));
   1.140  
   1.141 -	per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
   1.142 +	area.addr.v = runstate;
   1.143 +	HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
   1.144 +
   1.145 +	per_cpu(processed_blocked_time, cpu) =
   1.146 +		runstate->time[RUNSTATE_blocked];
   1.147  	per_cpu(processed_stolen_time, cpu) =
   1.148 -		runstate.time[RUNSTATE_runnable] +
   1.149 -		runstate.time[RUNSTATE_offline];
   1.150 +		runstate->time[RUNSTATE_runnable] +
   1.151 +		runstate->time[RUNSTATE_offline];
   1.152  }
   1.153  
   1.154  /* not static: needed by APM */
     2.1 --- a/xen/arch/x86/domain.c	Sat Feb 25 20:07:28 2006 +0100
     2.2 +++ b/xen/arch/x86/domain.c	Sat Feb 25 21:28:27 2006 +0100
     2.3 @@ -784,6 +784,11 @@ void context_switch(struct vcpu *prev, s
     2.4  
     2.5      context_saved(prev);
     2.6  
     2.7 +    /* Update per-VCPU guest runstate shared memory area (if registered). */
     2.8 +    if ( next->runstate_guest != NULL )
     2.9 +        __copy_to_user(next->runstate_guest, &next->runstate,
    2.10 +                       sizeof(next->runstate));
    2.11 +
    2.12      schedule_tail(next);
    2.13      BUG();
    2.14  }
     3.1 --- a/xen/common/domain.c	Sat Feb 25 20:07:28 2006 +0100
     3.2 +++ b/xen/common/domain.c	Sat Feb 25 21:28:27 2006 +0100
     3.3 @@ -461,6 +461,28 @@ long do_vcpu_op(int cmd, int vcpuid, voi
     3.4          break;
     3.5      }
     3.6  
     3.7 +    case VCPUOP_register_runstate_memory_area:
     3.8 +    {
     3.9 +        struct vcpu_register_runstate_memory_area area;
    3.10 +
    3.11 +        rc = -EINVAL;
    3.12 +        if ( v != current )
    3.13 +            break;
    3.14 +
    3.15 +        rc = -EFAULT;
    3.16 +        if ( copy_from_user(&area, arg, sizeof(area)) )
    3.17 +            break;
    3.18 +
    3.19 +        if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) )
    3.20 +            break;
    3.21 +
    3.22 +        rc = 0;
    3.23 +        v->runstate_guest = area.addr.v;
    3.24 +        __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate));
    3.25 +
    3.26 +        break;
    3.27 +    }
    3.28 +
    3.29      default:
    3.30          rc = -ENOSYS;
    3.31          break;
     4.1 --- a/xen/include/public/vcpu.h	Sat Feb 25 20:07:28 2006 +0100
     4.2 +++ b/xen/include/public/vcpu.h	Sat Feb 25 21:28:27 2006 +0100
     4.3 @@ -53,7 +53,7 @@
     4.4  
     4.5  /*
     4.6   * Return information about the state and running time of a VCPU.
     4.7 - * @extra_arg == pointer to xen_vcpu_info structure.
     4.8 + * @extra_arg == pointer to vcpu_runstate_info structure.
     4.9   */
    4.10  #define VCPUOP_get_runstate_info    4
    4.11  typedef struct vcpu_runstate_info {
    4.12 @@ -85,6 +85,27 @@ typedef struct vcpu_runstate_info {
    4.13   */
    4.14  #define RUNSTATE_offline  3
    4.15  
    4.16 +/*
    4.17 + * Register a shared memory area from which the guest may obtain its own
    4.18 + * runstate information without needing to execute a hypercall.
    4.19 + * Notes:
    4.20 + *  1. The registered address may be virtual or physical, depending on the
    4.21 + *     platform. The virtual address should be registered on x86 systems.
    4.22 + *  2. Only one shared area may be registered per VCPU. The shared area is
    4.23 + *     updated by the hypervisor each time the VCPU is scheduled. Thus
    4.24 + *     runstate.state will always be RUNSTATE_running and
    4.25 + *     runstate.state_entry_time will indicate the system time at which the
    4.26 + *     VCPU was last scheduled to run.
    4.27 + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
    4.28 + */
    4.29 +#define VCPUOP_register_runstate_memory_area 5
    4.30 +typedef struct vcpu_register_runstate_memory_area {
    4.31 +    union {
    4.32 +        struct vcpu_runstate_info *v;
    4.33 +        uint64_t p;
    4.34 +    } addr;
    4.35 +} vcpu_register_runstate_memory_area_t;
    4.36 +
    4.37  #endif /* __XEN_PUBLIC_VCPU_H__ */
    4.38  
    4.39  /*
     5.1 --- a/xen/include/xen/sched.h	Sat Feb 25 20:07:28 2006 +0100
     5.2 +++ b/xen/include/xen/sched.h	Sat Feb 25 21:28:27 2006 +0100
     5.3 @@ -70,6 +70,7 @@ struct vcpu
     5.4      void            *sched_priv;    /* scheduler-specific data */
     5.5  
     5.6      struct vcpu_runstate_info runstate;
     5.7 +    struct vcpu_runstate_info *runstate_guest; /* guest address */
     5.8  
     5.9      unsigned long    vcpu_flags;
    5.10