ia64/xen-unstable
changeset 9024:d0b7281556f2
New VCPUOP_register_runstate_memory_area hypercall. Avoids the
need for a hypercall in the guest timer interrupt handler.
Cleaned up stolen/blocked tick handling in Linux.
Signed-off-by: Keir Fraser <keir@xensource.com>
| author | kaf24@firebug.cl.cam.ac.uk |
|---|---|
| date | Sat Feb 25 21:28:27 2006 +0100 (2006-02-25) |
| parents | c375c2109452 |
| children | 0d7c3c47ad20 |
| files | linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c xen/arch/x86/domain.c xen/common/domain.c xen/include/public/vcpu.h xen/include/xen/sched.h |
line diff
```diff
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 20:07:28 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 21:28:27 2006 +0100
@@ -130,6 +130,9 @@ static DEFINE_PER_CPU(u64, processed_sys
 static DEFINE_PER_CPU(u64, processed_stolen_time);
 static DEFINE_PER_CPU(u64, processed_blocked_time);
 
+/* Current runstate of each CPU (updated automatically by the hypervisor). */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+
 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
 #define NS_PER_TICK (1000000000LL/HZ)
 
@@ -575,19 +578,36 @@ EXPORT_SYMBOL(profile_pc);
 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	s64 delta, delta_cpu, stolen, blocked;
+	u64 sched_time;
 	int i, cpu = smp_processor_id();
 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
-	struct vcpu_runstate_info runstate;
+	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
 
 	write_seqlock(&xtime_lock);
 
 	do {
 		get_time_values_from_xen();
 
+		/* Obtain a consistent snapshot of elapsed wallclock cycles. */
 		delta = delta_cpu =
 			shadow->system_timestamp + get_nsec_offset(shadow);
 		delta     -= processed_system_time;
 		delta_cpu -= per_cpu(processed_system_time, cpu);
+
+		/*
+		 * Obtain a consistent snapshot of stolen/blocked cycles. We
+		 * can use state_entry_time to detect if we get preempted here.
+		 */
+		do {
+			sched_time = runstate->state_entry_time;
+			barrier();
+			stolen = runstate->time[RUNSTATE_runnable] +
+				runstate->time[RUNSTATE_offline] -
+				per_cpu(processed_stolen_time, cpu);
+			blocked = runstate->time[RUNSTATE_blocked] -
+				per_cpu(processed_blocked_time, cpu);
+			barrier();
+		} while (sched_time != runstate->state_entry_time);
 	}
 	while (!time_values_up_to_date(cpu));
 
@@ -619,60 +639,44 @@ irqreturn_t timer_interrupt(int irq, voi
 
 	write_sequnlock(&xtime_lock);
 
-	/* Obtain stolen/blocked cycles, if the hypervisor supports it. */
-	if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
-			       cpu, &runstate) == 0) {
-		/*
-		 * Account stolen ticks.
-		 * HACK: Passing NULL to account_steal_time()
-		 * ensures that the ticks are accounted as stolen.
-		 */
-		stolen = runstate.time[RUNSTATE_runnable] +
-			runstate.time[RUNSTATE_offline] -
-			per_cpu(processed_stolen_time, cpu);
-		if (unlikely(stolen < 0)) /* clock jitter */
-			stolen = 0;
+	/*
+	 * Account stolen ticks.
+	 * HACK: Passing NULL to account_steal_time()
+	 * ensures that the ticks are accounted as stolen.
+	 */
+	if (stolen > 0) {
 		delta_cpu -= stolen;
-		if (unlikely(delta_cpu < 0)) {
-			stolen += delta_cpu;
-			delta_cpu = 0;
-		}
 		do_div(stolen, NS_PER_TICK);
 		per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
+		per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
 		account_steal_time(NULL, (cputime_t)stolen);
+	}
 
-		/*
-		 * Account blocked ticks.
-		 * HACK: Passing idle_task to account_steal_time()
-		 * ensures that the ticks are accounted as idle/wait.
-		 */
-		blocked = runstate.time[RUNSTATE_blocked] -
-			per_cpu(processed_blocked_time, cpu);
-		if (unlikely(blocked < 0)) /* clock jitter */
-			blocked = 0;
+	/*
+	 * Account blocked ticks.
+	 * HACK: Passing idle_task to account_steal_time()
+	 * ensures that the ticks are accounted as idle/wait.
+	 */
+	if (blocked > 0) {
 		delta_cpu -= blocked;
-		if (unlikely(delta_cpu < 0)) {
-			blocked += delta_cpu;
-			delta_cpu = 0;
-		}
 		do_div(blocked, NS_PER_TICK);
 		per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
+		per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
 		account_steal_time(idle_task(cpu), (cputime_t)blocked);
-
-		per_cpu(processed_system_time, cpu) +=
-			(stolen + blocked) * NS_PER_TICK;
 	}
 
+	/* Account user/system ticks. */
 	if (delta_cpu > 0) {
 		do_div(delta_cpu, NS_PER_TICK);
+		per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
 		if (user_mode(regs))
 			account_user_time(current, (cputime_t)delta_cpu);
 		else
 			account_system_time(current, HARDIRQ_OFFSET,
 					    (cputime_t)delta_cpu);
-		per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
 	}
 
+	/* Local timer processing (see update_process_times()). */
 	run_local_timers();
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_mode(regs));
@@ -684,14 +688,19 @@ irqreturn_t timer_interrupt(int irq, voi
 
 static void init_missing_ticks_accounting(int cpu)
 {
-	struct vcpu_runstate_info runstate = { 0 };
+	struct vcpu_register_runstate_memory_area area;
+	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
 
-	HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate);
+	memset(runstate, 0, sizeof(*runstate));
 
-	per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
+	area.addr.v = runstate;
+	HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
+
+	per_cpu(processed_blocked_time, cpu) =
+		runstate->time[RUNSTATE_blocked];
 	per_cpu(processed_stolen_time, cpu) =
-		runstate.time[RUNSTATE_runnable] +
-		runstate.time[RUNSTATE_offline];
+		runstate->time[RUNSTATE_runnable] +
+		runstate->time[RUNSTATE_offline];
 }
 
 /* not static: needed by APM */
```
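The retry loop added to timer_interrupt() is what makes dropping the per-tick hypercall safe: the guest reads the hypervisor-maintained per-CPU runstate area directly, and uses state_entry_time as a generation counter to detect that it was preempted (and the area rewritten) mid-read. A minimal standalone sketch of the pattern, assuming the structures as defined in this changeset; snapshot_stolen_blocked() is a hypothetical helper name:

```c
/*
 * Sketch of the lockless snapshot used in timer_interrupt() above.
 * The hypervisor rewrites *runstate (including state_entry_time)
 * every time this VCPU is rescheduled, so if state_entry_time is
 * unchanged across the two barriers, the time[] reads in between
 * saw a consistent copy.
 */
static void snapshot_stolen_blocked(struct vcpu_runstate_info *runstate,
				    u64 *stolen, u64 *blocked)
{
	u64 sched_time;

	do {
		sched_time = runstate->state_entry_time;
		barrier();	/* compiler barrier: keep reads in order */
		*stolen  = runstate->time[RUNSTATE_runnable] +
			   runstate->time[RUNSTATE_offline];
		*blocked = runstate->time[RUNSTATE_blocked];
		barrier();
	} while (sched_time != runstate->state_entry_time);
}
```

This is essentially a reader-side seqlock, with the hypervisor's rescheduling of the VCPU playing the role of the writer's sequence bump.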
```diff
--- a/xen/arch/x86/domain.c	Sat Feb 25 20:07:28 2006 +0100
+++ b/xen/arch/x86/domain.c	Sat Feb 25 21:28:27 2006 +0100
@@ -784,6 +784,11 @@ void context_switch(struct vcpu *prev, s
 
     context_saved(prev);
 
+    /* Update per-VCPU guest runstate shared memory area (if registered). */
+    if ( next->runstate_guest != NULL )
+        __copy_to_user(next->runstate_guest, &next->runstate,
+                       sizeof(next->runstate));
+
     schedule_tail(next);
     BUG();
 }
```
```diff
--- a/xen/common/domain.c	Sat Feb 25 20:07:28 2006 +0100
+++ b/xen/common/domain.c	Sat Feb 25 21:28:27 2006 +0100
@@ -461,6 +461,28 @@ long do_vcpu_op(int cmd, int vcpuid, voi
         break;
     }
 
+    case VCPUOP_register_runstate_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EINVAL;
+        if ( v != current )
+            break;
+
+        rc = -EFAULT;
+        if ( copy_from_user(&area, arg, sizeof(area)) )
+            break;
+
+        if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) )
+            break;
+
+        rc = 0;
+        v->runstate_guest = area.addr.v;
+        __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate));
+
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
```
```diff
--- a/xen/include/public/vcpu.h	Sat Feb 25 20:07:28 2006 +0100
+++ b/xen/include/public/vcpu.h	Sat Feb 25 21:28:27 2006 +0100
@@ -53,7 +53,7 @@
 
 /*
  * Return information about the state and running time of a VCPU.
- * @extra_arg == pointer to xen_vcpu_info structure.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
 */
 #define VCPUOP_get_runstate_info 4
 typedef struct vcpu_runstate_info {
@@ -85,6 +85,27 @@ typedef struct vcpu_runstate_info {
  */
 #define RUNSTATE_offline 3
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address may be virtual or physical, depending on the
+ *     platform. The virtual address should be registered on x86 systems.
+ *  2. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+typedef struct vcpu_register_runstate_memory_area {
+    union {
+        struct vcpu_runstate_info *v;
+        uint64_t p;
+    } addr;
+} vcpu_register_runstate_memory_area_t;
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
```
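Given the interface above, guest-side registration might look like the sketch below. The -ENOSYS fallback is an assumption based on do_vcpu_op() returning -ENOSYS for unrecognised commands, which is what a hypervisor predating this changeset would do; register_runstate_area() is a hypothetical helper name:

```c
/*
 * Sketch: register a per-CPU runstate area with the hypervisor.
 * Must be invoked on the VCPU being registered; the hypervisor
 * rejects registration for a foreign VCPU with -EINVAL (see the
 * v != current check in do_vcpu_op() above). On x86 the virtual
 * address is registered via addr.v.
 */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);

static int register_runstate_area(int cpu)
{
	struct vcpu_register_runstate_memory_area area;
	int rc;

	area.addr.v = &per_cpu(runstate, cpu);
	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
				cpu, &area);
	if (rc == -ENOSYS) {
		/* Older hypervisor: keep using the per-tick
		 * VCPUOP_get_runstate_info hypercall instead. */
	}
	return rc;
}
```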
```diff
--- a/xen/include/xen/sched.h	Sat Feb 25 20:07:28 2006 +0100
+++ b/xen/include/xen/sched.h	Sat Feb 25 21:28:27 2006 +0100
@@ -70,6 +70,7 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+    struct vcpu_runstate_info *runstate_guest; /* guest address */
 
     unsigned long    vcpu_flags;
```