ia64/xen-unstable

changeset 5828:43564304cf94

First cut of new time interfaces and synchronisation mechanisms.
Based on an initial patch from Don Fry at IBM.
Still TODO:
1. Testing
2. NTP synchronisation
3. Fix wallclock interface a bit
4. Support for platform timers other than PIT (e.g., HPET, IBM Cyclone)
5. Scale 64-bit TSC diffs instead of 32-bit, just for sanity
6. Error-correcting scale factor is still slightly wrong
7. More testing
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Mon Jul 18 20:22:11 2005 +0000 (2005-07-18)
parents 9697bc63d403
children bf68b5fcbc9b 390e4d63cdb1
files linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile xen/arch/x86/apic.c xen/arch/x86/i8259.c xen/arch/x86/setup.c xen/arch/x86/smpboot.c xen/arch/x86/time.c xen/arch/x86/vmx_intercept.c xen/common/ac_timer.c xen/common/domain.c xen/common/page_alloc.c xen/drivers/char/console.c xen/include/asm-x86/time.h xen/include/public/xen.h xen/include/xen/sched.h xen/include/xen/time.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Sun Jul 17 14:16:21 2005 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Mon Jul 18 20:22:11 2005 +0000
     1.3 @@ -19,7 +19,7 @@ c-obj-y	:= semaphore.o vm86.o \
     1.4  s-obj-y	:=
     1.5  
     1.6  obj-y				+= cpu/
     1.7 -obj-y				+= timers/
     1.8 +#obj-y				+= timers/
     1.9  obj-$(CONFIG_ACPI_BOOT)		+= acpi/
    1.10  #c-obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
    1.11  c-obj-$(CONFIG_MCA)		+= mca.o
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Sun Jul 17 14:16:21 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Mon Jul 18 20:22:11 2005 +0000
     2.3 @@ -104,25 +104,17 @@ extern struct timer_opts timer_tsc;
     2.4  struct timer_opts *cur_timer = &timer_tsc;
     2.5  
     2.6  /* These are periodically updated in shared_info, and then copied here. */
     2.7 -u32 shadow_tsc_stamp;
     2.8 -u64 shadow_system_time;
     2.9 -static u32 shadow_time_version;
    2.10 +struct shadow_time_info {
    2.11 +	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
    2.12 +	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
    2.13 +	u32 tsc_to_nsec_mul;
    2.14 +	u32 tsc_to_usec_mul;
    2.15 +	int tsc_shift;
    2.16 +	u32 version;
    2.17 +};
    2.18 +static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
    2.19  static struct timeval shadow_tv;
    2.20  
    2.21 -/*
    2.22 - * We use this to ensure that gettimeofday() is monotonically increasing. We
    2.23 - * only break this guarantee if the wall clock jumps backwards "a long way".
    2.24 - */
    2.25 -static struct timeval last_seen_tv = {0,0};
    2.26 -
    2.27 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
    2.28 -/* Periodically propagate synchronised time base to the RTC and to Xen. */
    2.29 -static long last_rtc_update, last_update_to_xen;
    2.30 -#endif
    2.31 -
    2.32 -/* Periodically take synchronised time base from Xen, if we need it. */
    2.33 -static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
    2.34 -
    2.35  /* Keep track of last time we did processing/updating of jiffies and xtime. */
    2.36  static u64 processed_system_time;   /* System time (ns) at last processing. */
    2.37  static DEFINE_PER_CPU(u64, processed_system_time);
    2.38 @@ -164,26 +156,147 @@ static int __init __independent_wallcloc
    2.39  #define INDEPENDENT_WALLCLOCK() \
    2.40      (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
    2.41  
    2.42 +int tsc_disable __initdata = 0;
    2.43 +
    2.44 +static void delay_tsc(unsigned long loops)
    2.45 +{
    2.46 +	unsigned long bclock, now;
    2.47 +	
    2.48 +	rdtscl(bclock);
    2.49 +	do
    2.50 +	{
    2.51 +		rep_nop();
    2.52 +		rdtscl(now);
    2.53 +	} while ((now-bclock) < loops);
    2.54 +}
    2.55 +
    2.56 +struct timer_opts timer_tsc = {
    2.57 +	.name = "tsc",
    2.58 +	.delay = delay_tsc,
    2.59 +};
    2.60 +
    2.61 +static inline u32 down_shift(u64 time, int shift)
    2.62 +{
    2.63 +	if ( shift < 0 )
    2.64 +		return (u32)(time >> -shift);
    2.65 +	return (u32)((u32)time << shift);
    2.66 +}
    2.67 +
    2.68 +/*
    2.69 + * 32-bit multiplication of integer multiplicand and fractional multiplier
    2.70 + * yielding 32-bit integer product.
    2.71 + */
    2.72 +static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
    2.73 +{
    2.74 +	u32 product_int, product_frac;
    2.75 +	__asm__ (
    2.76 +		"mul %3"
    2.77 +		: "=a" (product_frac), "=d" (product_int)
    2.78 +		: "0" (multiplicand), "r" (multiplier) );
    2.79 +	return product_int;
    2.80 +}
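
The down_shift()/mul_frac() pair above converts a 64-bit TSC delta into
nanoseconds: down_shift() first brings the delta into 32 bits (a negative
shift means shift right), then mul_frac() multiplies by a 32.32 fixed-point
fraction and keeps the integer part. A minimal portable C sketch of the
combined conversion (hypothetical helper name, not part of this changeset):

    #include <stdint.h>

    /* ns elapsed since tsc_stamp, given a (shift, mul) scale pair:
     * equivalent to mul_frac(down_shift(tsc - tsc_stamp, shift), mul).
     * The 32x32->64 multiply keeps only the top 32 bits, i.e. it
     * multiplies the shifted delta by the fraction mul / 2^32. */
    static uint32_t tsc_delta_to_ns(uint64_t tsc, uint64_t tsc_stamp,
                                    int shift, uint32_t mul)
    {
        uint64_t delta = tsc - tsc_stamp;
        uint32_t d32 = (shift < 0) ? (uint32_t)(delta >> -shift)
                                   : ((uint32_t)delta << shift);
        return (uint32_t)(((uint64_t)d32 * mul) >> 32);
    }
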
    2.81 +
    2.82 +void init_cpu_khz(void)
    2.83 +{
    2.84 +	u64 __cpu_khz = 1000000ULL << 32;
    2.85 +	struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
    2.86 +	do_div(__cpu_khz, info->tsc_to_system_mul);
    2.87 +	cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
    2.88 +	printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
    2.89 +	       cpu_khz / 1000, cpu_khz % 1000);
    2.90 +}
    2.91 +
    2.92 +static u64 get_nsec_offset(struct shadow_time_info *shadow)
    2.93 +{
    2.94 +	u64 now;
    2.95 +	u32 delta;
    2.96 +	rdtscll(now);
    2.97 +	delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
    2.98 +	return mul_frac(delta, shadow->tsc_to_nsec_mul);
    2.99 +}
   2.100 +
   2.101 +static unsigned long get_usec_offset(struct shadow_time_info *shadow)
   2.102 +{
   2.103 +	u64 now;
   2.104 +	u32 delta;
   2.105 +	rdtscll(now);
   2.106 +	delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
   2.107 +	return mul_frac(delta, shadow->tsc_to_usec_mul);
   2.108 +}
   2.109 +
   2.110 +static void update_wallclock(void)
   2.111 +{
   2.112 +	shared_info_t *s = HYPERVISOR_shared_info;
   2.113 +	long wtm_nsec;
   2.114 +	time_t wtm_sec, sec;
   2.115 +	s64 nsec;
   2.116 +
   2.117 +	shadow_tv.tv_sec  = s->wc_sec;
   2.118 +	shadow_tv.tv_usec = s->wc_usec;
   2.119 +
   2.120 +	if (INDEPENDENT_WALLCLOCK())
   2.121 +		return;
   2.122 +
   2.123 +	if ((time_status & STA_UNSYNC) != 0)
   2.124 +		return;
   2.125 +
   2.126 +	/* Adjust shadow for jiffies that haven't updated xtime yet. */
   2.127 +	shadow_tv.tv_usec -= 
   2.128 +		(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
   2.129 +	HANDLE_USEC_UNDERFLOW(shadow_tv);
   2.130 +
   2.131 +	/* Update our unsynchronised xtime appropriately. */
   2.132 +	sec = shadow_tv.tv_sec;
   2.133 +	nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   2.134 +
   2.135 +	__normalize_time(&sec, &nsec);
   2.136 +	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.137 +	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   2.138 +
   2.139 +	set_normalized_timespec(&xtime, sec, nsec);
   2.140 +	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   2.141 +}
   2.142 +
   2.143  /*
   2.144   * Reads a consistent set of time-base values from Xen, into a shadow data
   2.145   * area. Must be called with the xtime_lock held for writing.
   2.146   */
   2.147  static void __get_time_values_from_xen(void)
   2.148  {
   2.149 -	shared_info_t *s = HYPERVISOR_shared_info;
   2.150 +	shared_info_t           *s = HYPERVISOR_shared_info;
   2.151 +	struct vcpu_time_info   *src;
   2.152 +	struct shadow_time_info *dst;
   2.153 +
   2.154 +	src = &s->vcpu_time[smp_processor_id()];
   2.155 +	dst = &per_cpu(shadow_time, smp_processor_id());
   2.156  
   2.157  	do {
   2.158 -		shadow_time_version = s->time_version2;
   2.159 +		dst->version = src->time_version2;
   2.160  		rmb();
   2.161 -		shadow_tv.tv_sec    = s->wc_sec;
   2.162 -		shadow_tv.tv_usec   = s->wc_usec;
   2.163 -		shadow_tsc_stamp    = (u32)s->tsc_timestamp;
   2.164 -		shadow_system_time  = s->system_time;
   2.165 +		dst->tsc_timestamp     = src->tsc_timestamp;
   2.166 +		dst->system_timestamp  = src->system_time;
   2.167 +		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
   2.168 +		dst->tsc_shift         = src->tsc_shift;
   2.169  		rmb();
   2.170  	}
   2.171 -	while (shadow_time_version != s->time_version1);
   2.172 +	while (dst->version != src->time_version1);
   2.173  
   2.174 -	cur_timer->mark_offset();
   2.175 +	dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
   2.176 +
   2.177 +	if ((shadow_tv.tv_sec != s->wc_sec) ||
   2.178 +	    (shadow_tv.tv_usec != s->wc_usec))
   2.179 +		update_wallclock();
   2.180 +}
   2.181 +
   2.182 +static inline int time_values_up_to_date(int cpu)
   2.183 +{
   2.184 +	struct vcpu_time_info   *src;
   2.185 +	struct shadow_time_info *dst;
   2.186 +
   2.187 +	src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()];
   2.188 +	dst = &per_cpu(shadow_time, smp_processor_id());
   2.189 +
   2.190 +	return (dst->version == src->time_version2);
   2.191  }
   2.192  
   2.193  #define TIME_VALUES_UP_TO_DATE \
   2.194 @@ -229,13 +342,18 @@ void do_gettimeofday(struct timeval *tv)
   2.195  	unsigned long max_ntp_tick;
   2.196  	unsigned long flags;
   2.197  	s64 nsec;
   2.198 +	unsigned int cpu;
   2.199 +	struct shadow_time_info *shadow;
   2.200 +
   2.201 +	cpu = get_cpu();
   2.202 +	shadow = &per_cpu(shadow_time, cpu);
   2.203  
   2.204  	do {
   2.205  		unsigned long lost;
   2.206  
   2.207  		seq = read_seqbegin(&xtime_lock);
   2.208  
   2.209 -		usec = cur_timer->get_offset();
   2.210 +		usec = get_usec_offset(shadow);
   2.211  		lost = jiffies - wall_jiffies;
   2.212  
   2.213  		/*
   2.214 @@ -256,11 +374,11 @@ void do_gettimeofday(struct timeval *tv)
   2.215  		sec = xtime.tv_sec;
   2.216  		usec += (xtime.tv_nsec / NSEC_PER_USEC);
   2.217  
   2.218 -		nsec = shadow_system_time - processed_system_time;
   2.219 +		nsec = shadow->system_timestamp - processed_system_time;
   2.220  		__normalize_time(&sec, &nsec);
   2.221  		usec += (long)nsec / NSEC_PER_USEC;
   2.222  
   2.223 -		if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
   2.224 +		if (unlikely(!time_values_up_to_date(cpu))) {
   2.225  			/*
   2.226  			 * We may have blocked for a long time,
   2.227  			 * rendering our calculations invalid
   2.228 @@ -275,21 +393,13 @@ void do_gettimeofday(struct timeval *tv)
   2.229  		}
   2.230  	} while (read_seqretry(&xtime_lock, seq));
   2.231  
   2.232 +	put_cpu();
   2.233 +
   2.234  	while (usec >= USEC_PER_SEC) {
   2.235  		usec -= USEC_PER_SEC;
   2.236  		sec++;
   2.237  	}
   2.238  
   2.239 -	/* Ensure that time-of-day is monotonically increasing. */
   2.240 -	if ((sec < last_seen_tv.tv_sec) ||
   2.241 -	    ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) {
   2.242 -		sec = last_seen_tv.tv_sec;
   2.243 -		usec = last_seen_tv.tv_usec;
   2.244 -	} else {
   2.245 -		last_seen_tv.tv_sec = sec;
   2.246 -		last_seen_tv.tv_usec = usec;
   2.247 -	}
   2.248 -
   2.249  	tv->tv_sec = sec;
   2.250  	tv->tv_usec = usec;
   2.251  }
   2.252 @@ -302,6 +412,8 @@ int do_settimeofday(struct timespec *tv)
   2.253  	long wtm_nsec;
   2.254  	s64 nsec;
   2.255  	struct timespec xentime;
   2.256 +	unsigned int cpu;
   2.257 +	struct shadow_time_info *shadow;
   2.258  
   2.259  	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
   2.260  		return -EINVAL;
   2.261 @@ -309,6 +421,9 @@ int do_settimeofday(struct timespec *tv)
   2.262  	if (!INDEPENDENT_WALLCLOCK())
   2.263  		return 0; /* Silent failure? */
   2.264  
   2.265 +	cpu = get_cpu();
   2.266 +	shadow = &per_cpu(shadow_time, cpu);
   2.267 +
   2.268  	write_seqlock_irq(&xtime_lock);
   2.269  
   2.270  	/*
   2.271 @@ -317,9 +432,8 @@ int do_settimeofday(struct timespec *tv)
   2.272  	 * be stale, so we can retry with fresh ones.
   2.273  	 */
   2.274   again:
   2.275 -	nsec = (s64)tv->tv_nsec -
   2.276 -	    ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
   2.277 -	if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
   2.278 +	nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
   2.279 +	if (unlikely(!time_values_up_to_date(cpu))) {
   2.280  		__get_time_values_from_xen();
   2.281  		goto again;
   2.282  	}
   2.283 @@ -335,7 +449,7 @@ int do_settimeofday(struct timespec *tv)
   2.284  	 */
   2.285  	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
   2.286  
   2.287 -	nsec -= (shadow_system_time - processed_system_time);
   2.288 +	nsec -= (shadow->system_timestamp - processed_system_time);
   2.289  
   2.290  	__normalize_time(&sec, &nsec);
   2.291  	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.292 @@ -349,24 +463,21 @@ int do_settimeofday(struct timespec *tv)
   2.293  	time_maxerror = NTP_PHASE_LIMIT;
   2.294  	time_esterror = NTP_PHASE_LIMIT;
   2.295  
   2.296 -	/* Reset all our running time counts. They make no sense now. */
   2.297 -	last_seen_tv.tv_sec = 0;
   2.298 -	last_update_from_xen = 0;
   2.299 -
   2.300  #ifdef CONFIG_XEN_PRIVILEGED_GUEST
   2.301  	if (xen_start_info.flags & SIF_INITDOMAIN) {
   2.302  		dom0_op_t op;
   2.303 -		last_rtc_update = last_update_to_xen = 0;
   2.304  		op.cmd = DOM0_SETTIME;
   2.305  		op.u.settime.secs        = xentime.tv_sec;
   2.306  		op.u.settime.usecs       = xentime.tv_nsec / NSEC_PER_USEC;
   2.307 -		op.u.settime.system_time = shadow_system_time;
   2.308 +		op.u.settime.system_time = shadow->system_timestamp;
   2.309  		write_sequnlock_irq(&xtime_lock);
   2.310  		HYPERVISOR_dom0_op(&op);
   2.311  	} else
   2.312  #endif
   2.313  		write_sequnlock_irq(&xtime_lock);
   2.314  
   2.315 +	put_cpu();
   2.316 +
   2.317  	clock_was_set();
   2.318  	return 0;
   2.319  }
   2.320 @@ -403,10 +514,31 @@ static int set_rtc_mmss(unsigned long no
   2.321   */
   2.322  unsigned long long monotonic_clock(void)
   2.323  {
   2.324 -	return cur_timer->monotonic_clock();
   2.325 +	int cpu = get_cpu();
   2.326 +	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
   2.327 +	s64 off;
   2.328 +	unsigned long flags;
   2.329 +	
   2.330 +	for ( ; ; ) {
   2.331 +		off = get_nsec_offset(shadow);
   2.332 +		if (time_values_up_to_date(cpu))
   2.333 +			break;
   2.334 +		write_seqlock_irqsave(&xtime_lock, flags);
   2.335 +		__get_time_values_from_xen();
   2.336 +		write_sequnlock_irqrestore(&xtime_lock, flags);
   2.337 +	}
   2.338 +
   2.339 +	put_cpu();
   2.340 +
   2.341 +	return shadow->system_timestamp + off;
   2.342  }
   2.343  EXPORT_SYMBOL(monotonic_clock);
   2.344  
   2.345 +unsigned long long sched_clock(void)
   2.346 +{
   2.347 +	return monotonic_clock();
   2.348 +}
   2.349 +
   2.350  #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
   2.351  unsigned long profile_pc(struct pt_regs *regs)
   2.352  {
   2.353 @@ -427,27 +559,26 @@ EXPORT_SYMBOL(profile_pc);
   2.354  static inline void do_timer_interrupt(int irq, void *dev_id,
   2.355  					struct pt_regs *regs)
   2.356  {
   2.357 -	time_t wtm_sec, sec;
   2.358 -	s64 delta, delta_cpu, nsec;
   2.359 -	long sec_diff, wtm_nsec;
   2.360 +	s64 delta, delta_cpu;
   2.361  	int cpu = smp_processor_id();
   2.362 +	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
   2.363  
   2.364  	do {
   2.365  		__get_time_values_from_xen();
   2.366  
   2.367 -		delta = delta_cpu = (s64)shadow_system_time +
   2.368 -			((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
   2.369 +		delta = delta_cpu = 
   2.370 +			shadow->system_timestamp + get_nsec_offset(shadow);
   2.371  		delta     -= processed_system_time;
   2.372  		delta_cpu -= per_cpu(processed_system_time, cpu);
   2.373  	}
   2.374 -	while (!TIME_VALUES_UP_TO_DATE);
   2.375 +	while (!time_values_up_to_date(cpu));
   2.376  
   2.377  	if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
   2.378  		printk("Timer ISR/%d: Time went backwards: "
   2.379  		       "delta=%lld cpu_delta=%lld shadow=%lld "
   2.380  		       "off=%lld processed=%lld cpu_processed=%lld\n",
   2.381 -		       cpu, delta, delta_cpu, shadow_system_time,
   2.382 -		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
   2.383 +		       cpu, delta, delta_cpu, shadow->system_timestamp,
   2.384 +		       (s64)get_nsec_offset(shadow),
   2.385  		       processed_system_time,
   2.386  		       per_cpu(processed_system_time, cpu));
   2.387  		for (cpu = 0; cpu < num_online_cpus(); cpu++)
   2.388 @@ -470,76 +601,6 @@ static inline void do_timer_interrupt(in
   2.389  		update_process_times(user_mode(regs));
   2.390  		profile_tick(CPU_PROFILING, regs);
   2.391  	}
   2.392 -
   2.393 -	if (cpu != 0)
   2.394 -		return;
   2.395 -
   2.396 -	/*
   2.397 -	 * Take synchronised time from Xen once a minute if we're not
   2.398 -	 * synchronised ourselves, and we haven't chosen to keep an independent
   2.399 -	 * time base.
   2.400 -	 */
   2.401 -	if (!INDEPENDENT_WALLCLOCK() &&
   2.402 -	    ((time_status & STA_UNSYNC) != 0) &&
   2.403 -	    (xtime.tv_sec > (last_update_from_xen + 60))) {
   2.404 -		/* Adjust shadow for jiffies that haven't updated xtime yet. */
   2.405 -		shadow_tv.tv_usec -= 
   2.406 -			(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
   2.407 -		HANDLE_USEC_UNDERFLOW(shadow_tv);
   2.408 -
   2.409 -		/*
   2.410 -		 * Reset our running time counts if they are invalidated by
   2.411 -		 * a warp backwards of more than 500ms.
   2.412 -		 */
   2.413 -		sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
   2.414 -		if (unlikely(abs(sec_diff) > 1) ||
   2.415 -		    unlikely(((sec_diff * USEC_PER_SEC) +
   2.416 -			      (xtime.tv_nsec / NSEC_PER_USEC) -
   2.417 -			      shadow_tv.tv_usec) > 500000)) {
   2.418 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   2.419 -			last_rtc_update = last_update_to_xen = 0;
   2.420 -#endif
   2.421 -			last_seen_tv.tv_sec = 0;
   2.422 -		}
   2.423 -
   2.424 -		/* Update our unsynchronised xtime appropriately. */
   2.425 -		sec = shadow_tv.tv_sec;
   2.426 -		nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   2.427 -
   2.428 -		__normalize_time(&sec, &nsec);
   2.429 -		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.430 -		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   2.431 -
   2.432 -		set_normalized_timespec(&xtime, sec, nsec);
   2.433 -		set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   2.434 -
   2.435 -		last_update_from_xen = sec;
   2.436 -	}
   2.437 -
   2.438 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   2.439 -	if (!(xen_start_info.flags & SIF_INITDOMAIN))
   2.440 -		return;
   2.441 -
   2.442 -	/* Send synchronised time to Xen approximately every minute. */
   2.443 -	if (((time_status & STA_UNSYNC) == 0) &&
   2.444 -	    (xtime.tv_sec > (last_update_to_xen + 60))) {
   2.445 -		dom0_op_t op;
   2.446 -		struct timeval tv;
   2.447 -
   2.448 -		tv.tv_sec   = xtime.tv_sec;
   2.449 -		tv.tv_usec  = xtime.tv_nsec / NSEC_PER_USEC;
   2.450 -		tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
   2.451 -		HANDLE_USEC_OVERFLOW(tv);
   2.452 -
   2.453 -		op.cmd = DOM0_SETTIME;
   2.454 -		op.u.settime.secs        = tv.tv_sec;
   2.455 -		op.u.settime.usecs       = tv.tv_usec;
   2.456 -		op.u.settime.system_time = shadow_system_time;
   2.457 -		HYPERVISOR_dom0_op(&op);
   2.458 -
   2.459 -		last_update_to_xen = xtime.tv_sec;
   2.460 -	}
   2.461 -#endif
   2.462  }
   2.463  
   2.464  /*
   2.465 @@ -731,12 +792,10 @@ void __init time_init(void)
   2.466  	xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   2.467  	set_normalized_timespec(&wall_to_monotonic,
   2.468  		-xtime.tv_sec, -xtime.tv_nsec);
   2.469 -	processed_system_time = shadow_system_time;
   2.470 +	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
   2.471  	per_cpu(processed_system_time, 0) = processed_system_time;
   2.472  
   2.473 -	if (timer_tsc_init.init(NULL) != 0)
   2.474 -		BUG();
   2.475 -	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
   2.476 +	init_cpu_khz();
   2.477  
   2.478  #if defined(__x86_64__)
   2.479  	vxtime.mode = VXTIME_TSC;
   2.480 @@ -807,21 +866,15 @@ void time_suspend(void)
   2.481  /* No locking required. We are only CPU running, and interrupts are off. */
   2.482  void time_resume(void)
   2.483  {
   2.484 -	if (timer_tsc_init.init(NULL) != 0)
   2.485 -		BUG();
   2.486 +	init_cpu_khz();
   2.487  
   2.488  	/* Get timebases for new environment. */ 
   2.489  	__get_time_values_from_xen();
   2.490  
   2.491  	/* Reset our own concept of passage of system time. */
   2.492 -	processed_system_time = shadow_system_time;
   2.493 +	processed_system_time =
   2.494 +		per_cpu(shadow_time, smp_processor_id()).system_timestamp;
   2.495  	per_cpu(processed_system_time, 0) = processed_system_time;
   2.496 -
   2.497 -	/* Accept a warp in UTC (wall-clock) time. */
   2.498 -	last_seen_tv.tv_sec = 0;
   2.499 -
   2.500 -	/* Make sure we resync UTC time with Xen on next timer interrupt. */
   2.501 -	last_update_from_xen = 0;
   2.502  }
   2.503  
   2.504  #ifdef CONFIG_SMP
   2.505 @@ -832,7 +885,8 @@ void local_setup_timer(void)
   2.506  
   2.507  	do {
   2.508  		seq = read_seqbegin(&xtime_lock);
   2.509 -		per_cpu(processed_system_time, cpu) = shadow_system_time;
   2.510 +		per_cpu(processed_system_time, cpu) = 
   2.511 +			per_cpu(shadow_time, cpu).system_timestamp;
   2.512  	} while (read_seqretry(&xtime_lock, seq));
   2.513  
   2.514  	per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
   2.515 @@ -861,3 +915,13 @@ static int __init xen_sysctl_init(void)
   2.516  	return 0;
   2.517  }
   2.518  __initcall(xen_sysctl_init);
   2.519 +
   2.520 +/*
   2.521 + * Local variables:
   2.522 + *  c-file-style: "linux"
   2.523 + *  indent-tabs-mode: t
   2.524 + *  c-indent-level: 8
   2.525 + *  c-basic-offset: 8
   2.526 + *  tab-width: 8
   2.527 + * End:
   2.528 + */
     3.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Sun Jul 17 14:16:21 2005 +0000
     3.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Mon Jul 18 20:22:11 2005 +0000
     3.3 @@ -15,7 +15,7 @@ c-obj-y	:= semaphore.o i387.o sys_x86_64
     3.4  		ptrace.o quirks.o syscall.o bootflag.o
     3.5  
     3.6  i386-obj-y			:= time.o
     3.7 -obj-y				+= ../../i386/kernel/timers/
     3.8 +#obj-y				+= ../../i386/kernel/timers/
     3.9  
    3.10  s-obj-y	:=
    3.11  
     4.1 --- a/xen/arch/x86/apic.c	Sun Jul 17 14:16:21 2005 +0000
     4.2 +++ b/xen/arch/x86/apic.c	Mon Jul 18 20:22:11 2005 +0000
     4.3 @@ -723,16 +723,8 @@ void __setup_APIC_LVTT(unsigned int cloc
     4.4  static void __init setup_APIC_timer(unsigned int clocks)
     4.5  {
     4.6      unsigned long flags;
     4.7 -    
     4.8      local_irq_save(flags);
     4.9 -
    4.10 -    /*
    4.11 -     * Wait for IRQ0's slice:
    4.12 -     */
    4.13 -    wait_timer_tick();
    4.14 -
    4.15      __setup_APIC_LVTT(clocks);
    4.16 -
    4.17      local_irq_restore(flags);
    4.18  }
    4.19  
     5.1 --- a/xen/arch/x86/i8259.c	Sun Jul 17 14:16:21 2005 +0000
     5.2 +++ b/xen/arch/x86/i8259.c	Mon Jul 18 20:22:11 2005 +0000
     5.3 @@ -19,7 +19,7 @@
     5.4  #include <asm/bitops.h>
     5.5  #include <xen/delay.h>
     5.6  #include <asm/apic.h>
     5.7 -
     5.8 +#include <io_ports.h>
     5.9  
    5.10  /*
    5.11   * Common place to define all x86 IRQ vectors
    5.12 @@ -395,9 +395,9 @@ void __init init_IRQ(void)
    5.13      /* Set the clock to HZ Hz */
    5.14  #define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
    5.15  #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
    5.16 -    outb_p(0x34,0x43);           /* binary, mode 2, LSB/MSB, ch 0 */
    5.17 -    outb_p(LATCH & 0xff , 0x40); /* LSB */
    5.18 -    outb(LATCH >> 8 , 0x40);     /* MSB */
    5.19 +    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
    5.20 +    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
    5.21 +    outb(LATCH >> 8, PIT_CH0);     /* MSB */
    5.22  
    5.23      setup_irq(2, &cascade);
    5.24  }
     6.1 --- a/xen/arch/x86/smpboot.c	Sun Jul 17 14:16:21 2005 +0000
     6.2 +++ b/xen/arch/x86/smpboot.c	Mon Jul 18 20:22:11 2005 +0000
     6.3 @@ -40,6 +40,7 @@
     6.4  #include <xen/sched.h>
     6.5  #include <xen/irq.h>
     6.6  #include <xen/delay.h>
     6.7 +#include <xen/softirq.h>
     6.8  #include <asm/current.h>
     6.9  #include <asm/mc146818rtc.h>
    6.10  #include <asm/desc.h>
    6.11 @@ -406,6 +407,7 @@ void __init smp_callin(void)
    6.12  	 */
    6.13  	if (cpu_has_tsc && cpu_khz)
    6.14  		synchronize_tsc_ap();
    6.15 +	calibrate_tsc_ap();
    6.16  }
    6.17  
    6.18  int cpucount;
    6.19 @@ -465,6 +467,8 @@ void __init start_secondary(void *unused
    6.20  	/* We can take interrupts now: we're officially "up". */
    6.21  	local_irq_enable();
    6.22  
    6.23 +        init_percpu_time();
    6.24 +
    6.25  	wmb();
    6.26  	startup_cpu_idle_loop();
    6.27  }
    6.28 @@ -1149,6 +1153,7 @@ static void __init smp_boot_cpus(unsigne
    6.29  	 */
    6.30  	if (cpu_has_tsc && cpucount && cpu_khz)
    6.31  		synchronize_tsc_bp();
    6.32 +	calibrate_tsc_bp();
    6.33  }
    6.34  
    6.35  /* These are wrappers to interface to the new boot process.  Someone
    6.36 @@ -1167,22 +1172,21 @@ void __devinit smp_prepare_boot_cpu(void
    6.37  int __devinit __cpu_up(unsigned int cpu)
    6.38  {
    6.39  	/* This only works at boot for x86.  See "rewrite" above. */
    6.40 -	if (cpu_isset(cpu, smp_commenced_mask)) {
    6.41 -		local_irq_enable();
    6.42 +	if (cpu_isset(cpu, smp_commenced_mask))
    6.43  		return -ENOSYS;
    6.44 -	}
    6.45  
    6.46  	/* In case one didn't come up */
    6.47 -	if (!cpu_isset(cpu, cpu_callin_map)) {
    6.48 -		local_irq_enable();
    6.49 +	if (!cpu_isset(cpu, cpu_callin_map))
    6.50  		return -EIO;
    6.51 +
    6.52 +	/* Unleash the CPU! */
    6.53 +	cpu_set(cpu, smp_commenced_mask);
    6.54 +	while (!cpu_isset(cpu, cpu_online_map)) {
    6.55 +		mb();
    6.56 +		if (softirq_pending(0))
    6.57 +			do_softirq();
    6.58  	}
    6.59  
    6.60 -	local_irq_enable();
    6.61 -	/* Unleash the CPU! */
    6.62 -	cpu_set(cpu, smp_commenced_mask);
    6.63 -	while (!cpu_isset(cpu, cpu_online_map))
    6.64 -		mb();
    6.65  	return 0;
    6.66  }
    6.67  
     7.1 --- a/xen/arch/x86/time.c	Sun Jul 17 14:16:21 2005 +0000
     7.2 +++ b/xen/arch/x86/time.c	Mon Jul 18 20:22:11 2005 +0000
     7.3 @@ -1,16 +1,12 @@
     7.4 -/****************************************************************************
     7.5 - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
     7.6 - * (C) 2002-2003 University of Cambridge
     7.7 - ****************************************************************************
     7.8 - *
     7.9 - *        File: i386/time.c
    7.10 - *      Author: Rolf Neugebar & Keir Fraser
    7.11 - */
    7.12 -
    7.13 -/*
    7.14 - *  linux/arch/i386/kernel/time.c
    7.15 - *
    7.16 - *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
    7.17 +/******************************************************************************
    7.18 + * arch/x86/time.c
    7.19 + * 
    7.20 + * Per-CPU time calibration and management.
    7.21 + * 
    7.22 + * Copyright (c) 2002-2005, K A Fraser
    7.23 + * 
    7.24 + * Portions from Linux are:
    7.25 + * Copyright (c) 1991, 1992, 1995  Linus Torvalds
    7.26   */
    7.27  
    7.28  #include <xen/config.h>
    7.29 @@ -31,29 +27,74 @@
    7.30  #include <asm/processor.h>
    7.31  #include <asm/fixmap.h>
    7.32  #include <asm/mc146818rtc.h>
    7.33 +#include <asm/div64.h>
    7.34 +#include <io_ports.h>
    7.35  
    7.36 -/* GLOBAL */
    7.37  unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
    7.38  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
    7.39  int timer_ack = 0;
    7.40  unsigned long volatile jiffies;
    7.41 +static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
    7.42  
    7.43 -/* PRIVATE */
    7.44 -static unsigned int    rdtsc_bitshift;  /* Which 32 bits of TSC do we use?   */
    7.45 -static u64             cpu_freq;        /* CPU frequency (Hz)                */
    7.46 -static u32             st_scale_f;      /* Cycles -> ns, fractional part     */
    7.47 -static u32             st_scale_i;      /* Cycles -> ns, integer part        */
    7.48 -static u32             shifted_tsc_irq; /* CPU0's TSC at last 'time update'  */
    7.49 -static u64             full_tsc_irq;    /* ...ditto, but all 64 bits         */
    7.50 -static s_time_t        stime_irq;       /* System time at last 'time update' */
    7.51 -static unsigned long   wc_sec, wc_usec; /* UTC time at last 'time update'.   */
    7.52 -static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
    7.53 +struct time_scale {
    7.54 +    int shift;
    7.55 +    u32 mul_frac;
    7.56 +};
    7.57 +
    7.58 +struct cpu_time {
    7.59 +    u64 local_tsc_stamp;
    7.60 +    s_time_t stime_local_stamp;
    7.61 +    s_time_t stime_master_stamp;
    7.62 +    struct time_scale tsc_scale;
    7.63 +    struct ac_timer calibration_timer;
    7.64 +} __cacheline_aligned;
    7.65 +
    7.66 +static struct cpu_time cpu_time[NR_CPUS];
    7.67 +
    7.68 +/* Protected by platform_timer_lock. */
    7.69 +static s_time_t stime_platform_stamp;
    7.70 +static u64 platform_timer_stamp;
    7.71 +static struct time_scale platform_timer_scale;
    7.72 +static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
    7.73 +
    7.74 +static inline u32 down_shift(u64 time, int shift)
    7.75 +{
    7.76 +    if ( shift < 0 )
    7.77 +        return (u32)(time >> -shift);
    7.78 +    return (u32)((u32)time << shift);
    7.79 +}
    7.80 +
    7.81 +/*
    7.82 + * 32-bit division of integer dividend and integer divisor yielding
    7.83 + * 32-bit fractional quotient.
    7.84 + */
    7.85 +static inline u32 div_frac(u32 dividend, u32 divisor)
    7.86 +{
    7.87 +    u32 quotient, remainder;
    7.88 +    ASSERT(dividend < divisor);
    7.89 +    __asm__ ( 
    7.90 +        "div %4"
    7.91 +        : "=a" (quotient), "=d" (remainder)
    7.92 +        : "0" (0), "1" (dividend), "r" (divisor) );
    7.93 +    return quotient;
    7.94 +}
    7.95 +
    7.96 +/*
    7.97 + * 32-bit multiplication of integer multiplicand and fractional multiplier
    7.98 + * yielding 32-bit integer product.
    7.99 + */
   7.100 +static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
   7.101 +{
   7.102 +    u32 product_int, product_frac;
   7.103 +    __asm__ (
   7.104 +        "mul %3"
   7.105 +        : "=a" (product_frac), "=d" (product_int)
   7.106 +        : "0" (multiplicand), "r" (multiplier) );
   7.107 +    return product_int;
   7.108 +}
   7.109  
   7.110  void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
   7.111  {
   7.112 -    write_lock_irq(&time_lock);
   7.113 -
   7.114 -#ifdef CONFIG_X86_IO_APIC
   7.115      if ( timer_ack ) 
   7.116      {
   7.117          extern spinlock_t i8259A_lock;
   7.118 @@ -63,31 +104,10 @@ void timer_interrupt(int irq, void *dev_
   7.119          inb(0x20);
   7.120          spin_unlock(&i8259A_lock);
   7.121      }
   7.122 -#endif
   7.123      
   7.124 -    /*
   7.125 -     * Updates TSC timestamp (used to interpolate passage of time between
   7.126 -     * interrupts).
   7.127 -     */
   7.128 -    rdtscll(full_tsc_irq);
   7.129 -    shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
   7.130 -
   7.131      /* Update jiffies counter. */
   7.132      (*(unsigned long *)&jiffies)++;
   7.133  
   7.134 -    /* Update wall time. */
   7.135 -    wc_usec += 1000000/HZ;
   7.136 -    if ( wc_usec >= 1000000 )
   7.137 -    {
   7.138 -        wc_usec -= 1000000;
   7.139 -        wc_sec++;
   7.140 -    }
   7.141 -
   7.142 -    /* Updates system time (nanoseconds since boot). */
   7.143 -    stime_irq += MILLISECS(1000/HZ);
   7.144 -
   7.145 -    write_unlock_irq(&time_lock);
   7.146 -
   7.147      /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
   7.148      if ( !cpu_has_apic )
   7.149          raise_softirq(AC_TIMER_SOFTIRQ);
   7.150 @@ -103,9 +123,9 @@ static struct irqaction irq0 = { timer_i
   7.151  #define CALIBRATE_FRAC  20      /* calibrate over 50ms */
   7.152  #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
   7.153  
   7.154 -static unsigned long __init calibrate_tsc(void)
   7.155 +static u64 calibrate_boot_tsc(void)
   7.156  {
   7.157 -    u64 start, end, diff;
   7.158 +    u64 start, end;
   7.159      unsigned long count;
   7.160  
   7.161      /* Set the Gate high, disable speaker */
   7.162 @@ -118,9 +138,9 @@ static unsigned long __init calibrate_ts
   7.163       * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
   7.164       * to begin countdown.
   7.165       */
   7.166 -    outb(0xb0, 0x43);           /* binary, mode 0, LSB/MSB, Ch 2 */
   7.167 -    outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
   7.168 -    outb(CALIBRATE_LATCH >> 8, 0x42);   /* MSB of count */
   7.169 +    outb(0xb0, PIT_MODE);           /* binary, mode 0, LSB/MSB, Ch 2 */
   7.170 +    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
   7.171 +    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
   7.172  
   7.173      rdtscll(start);
   7.174      for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
   7.175 @@ -131,15 +151,147 @@ static unsigned long __init calibrate_ts
   7.176      if ( count == 0 )
   7.177          return 0;
   7.178  
   7.179 -    diff = end - start;
   7.180 +    return ((end - start) * (u64)CALIBRATE_FRAC);
   7.181 +}
   7.182  
   7.183 -#if defined(__i386__)
   7.184 -    /* If quotient doesn't fit in 32 bits then we return error (zero). */
   7.185 -    if ( diff & ~0xffffffffULL )
   7.186 -        return 0;
   7.187 -#endif
   7.188 +static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
   7.189 +{
   7.190 +    u64 tps64 = ticks_per_sec;
   7.191 +    u32 tps32;
   7.192 +    int shift = 0;
   7.193  
   7.194 -    return (unsigned long)diff;
   7.195 +    while ( tps64 > (MILLISECS(1000)*2) )
   7.196 +    {
   7.197 +        tps64 >>= 1;
   7.198 +        shift--;
   7.199 +    }
   7.200 +
   7.201 +    tps32 = (u32)tps64;
   7.202 +    while ( tps32 < (u32)MILLISECS(1000) )
   7.203 +    {
   7.204 +        tps32 <<= 1;
   7.205 +        shift++;
   7.206 +    }
   7.207 +
   7.208 +    ts->mul_frac = div_frac(MILLISECS(1000), tps32);
   7.209 +    ts->shift    = shift;
   7.210 +}
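
set_time_scale() normalises any tick rate into the range [10^9, 2x10^9) so
that nanoseconds-per-tick becomes a pure 32-bit fraction. A worked example,
assuming a hypothetical 3 GHz TSC (not a value taken from this changeset):

    ticks_per_sec = 3,000,000,000
    3.0e9 > 2.0e9             ->  tps = 1,500,000,000, shift = -1
    mul_frac = (10^9 << 32) / 1.5e9 = 0xAAAAAAAA     (i.e. ~2/3)

    ns = mul_frac(down_shift(delta, -1), 0xAAAAAAAA)
       = (delta >> 1) * 2/3
       = delta / 3             /* 3 TSC ticks per nanosecond */
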
   7.211 +
   7.212 +static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
   7.213 +static unsigned int tsc_calibrate_status = 0;
   7.214 +
   7.215 +void calibrate_tsc_bp(void)
   7.216 +{
   7.217 +    while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
   7.218 +        mb();
   7.219 +
   7.220 +    outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
   7.221 +    outb(CALIBRATE_LATCH >> 8, PIT_CH2);
   7.222 +
   7.223 +    tsc_calibrate_status = 1;
    7.224 +    wmb();
   7.225 +
   7.226 +    while ( (inb(0x61) & 0x20) == 0 )
   7.227 +        continue;
   7.228 +
   7.229 +    tsc_calibrate_status = 2;
    7.230 +    wmb();
   7.231 +
   7.232 +    while ( atomic_read(&tsc_calibrate_gang) != 0 )
   7.233 +        mb();
   7.234 +}
   7.235 +
   7.236 +void calibrate_tsc_ap(void)
   7.237 +{
   7.238 +    u64 t1, t2, ticks_per_sec;
   7.239 +
   7.240 +    atomic_inc(&tsc_calibrate_gang);
   7.241 +
   7.242 +    while ( tsc_calibrate_status < 1 )
   7.243 +        mb();
   7.244 +
   7.245 +    rdtscll(t1);
   7.246 +
   7.247 +    while ( tsc_calibrate_status < 2 )
   7.248 +        mb();
   7.249 +
   7.250 +    rdtscll(t2);
   7.251 +
   7.252 +    ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
   7.253 +    set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec);
   7.254 +
   7.255 +    atomic_dec(&tsc_calibrate_gang);
   7.256 +}
   7.257 +
   7.258 +/* Protected by platform_timer_lock. */
   7.259 +static u64 platform_pit_counter;
   7.260 +static u16 pit_stamp;
   7.261 +static struct ac_timer pit_overflow_timer;
   7.262 +
   7.263 +static u16 pit_read_counter(void)
   7.264 +{
   7.265 +    u16 count;
   7.266 +    ASSERT(spin_is_locked(&platform_timer_lock));
   7.267 +    outb(0x80, PIT_MODE);
   7.268 +    count  = inb(PIT_CH2);
   7.269 +    count |= inb(PIT_CH2) << 8;
   7.270 +    return count;
   7.271 +}
   7.272 +
   7.273 +static void pit_overflow(void *unused)
   7.274 +{
   7.275 +    u16 counter;
   7.276 +
   7.277 +    spin_lock(&platform_timer_lock);
   7.278 +    counter = pit_read_counter();
   7.279 +    platform_pit_counter += (u16)(pit_stamp - counter);
   7.280 +    pit_stamp = counter;
   7.281 +    spin_unlock(&platform_timer_lock);
   7.282 +
   7.283 +    set_ac_timer(&pit_overflow_timer, NOW() + MILLISECS(20));
   7.284 +}
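
The PIT counter is only 16 bits wide and counts down, wrapping roughly every
55 ms at 1.193 MHz; the (u16)(pit_stamp - counter) arithmetic above recovers
the elapsed ticks modulo 2^16, which is exact provided the counter is sampled
at least once per wrap period (hence the 20 ms overflow timer, with margin).
For example:

    /* Counter counted down from 0x0010 and wrapped to 0xFFF0: */
    u16 elapsed = (u16)(0x0010 - 0xFFF0);   /* = 0x0020 = 32 ticks */
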
   7.285 +
   7.286 +static void init_platform_timer(void)
   7.287 +{
   7.288 +    init_ac_timer(&pit_overflow_timer, pit_overflow, NULL, 0);
   7.289 +    pit_overflow(NULL);
   7.290 +    platform_timer_stamp = platform_pit_counter;
   7.291 +    set_time_scale(&platform_timer_scale, CLOCK_TICK_RATE);
   7.292 +}
   7.293 +
   7.294 +static s_time_t __read_platform_stime(u64 platform_time)
   7.295 +{
   7.296 +    u64 diff64 = platform_time - platform_timer_stamp;
   7.297 +    u32 diff   = down_shift(diff64, platform_timer_scale.shift);
   7.298 +    ASSERT(spin_is_locked(&platform_timer_lock));
   7.299 +    return (stime_platform_stamp + 
   7.300 +            (u64)mul_frac(diff, platform_timer_scale.mul_frac));
   7.301 +}
   7.302 +
   7.303 +static s_time_t read_platform_stime(void)
   7.304 +{
   7.305 +    u64 counter;
   7.306 +    s_time_t stime;
   7.307 +
   7.308 +    spin_lock(&platform_timer_lock);
   7.309 +    counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
   7.310 +    stime   = __read_platform_stime(counter);
   7.311 +    spin_unlock(&platform_timer_lock);
   7.312 +
   7.313 +    return stime;
   7.314 +}
   7.315 +
   7.316 +static void platform_time_calibration(void)
   7.317 +{
   7.318 +    u64 counter;
   7.319 +    s_time_t stamp;
   7.320 +
   7.321 +    spin_lock(&platform_timer_lock);
   7.322 +    counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
   7.323 +    stamp   = __read_platform_stime(counter);
   7.324 +    stime_platform_stamp = stamp;
   7.325 +    platform_timer_stamp = counter;
   7.326 +    spin_unlock(&platform_timer_lock);
   7.327  }
   7.328  
   7.329  
   7.330 @@ -233,141 +385,215 @@ static unsigned long get_cmos_time(void)
   7.331   * System Time
   7.332   ***************************************************************************/
   7.333  
   7.334 -static inline u64 get_time_delta(void)
   7.335 +s_time_t get_s_time(void)
   7.336  {
   7.337 -    s32      delta_tsc;
   7.338 -    u32      low;
   7.339 -    u64      delta, tsc;
   7.340 -
   7.341 -    ASSERT(st_scale_f || st_scale_i);
   7.342 +    struct cpu_time *t = &cpu_time[smp_processor_id()];
   7.343 +    u64 tsc;
   7.344 +    u32 delta;
   7.345 +    s_time_t now;
   7.346  
   7.347      rdtscll(tsc);
   7.348 -    low = (u32)(tsc >> rdtsc_bitshift);
   7.349 -    delta_tsc = (s32)(low - shifted_tsc_irq);
   7.350 -    if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
   7.351 -    delta = ((u64)delta_tsc * st_scale_f);
   7.352 -    delta >>= 32;
   7.353 -    delta += ((u64)delta_tsc * st_scale_i);
   7.354 -
   7.355 -    return delta;
   7.356 -}
   7.357 -
   7.358 -s_time_t get_s_time(void)
   7.359 -{
   7.360 -    s_time_t now;
   7.361 -    unsigned long flags;
   7.362 +    delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
   7.363 +    now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
   7.364  
   7.365 -    read_lock_irqsave(&time_lock, flags);
   7.366 -
   7.367 -    now = stime_irq + get_time_delta();
   7.368 -
   7.369 -    /* Ensure that the returned system time is monotonically increasing. */
   7.370 -    {
   7.371 -        static s_time_t prev_now = 0;
   7.372 -        if ( unlikely(now < prev_now) )
   7.373 -            now = prev_now;
   7.374 -        prev_now = now;
   7.375 -    }
   7.376 -
   7.377 -    read_unlock_irqrestore(&time_lock, flags);
   7.378 -
   7.379 -    return now; 
   7.380 +    return now;
   7.381  }
   7.382  
   7.383  static inline void __update_dom_time(struct vcpu *v)
   7.384  {
   7.385 -    struct domain *d  = v->domain;
   7.386 -    shared_info_t *si = d->shared_info;
   7.387 +    struct cpu_time       *t = &cpu_time[smp_processor_id()];
   7.388 +    struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
   7.389  
   7.390 -    spin_lock(&d->time_lock);
   7.391 -
   7.392 -    si->time_version1++;
   7.393 +    u->time_version1++;
   7.394      wmb();
   7.395  
   7.396 -    si->cpu_freq       = cpu_freq;
   7.397 -    si->tsc_timestamp  = full_tsc_irq;
   7.398 -    si->system_time    = stime_irq;
   7.399 -    si->wc_sec         = wc_sec;
   7.400 -    si->wc_usec        = wc_usec;
   7.401 +    u->tsc_timestamp     = t->local_tsc_stamp;
   7.402 +    u->system_time       = t->stime_local_stamp;
   7.403 +    u->tsc_to_system_mul = t->tsc_scale.mul_frac;
   7.404 +    u->tsc_shift         = (s8)t->tsc_scale.shift;
   7.405  
   7.406      wmb();
   7.407 -    si->time_version2++;
   7.408 +    u->time_version2++;
   7.409  
   7.410 -    spin_unlock(&d->time_lock);
   7.411 +    /* Should only do this during do_settime(). */
   7.412 +    v->domain->shared_info->wc_sec  = wc_sec;
   7.413 +    v->domain->shared_info->wc_usec = wc_usec;
   7.414  }
   7.415  
   7.416  void update_dom_time(struct vcpu *v)
   7.417  {
   7.418 -    unsigned long flags;
   7.419 -
   7.420 -    if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
   7.421 -    {
   7.422 -        read_lock_irqsave(&time_lock, flags);
   7.423 +    if ( v->domain->shared_info->vcpu_time[v->vcpu_id].tsc_timestamp != 
   7.424 +         cpu_time[smp_processor_id()].local_tsc_stamp )
   7.425          __update_dom_time(v);
   7.426 -        read_unlock_irqrestore(&time_lock, flags);
   7.427 -    }
   7.428  }
   7.429  
   7.430  /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
   7.431  void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
   7.432  {
   7.433 -    s64 delta;
   7.434 -    long _usecs = (long)usecs;
   7.435 -
   7.436 -    write_lock_irq(&time_lock);
   7.437 +    u64 x, base_usecs;
   7.438 +    u32 y;
   7.439  
   7.440 -    delta = (s64)(stime_irq - system_time_base);
   7.441 +    base_usecs = system_time_base;
   7.442 +    do_div(base_usecs, 1000);
   7.443  
   7.444 -    _usecs += (long)(delta/1000);
   7.445 -    while ( _usecs >= 1000000 ) 
   7.446 +    x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
   7.447 +    y = do_div(x, 1000000);
   7.448 +
   7.449 +    wc_sec  = (unsigned long)x;
   7.450 +    wc_usec = (unsigned long)y;
   7.451 +
   7.452 +    __update_dom_time(current);
   7.453 +}
   7.454 +
   7.455 +static void local_time_calibration(void *unused)
   7.456 +{
   7.457 +    unsigned int cpu = smp_processor_id();
   7.458 +
   7.459 +    /*
   7.460 +     * System timestamps, extrapolated from local and master oscillators,
   7.461 +     * taken during this calibration and the previous calibration.
   7.462 +     */
   7.463 +    s_time_t prev_local_stime, curr_local_stime;
   7.464 +    s_time_t prev_master_stime, curr_master_stime;
   7.465 +
   7.466 +    /* TSC timestamps taken during this calibration and prev calibration. */
   7.467 +    u64 prev_tsc, curr_tsc;
   7.468 +
   7.469 +    /*
   7.470 +     * System time and TSC ticks elapsed during the previous calibration
   7.471 +     * 'epoch'. Also the accumulated error in the local estimate. All these
   7.472 +     * values end up down-shifted to fit in 32 bits.
   7.473 +     */
   7.474 +    u64 stime_elapsed64, tsc_elapsed64, local_stime_error64;
   7.475 +    u32 stime_elapsed32, tsc_elapsed32, local_stime_error32;
   7.476 +
   7.477 +    /* Calculated TSC shift to ensure 32-bit scale multiplier. */
   7.478 +    int tsc_shift = 0;
   7.479 +
   7.480 +    prev_tsc          = cpu_time[cpu].local_tsc_stamp;
   7.481 +    prev_local_stime  = cpu_time[cpu].stime_local_stamp;
   7.482 +    prev_master_stime = cpu_time[cpu].stime_master_stamp;
   7.483 +
   7.484 +    /* Disable IRQs to get 'instantaneous' current timestamps. */
   7.485 +    local_irq_disable();
   7.486 +    rdtscll(curr_tsc);
   7.487 +    curr_local_stime  = get_s_time();
   7.488 +    curr_master_stime = read_platform_stime();
   7.489 +    local_irq_enable();
   7.490 +
   7.491 +#if 0
   7.492 +    printk("PRE%d: tsc=%lld stime=%lld master=%lld\n",
   7.493 +           cpu, prev_tsc, prev_local_stime, prev_master_stime);
   7.494 +    printk("CUR%d: tsc=%lld stime=%lld master=%lld %lld\n",
   7.495 +           cpu, curr_tsc, curr_local_stime, curr_master_stime,
   7.496 +           platform_pit_counter);
   7.497 +#endif
   7.498 +
   7.499 +    /* Local time warps forward if it lags behind master time. */
   7.500 +    if ( curr_local_stime < curr_master_stime )
   7.501 +        curr_local_stime = curr_master_stime;
   7.502 +
   7.503 +    stime_elapsed64 = curr_master_stime - prev_master_stime;
   7.504 +    tsc_elapsed64   = curr_tsc - prev_tsc;
   7.505 +
   7.506 +    /*
   7.507 +     * Error in the local system time estimate. Clamp to epoch time period, or
   7.508 +     * we could end up with a negative scale factor (time going backwards!).
   7.509 +     * This effectively clamps the scale factor to >= 0.
   7.510 +     */
   7.511 +    local_stime_error64 = curr_local_stime - curr_master_stime;
   7.512 +    if ( local_stime_error64 > stime_elapsed64 )
   7.513 +        local_stime_error64 = stime_elapsed64;
   7.514 +
   7.515 +    /*
   7.516 +     * We require 0 < stime_elapsed < 2^31.
   7.517 +     * This allows us to binary shift a 32-bit tsc_elapsed such that:
   7.518 +     * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
   7.519 +     */
   7.520 +    while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
   7.521 +            ((s32)stime_elapsed64 < 0) )
   7.522      {
   7.523 -        _usecs -= 1000000;
   7.524 -        secs++;
   7.525 +        stime_elapsed64     >>= 1;
   7.526 +        tsc_elapsed64       >>= 1;
   7.527 +        local_stime_error64 >>= 1;
   7.528      }
   7.529  
   7.530 -    wc_sec  = secs;
   7.531 -    wc_usec = _usecs;
   7.532 +    /* stime_master_diff (and hence stime_error) now fit in a 32-bit word. */
   7.533 +    stime_elapsed32     = (u32)stime_elapsed64;
   7.534 +    local_stime_error32 = (u32)local_stime_error64;
   7.535  
   7.536 -    /* Others will pick up the change at the next tick. */
   7.537 -    __update_dom_time(current);
   7.538 -    send_guest_virq(current, VIRQ_TIMER);
   7.539 +    /* tsc_elapsed <= 2*stime_elapsed */
   7.540 +    while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
   7.541 +    {
   7.542 +        tsc_elapsed64 >>= 1;
   7.543 +        tsc_shift--;
   7.544 +    }
   7.545  
   7.546 -    write_unlock_irq(&time_lock);
   7.547 +    /* Local difference must now fit in 32 bits. */
   7.548 +    ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
   7.549 +    tsc_elapsed32 = (u32)tsc_elapsed64;
   7.550 +
   7.551 +    /* tsc_elapsed > stime_elapsed */
   7.552 +    ASSERT(tsc_elapsed32 != 0);
   7.553 +    while ( tsc_elapsed32 <= stime_elapsed32 )
   7.554 +    {
   7.555 +        tsc_elapsed32 <<= 1;
   7.556 +        tsc_shift++;
   7.557 +    }
   7.558 +
   7.559 +#if 0
   7.560 +    printk("---%d: %08x %d\n", cpu, 
   7.561 +           div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32),
   7.562 +           tsc_shift);
   7.563 +#endif
   7.564 +
   7.565 +    /* Record new timestamp information. */
   7.566 +    cpu_time[cpu].tsc_scale.mul_frac = 
   7.567 +        div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32);
   7.568 +    cpu_time[cpu].tsc_scale.shift    = tsc_shift;
   7.569 +    cpu_time[cpu].local_tsc_stamp    = curr_tsc;
   7.570 +    cpu_time[cpu].stime_local_stamp  = curr_local_stime;
   7.571 +    cpu_time[cpu].stime_master_stamp = curr_master_stime;
   7.572 +
   7.573 +    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
   7.574 +
   7.575 +    if ( cpu == 0 )
   7.576 +        platform_time_calibration();
   7.577  }
   7.578  
   7.579 +void init_percpu_time(void)
   7.580 +{
   7.581 +    unsigned int cpu = smp_processor_id();
   7.582 +    unsigned long flags;
   7.583 +    s_time_t now;
   7.584 +
   7.585 +    local_irq_save(flags);
   7.586 +    rdtscll(cpu_time[cpu].local_tsc_stamp);
   7.587 +    now = (cpu == 0) ? 0 : read_platform_stime();
   7.588 +    local_irq_restore(flags);
   7.589 +
   7.590 +    cpu_time[cpu].stime_master_stamp = now;
   7.591 +    cpu_time[cpu].stime_local_stamp  = now;
   7.592 +
   7.593 +    init_ac_timer(&cpu_time[cpu].calibration_timer,
   7.594 +                  local_time_calibration, NULL, cpu);
   7.595 +    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
   7.596 +}
   7.597  
   7.598  /* Late init function (after all CPUs are booted). */
   7.599 -int __init init_xen_time()
   7.600 +int __init init_xen_time(void)
   7.601  {
   7.602 -    u64      scale;
   7.603 -    unsigned int cpu_ghz;
   7.604 -
   7.605 -    cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
   7.606 -    for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
   7.607 -        continue;
   7.608 -
   7.609 -    scale  = 1000000000LL << (32 + rdtsc_bitshift);
   7.610 -    scale /= cpu_freq;
   7.611 -    st_scale_f = scale & 0xffffffff;
   7.612 -    st_scale_i = scale >> 32;
   7.613 +    wc_sec = get_cmos_time();
   7.614  
   7.615      local_irq_disable();
   7.616  
   7.617 -    /* System time ticks from zero. */
   7.618 -    rdtscll(full_tsc_irq);
   7.619 -    stime_irq = (s_time_t)0;
   7.620 -    shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
   7.621 +    init_percpu_time();
   7.622  
   7.623 -    /* Wallclock time starts as the initial RTC time. */
   7.624 -    wc_sec = get_cmos_time();
   7.625 +    stime_platform_stamp = 0;
   7.626 +    init_platform_timer();
   7.627  
   7.628      local_irq_enable();
   7.629  
   7.630 -    printk("Time init:\n");
   7.631 -    printk(".... cpu_freq:    %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);
   7.632 -    printk(".... scale:       %08X:%08X\n", (u32)(scale>>32),(u32)scale);
   7.633 -    printk(".... Wall Clock:  %lds %ldus\n", wc_sec, wc_usec);
   7.634 -
   7.635      return 0;
   7.636  }
   7.637  
   7.638 @@ -375,15 +601,12 @@ int __init init_xen_time()
   7.639  /* Early init function. */
   7.640  void __init early_time_init(void)
   7.641  {
   7.642 -    unsigned long ticks_per_frac = calibrate_tsc();
   7.643 +    u64 tmp = calibrate_boot_tsc();
   7.644  
   7.645 -    if ( !ticks_per_frac )
   7.646 -        panic("Error calibrating TSC\n");
   7.647 +    set_time_scale(&cpu_time[0].tsc_scale, tmp);
   7.648  
   7.649 -    cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
   7.650 -
   7.651 -    cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
   7.652 -
   7.653 +    do_div(tmp, 1000);
   7.654 +    cpu_khz = (unsigned long)tmp;
   7.655      printk("Detected %lu.%03lu MHz processor.\n", 
   7.656             cpu_khz / 1000, cpu_khz % 1000);
   7.657  
     8.1 --- a/xen/arch/x86/vmx_intercept.c	Sun Jul 17 14:16:21 2005 +0000
     8.2 +++ b/xen/arch/x86/vmx_intercept.c	Mon Jul 18 20:22:11 2005 +0000
     8.3 @@ -24,10 +24,10 @@
     8.4  #include <asm/vmx_virpit.h>
     8.5  #include <asm/vmx_intercept.h>
     8.6  #include <public/io/ioreq.h>
     8.7 -
     8.8  #include <xen/lib.h>
     8.9  #include <xen/sched.h>
    8.10  #include <asm/current.h>
    8.11 +#include <io_ports.h>
    8.12  
    8.13  #ifdef CONFIG_VMX
    8.14  
    8.15 @@ -175,7 +175,7 @@ int intercept_pit_io(ioreq_t *p)
    8.16          p->port_mm)
    8.17          return 0;
    8.18      
    8.19 -    if (p->addr == 0x43 &&
    8.20 +    if (p->addr == PIT_MODE &&
    8.21  	p->dir == 0 &&				/* write */
    8.22          ((p->u.data >> 4) & 0x3) == 0 &&	/* latch command */
    8.23          ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
    8.24 @@ -183,7 +183,7 @@ int intercept_pit_io(ioreq_t *p)
    8.25  	return 1;
    8.26      }
    8.27  
    8.28 -    if (p->addr == (0x40 + vpit->channel) &&
    8.29 +    if (p->addr == (PIT_CH0 + vpit->channel) &&
    8.30  	p->dir == 1) {	/* read */
    8.31          p->u.data = pit_read_io(vpit);
    8.32          resume_pit_io(p);
     9.1 --- a/xen/common/ac_timer.c	Sun Jul 17 14:16:21 2005 +0000
     9.2 +++ b/xen/common/ac_timer.c	Mon Jul 18 20:22:11 2005 +0000
     9.3 @@ -202,7 +202,7 @@ static void ac_timer_softirq_action(void
     9.4      do {
     9.5          heap = ac_timers[cpu].heap;
     9.6          now  = NOW();
     9.7 -        
     9.8 +
     9.9          while ( (GET_HEAP_SIZE(heap) != 0) &&
    9.10                  ((t = heap[1])->expires < (now + TIMER_SLOP)) )
    9.11          {
    10.1 --- a/xen/common/domain.c	Sun Jul 17 14:16:21 2005 +0000
    10.2 +++ b/xen/common/domain.c	Mon Jul 18 20:22:11 2005 +0000
    10.3 @@ -42,8 +42,6 @@ struct domain *do_createdomain(domid_t d
    10.4      d->domain_id   = dom_id;
    10.5      v->processor  = cpu;
    10.6   
    10.7 -    spin_lock_init(&d->time_lock);
    10.8 -
    10.9      spin_lock_init(&d->big_lock);
   10.10  
   10.11      spin_lock_init(&d->page_alloc_lock);
    11.1 --- a/xen/common/page_alloc.c	Sun Jul 17 14:16:21 2005 +0000
    11.2 +++ b/xen/common/page_alloc.c	Mon Jul 18 20:22:11 2005 +0000
    11.3 @@ -351,10 +351,10 @@ void free_heap_pages(
    11.4  void scrub_heap_pages(void)
    11.5  {
    11.6      void *p;
    11.7 -    unsigned long pfn, flags;
    11.8 +    unsigned long pfn;
    11.9 +    int cpu = smp_processor_id();
   11.10  
   11.11      printk("Scrubbing Free RAM: ");
   11.12 -    watchdog_disable();
   11.13  
   11.14      for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
   11.15      {
   11.16 @@ -362,12 +362,15 @@ void scrub_heap_pages(void)
   11.17          if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
   11.18              printk(".");
   11.19  
   11.20 +        if ( unlikely(softirq_pending(cpu)) )
   11.21 +            do_softirq();
   11.22 +
   11.23          /* Quick lock-free check. */
   11.24          if ( allocated_in_map(pfn) )
   11.25              continue;
   11.26 -        
   11.27 -        spin_lock_irqsave(&heap_lock, flags);
   11.28 -        
   11.29 +
   11.30 +        spin_lock_irq(&heap_lock);
   11.31 +
   11.32          /* Re-check page status with lock held. */
   11.33          if ( !allocated_in_map(pfn) )
   11.34          {
   11.35 @@ -385,11 +388,10 @@ void scrub_heap_pages(void)
   11.36                  unmap_domain_page(p);
   11.37              }
   11.38          }
   11.39 -        
   11.40 -        spin_unlock_irqrestore(&heap_lock, flags);
   11.41 +
   11.42 +        spin_unlock_irq(&heap_lock);
   11.43      }
   11.44  
   11.45 -    watchdog_enable();
   11.46      printk("done.\n");
   11.47  }
   11.48  
    12.1 --- a/xen/drivers/char/console.c	Sun Jul 17 14:16:21 2005 +0000
    12.2 +++ b/xen/drivers/char/console.c	Mon Jul 18 20:22:11 2005 +0000
    12.3 @@ -635,8 +635,6 @@ static int __init debugtrace_init(void)
    12.4  
    12.5      debugtrace_bytes = bytes;
    12.6  
    12.7 -    memset(debugtrace_buf, '\0', debugtrace_bytes);
    12.8 -
    12.9      return 0;
   12.10  }
   12.11  __initcall(debugtrace_init);
    13.1 --- a/xen/include/asm-x86/time.h	Sun Jul 17 14:16:21 2005 +0000
    13.2 +++ b/xen/include/asm-x86/time.h	Mon Jul 18 20:22:11 2005 +0000
    13.3 @@ -4,4 +4,7 @@
    13.4  
    13.5  extern int timer_ack;
    13.6  
    13.7 +extern void calibrate_tsc_bp(void);
    13.8 +extern void calibrate_tsc_ap(void);
    13.9 +
   13.10  #endif /* __X86_TIME_H__ */
    14.1 --- a/xen/include/public/xen.h	Sun Jul 17 14:16:21 2005 +0000
    14.2 +++ b/xen/include/public/xen.h	Mon Jul 18 20:22:11 2005 +0000
    14.3 @@ -329,6 +329,28 @@ typedef struct vcpu_info {
    14.4  #endif
    14.5  } vcpu_info_t;
    14.6  
    14.7 +typedef struct vcpu_time_info {
    14.8 +    /*
    14.9 +     * The following values are updated periodically (and not necessarily
   14.10 +     * atomically!). The guest OS detects this because 'time_version1' is
   14.11 +     * incremented just before updating these values, and 'time_version2' is
   14.12 +     * incremented immediately after. See the Xen-specific Linux code for an
   14.13 +     * example of how to read these values safely (arch/xen/kernel/time.c).
   14.14 +     */
   14.15 +    u32 time_version1;
   14.16 +    u32 time_version2;
   14.17 +    u64 tsc_timestamp;   /* TSC at last update of time vals.  */
   14.18 +    u64 system_time;     /* Time, in nanosecs, since boot.    */
   14.19 +    /*
   14.20 +     * Current system time:
   14.21 +     *   system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
   14.22 +     * CPU frequency (Hz):
   14.23 +     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
   14.24 +     */
   14.25 +    u32 tsc_to_system_mul;
   14.26 +    s8  tsc_shift;
   14.27 +} vcpu_time_info_t;
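
Guests read these fields with a version-check loop, mirroring the Linux code
earlier in this changeset; a minimal reader-side sketch (hypothetical helper
name, not part of this changeset):

    static void read_time_info(volatile vcpu_time_info_t *t,
                               u64 *tsc_stamp, u64 *sys_time,
                               u32 *mul, s8 *shift)
    {
        u32 ver;
        do {
            ver = t->time_version2;    /* snapshot post-update counter */
            rmb();
            *tsc_stamp = t->tsc_timestamp;
            *sys_time  = t->system_time;
            *mul       = t->tsc_to_system_mul;
            *shift     = t->tsc_shift;
            rmb();
        } while (ver != t->time_version1); /* retry if an update raced */
    }
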
   14.28 +
   14.29  /*
   14.30   * Xen/kernel shared data -- pointer provided in start_info.
   14.31   * NB. We expect that this struct is smaller than a page.
   14.32 @@ -336,6 +358,8 @@ typedef struct vcpu_info {
   14.33  typedef struct shared_info {
   14.34      vcpu_info_t vcpu_data[MAX_VIRT_CPUS];
   14.35  
   14.36 +    vcpu_time_info_t vcpu_time[MAX_VIRT_CPUS];
   14.37 +
   14.38      u32 n_vcpu;
   14.39  
   14.40      /*
   14.41 @@ -373,33 +397,11 @@ typedef struct shared_info {
   14.42      u32 evtchn_mask[32];
   14.43  
   14.44      /*
   14.45 -     * Time: The following abstractions are exposed: System Time, Clock Time,
   14.46 -     * Domain Virtual Time. Domains can access Cycle counter time directly.
   14.47 +     * Wallclock time: updated only by control software. Guests should base
   14.48 +     * their gettimeofday() syscall on this wallclock-base value.
   14.49       */
   14.50 -    u64                cpu_freq;        /* CPU frequency (Hz).          */
   14.51 -
   14.52 -    /*
   14.53 -     * The following values are updated periodically (and not necessarily
   14.54 -     * atomically!). The guest OS detects this because 'time_version1' is
   14.55 -     * incremented just before updating these values, and 'time_version2' is
   14.56 -     * incremented immediately after. See the Xen-specific Linux code for an
   14.57 -     * example of how to read these values safely (arch/xen/kernel/time.c).
   14.58 -     */
   14.59 -    u32                time_version1;
   14.60 -    u32                time_version2;
   14.61 -    tsc_timestamp_t    tsc_timestamp;   /* TSC at last update of time vals.  */
   14.62 -    u64                system_time;     /* Time, in nanosecs, since boot.    */
   14.63      u32                wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
   14.64      u32                wc_usec;         /* Usecs 00:00:00 UTC, Jan 1, 1970.  */
   14.65 -    u64                domain_time;     /* Domain virtual time, in nanosecs. */
   14.66 -
   14.67 -    /*
   14.68 -     * Timeout values:
   14.69 -     * Allow a domain to specify a timeout value in system time and 
   14.70 -     * domain virtual time.
   14.71 -     */
   14.72 -    u64                wall_timeout;
   14.73 -    u64                domain_timeout;
   14.74  
   14.75      arch_shared_info_t arch;
   14.76  
    15.1 --- a/xen/include/xen/sched.h	Sun Jul 17 14:16:21 2005 +0000
    15.2 +++ b/xen/include/xen/sched.h	Mon Jul 18 20:22:11 2005 +0000
    15.3 @@ -92,7 +92,6 @@ struct domain
    15.4      domid_t          domain_id;
    15.5  
    15.6      shared_info_t   *shared_info;     /* shared data area */
    15.7 -    spinlock_t       time_lock;
    15.8  
    15.9      spinlock_t       big_lock;
   15.10  
    16.1 --- a/xen/include/xen/time.h	Sun Jul 17 14:16:21 2005 +0000
    16.2 +++ b/xen/include/xen/time.h	Mon Jul 18 20:22:11 2005 +0000
    16.3 @@ -30,7 +30,8 @@
    16.4  #include <public/xen.h>
    16.5  #include <asm/time.h>
    16.6  
    16.7 -extern int init_xen_time();
    16.8 +extern int init_xen_time(void);
    16.9 +extern void init_percpu_time(void);
   16.10  
   16.11  extern unsigned long cpu_khz;
   16.12