ia64/xen-unstable

changeset 5828:43564304cf94

First cut of new time interfaces and synchronisation mechanisms.
Based on an initial patch from Don Fry at IBM.
Still TODO:
1. Testing
2. NTP synchronisation
3. Fix wallclock interface a bit
4. Support for platform timers other than PIT (e.g., HPET, IBM Cyclone)
5. Scale 64-bit TSC diffs instead of 32-bit, just for sanity
6. Error-correcting scale factor is still slightly wrong
7. More testing
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Mon Jul 18 20:22:11 2005 +0000 (2005-07-18)
parents 9697bc63d403
children bf68b5fcbc9b 390e4d63cdb1
files linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile xen/arch/x86/apic.c xen/arch/x86/i8259.c xen/arch/x86/setup.c xen/arch/x86/smpboot.c xen/arch/x86/time.c xen/arch/x86/vmx_intercept.c xen/common/ac_timer.c xen/common/domain.c xen/common/page_alloc.c xen/drivers/char/console.c xen/include/asm-x86/time.h xen/include/public/xen.h xen/include/xen/sched.h xen/include/xen/time.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Sun Jul 17 14:16:21 2005 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Mon Jul 18 20:22:11 2005 +0000
     1.3 @@ -19,7 +19,7 @@ c-obj-y	:= semaphore.o vm86.o \
     1.4  s-obj-y	:=
     1.5  
     1.6  obj-y				+= cpu/
     1.7 -obj-y				+= timers/
     1.8 +#obj-y				+= timers/
     1.9  obj-$(CONFIG_ACPI_BOOT)		+= acpi/
    1.10  #c-obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
    1.11  c-obj-$(CONFIG_MCA)		+= mca.o
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Sun Jul 17 14:16:21 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Mon Jul 18 20:22:11 2005 +0000
     2.3 @@ -104,25 +104,17 @@ extern struct timer_opts timer_tsc;
     2.4  struct timer_opts *cur_timer = &timer_tsc;
     2.5  
     2.6  /* These are periodically updated in shared_info, and then copied here. */
     2.7 -u32 shadow_tsc_stamp;
     2.8 -u64 shadow_system_time;
     2.9 -static u32 shadow_time_version;
    2.10 +struct shadow_time_info {
    2.11 +	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
    2.12 +	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
    2.13 +	u32 tsc_to_nsec_mul;
    2.14 +	u32 tsc_to_usec_mul;
    2.15 +	int tsc_shift;
    2.16 +	u32 version;
    2.17 +};
    2.18 +static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
    2.19  static struct timeval shadow_tv;
    2.20  
    2.21 -/*
    2.22 - * We use this to ensure that gettimeofday() is monotonically increasing. We
    2.23 - * only break this guarantee if the wall clock jumps backwards "a long way".
    2.24 - */
    2.25 -static struct timeval last_seen_tv = {0,0};
    2.26 -
    2.27 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
    2.28 -/* Periodically propagate synchronised time base to the RTC and to Xen. */
    2.29 -static long last_rtc_update, last_update_to_xen;
    2.30 -#endif
    2.31 -
    2.32 -/* Periodically take synchronised time base from Xen, if we need it. */
    2.33 -static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
    2.34 -
    2.35  /* Keep track of last time we did processing/updating of jiffies and xtime. */
    2.36  static u64 processed_system_time;   /* System time (ns) at last processing. */
    2.37  static DEFINE_PER_CPU(u64, processed_system_time);
    2.38 @@ -164,26 +156,147 @@ static int __init __independent_wallcloc
    2.39  #define INDEPENDENT_WALLCLOCK() \
    2.40      (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
    2.41  
    2.42 +int tsc_disable __initdata = 0;
    2.43 +
    2.44 +static void delay_tsc(unsigned long loops)
    2.45 +{
    2.46 +	unsigned long bclock, now;
    2.47 +	
    2.48 +	rdtscl(bclock);
    2.49 +	do
    2.50 +	{
    2.51 +		rep_nop();
    2.52 +		rdtscl(now);
    2.53 +	} while ((now-bclock) < loops);
    2.54 +}
    2.55 +
    2.56 +struct timer_opts timer_tsc = {
    2.57 +	.name = "tsc",
    2.58 +	.delay = delay_tsc,
    2.59 +};
    2.60 +
    2.61 +static inline u32 down_shift(u64 time, int shift)
    2.62 +{
    2.63 +	if ( shift < 0 )
    2.64 +		return (u32)(time >> -shift);
    2.65 +	return (u32)((u32)time << shift);
    2.66 +}
    2.67 +
    2.68 +/*
    2.69 + * 32-bit multiplication of integer multiplicand and fractional multiplier
    2.70 + * yielding 32-bit integer product.
    2.71 + */
    2.72 +static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
    2.73 +{
    2.74 +	u32 product_int, product_frac;
    2.75 +	__asm__ (
    2.76 +		"mul %3"
    2.77 +		: "=a" (product_frac), "=d" (product_int)
    2.78 +		: "0" (multiplicand), "r" (multiplier) );
    2.79 +	return product_int;
    2.80 +}
    2.81 +
    2.82 +void init_cpu_khz(void)
    2.83 +{
    2.84 +	u64 __cpu_khz = 1000000ULL << 32;
    2.85 +	struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
    2.86 +	do_div(__cpu_khz, info->tsc_to_system_mul);
    2.87 +	cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
    2.88 +	printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
    2.89 +	       cpu_khz / 1000, cpu_khz % 1000);
    2.90 +}
    2.91 +
    2.92 +static u64 get_nsec_offset(struct shadow_time_info *shadow)
    2.93 +{
    2.94 +	u64 now;
    2.95 +	u32 delta;
    2.96 +	rdtscll(now);
    2.97 +	delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
    2.98 +	return mul_frac(delta, shadow->tsc_to_nsec_mul);
    2.99 +}
   2.100 +
   2.101 +static unsigned long get_usec_offset(struct shadow_time_info *shadow)
   2.102 +{
   2.103 +	u64 now;
   2.104 +	u32 delta;
   2.105 +	rdtscll(now);
   2.106 +	delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
   2.107 +	return mul_frac(delta, shadow->tsc_to_usec_mul);
   2.108 +}
   2.109 +
   2.110 +static void update_wallclock(void)
   2.111 +{
   2.112 +	shared_info_t *s = HYPERVISOR_shared_info;
   2.113 +	long wtm_nsec;
   2.114 +	time_t wtm_sec, sec;
   2.115 +	s64 nsec;
   2.116 +
   2.117 +	shadow_tv.tv_sec  = s->wc_sec;
   2.118 +	shadow_tv.tv_usec = s->wc_usec;
   2.119 +
   2.120 +	if (INDEPENDENT_WALLCLOCK())
   2.121 +		return;
   2.122 +
   2.123 +	if ((time_status & STA_UNSYNC) != 0)
   2.124 +		return;
   2.125 +
   2.126 +	/* Adjust shadow for jiffies that haven't updated xtime yet. */
   2.127 +	shadow_tv.tv_usec -= 
   2.128 +		(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
   2.129 +	HANDLE_USEC_UNDERFLOW(shadow_tv);
   2.130 +
   2.131 +	/* Update our unsynchronised xtime appropriately. */
   2.132 +	sec = shadow_tv.tv_sec;
   2.133 +	nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   2.134 +
   2.135 +	__normalize_time(&sec, &nsec);
   2.136 +	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.137 +	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   2.138 +
   2.139 +	set_normalized_timespec(&xtime, sec, nsec);
   2.140 +	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   2.141 +}
   2.142 +
   2.143  /*
   2.144   * Reads a consistent set of time-base values from Xen, into a shadow data
   2.145   * area. Must be called with the xtime_lock held for writing.
   2.146   */
   2.147  static void __get_time_values_from_xen(void)
   2.148  {
   2.149 -	shared_info_t *s = HYPERVISOR_shared_info;
   2.150 +	shared_info_t           *s = HYPERVISOR_shared_info;
   2.151 +	struct vcpu_time_info   *src;
   2.152 +	struct shadow_time_info *dst;
   2.153 +
   2.154 +	src = &s->vcpu_time[smp_processor_id()];
   2.155 +	dst = &per_cpu(shadow_time, smp_processor_id());
   2.156  
   2.157  	do {
   2.158 -		shadow_time_version = s->time_version2;
   2.159 +		dst->version = src->time_version2;
   2.160  		rmb();
   2.161 -		shadow_tv.tv_sec    = s->wc_sec;
   2.162 -		shadow_tv.tv_usec   = s->wc_usec;
   2.163 -		shadow_tsc_stamp    = (u32)s->tsc_timestamp;
   2.164 -		shadow_system_time  = s->system_time;
   2.165 +		dst->tsc_timestamp     = src->tsc_timestamp;
   2.166 +		dst->system_timestamp  = src->system_time;
   2.167 +		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
   2.168 +		dst->tsc_shift         = src->tsc_shift;
   2.169  		rmb();
   2.170  	}
   2.171 -	while (shadow_time_version != s->time_version1);
   2.172 +	while (dst->version != src->time_version1);
   2.173 +
   2.174 +	dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
   2.175 +
   2.176 +	if ((shadow_tv.tv_sec != s->wc_sec) ||
   2.177 +	    (shadow_tv.tv_usec != s->wc_usec))
   2.178 +		update_wallclock();
   2.179 +}
   2.180  
   2.181 -	cur_timer->mark_offset();
   2.182 +static inline int time_values_up_to_date(int cpu)
   2.183 +{
   2.184 +	struct vcpu_time_info   *src;
   2.185 +	struct shadow_time_info *dst;
   2.186 +
   2.187 +	src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()];
   2.188 +	dst = &per_cpu(shadow_time, smp_processor_id());
   2.189 +
   2.190 +	return (dst->version == src->time_version2);
   2.191  }
   2.192  
   2.193  #define TIME_VALUES_UP_TO_DATE \
   2.194 @@ -229,13 +342,18 @@ void do_gettimeofday(struct timeval *tv)
   2.195  	unsigned long max_ntp_tick;
   2.196  	unsigned long flags;
   2.197  	s64 nsec;
   2.198 +	unsigned int cpu;
   2.199 +	struct shadow_time_info *shadow;
   2.200 +
   2.201 +	cpu = get_cpu();
   2.202 +	shadow = &per_cpu(shadow_time, cpu);
   2.203  
   2.204  	do {
   2.205  		unsigned long lost;
   2.206  
   2.207  		seq = read_seqbegin(&xtime_lock);
   2.208  
   2.209 -		usec = cur_timer->get_offset();
   2.210 +		usec = get_usec_offset(shadow);
   2.211  		lost = jiffies - wall_jiffies;
   2.212  
   2.213  		/*
   2.214 @@ -256,11 +374,11 @@ void do_gettimeofday(struct timeval *tv)
   2.215  		sec = xtime.tv_sec;
   2.216  		usec += (xtime.tv_nsec / NSEC_PER_USEC);
   2.217  
   2.218 -		nsec = shadow_system_time - processed_system_time;
   2.219 +		nsec = shadow->system_timestamp - processed_system_time;
   2.220  		__normalize_time(&sec, &nsec);
   2.221  		usec += (long)nsec / NSEC_PER_USEC;
   2.222  
   2.223 -		if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
   2.224 +		if (unlikely(!time_values_up_to_date(cpu))) {
   2.225  			/*
   2.226  			 * We may have blocked for a long time,
   2.227  			 * rendering our calculations invalid
   2.228 @@ -275,21 +393,13 @@ void do_gettimeofday(struct timeval *tv)
   2.229  		}
   2.230  	} while (read_seqretry(&xtime_lock, seq));
   2.231  
   2.232 +	put_cpu();
   2.233 +
   2.234  	while (usec >= USEC_PER_SEC) {
   2.235  		usec -= USEC_PER_SEC;
   2.236  		sec++;
   2.237  	}
   2.238  
   2.239 -	/* Ensure that time-of-day is monotonically increasing. */
   2.240 -	if ((sec < last_seen_tv.tv_sec) ||
   2.241 -	    ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) {
   2.242 -		sec = last_seen_tv.tv_sec;
   2.243 -		usec = last_seen_tv.tv_usec;
   2.244 -	} else {
   2.245 -		last_seen_tv.tv_sec = sec;
   2.246 -		last_seen_tv.tv_usec = usec;
   2.247 -	}
   2.248 -
   2.249  	tv->tv_sec = sec;
   2.250  	tv->tv_usec = usec;
   2.251  }
   2.252 @@ -302,6 +412,8 @@ int do_settimeofday(struct timespec *tv)
   2.253  	long wtm_nsec;
   2.254  	s64 nsec;
   2.255  	struct timespec xentime;
   2.256 +	unsigned int cpu;
   2.257 +	struct shadow_time_info *shadow;
   2.258  
   2.259  	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
   2.260  		return -EINVAL;
   2.261 @@ -309,6 +421,9 @@ int do_settimeofday(struct timespec *tv)
   2.262  	if (!INDEPENDENT_WALLCLOCK())
   2.263  		return 0; /* Silent failure? */
   2.264  
   2.265 +	cpu = get_cpu();
   2.266 +	shadow = &per_cpu(shadow_time, cpu);
   2.267 +
   2.268  	write_seqlock_irq(&xtime_lock);
   2.269  
   2.270  	/*
   2.271 @@ -317,9 +432,8 @@ int do_settimeofday(struct timespec *tv)
   2.272  	 * be stale, so we can retry with fresh ones.
   2.273  	 */
   2.274   again:
   2.275 -	nsec = (s64)tv->tv_nsec -
   2.276 -	    ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
   2.277 -	if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
   2.278 +	nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
   2.279 +	if (unlikely(!time_values_up_to_date(cpu))) {
   2.280  		__get_time_values_from_xen();
   2.281  		goto again;
   2.282  	}
   2.283 @@ -335,7 +449,7 @@ int do_settimeofday(struct timespec *tv)
   2.284  	 */
   2.285  	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
   2.286  
   2.287 -	nsec -= (shadow_system_time - processed_system_time);
   2.288 +	nsec -= (shadow->system_timestamp - processed_system_time);
   2.289  
   2.290  	__normalize_time(&sec, &nsec);
   2.291  	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.292 @@ -349,24 +463,21 @@ int do_settimeofday(struct timespec *tv)
   2.293  	time_maxerror = NTP_PHASE_LIMIT;
   2.294  	time_esterror = NTP_PHASE_LIMIT;
   2.295  
   2.296 -	/* Reset all our running time counts. They make no sense now. */
   2.297 -	last_seen_tv.tv_sec = 0;
   2.298 -	last_update_from_xen = 0;
   2.299 -
   2.300  #ifdef CONFIG_XEN_PRIVILEGED_GUEST
   2.301  	if (xen_start_info.flags & SIF_INITDOMAIN) {
   2.302  		dom0_op_t op;
   2.303 -		last_rtc_update = last_update_to_xen = 0;
   2.304  		op.cmd = DOM0_SETTIME;
   2.305  		op.u.settime.secs        = xentime.tv_sec;
   2.306  		op.u.settime.usecs       = xentime.tv_nsec / NSEC_PER_USEC;
   2.307 -		op.u.settime.system_time = shadow_system_time;
   2.308 +		op.u.settime.system_time = shadow->system_timestamp;
   2.309  		write_sequnlock_irq(&xtime_lock);
   2.310  		HYPERVISOR_dom0_op(&op);
   2.311  	} else
   2.312  #endif
   2.313  		write_sequnlock_irq(&xtime_lock);
   2.314  
   2.315 +	put_cpu();
   2.316 +
   2.317  	clock_was_set();
   2.318  	return 0;
   2.319  }
   2.320 @@ -403,10 +514,31 @@ static int set_rtc_mmss(unsigned long no
   2.321   */
   2.322  unsigned long long monotonic_clock(void)
   2.323  {
   2.324 -	return cur_timer->monotonic_clock();
   2.325 +	int cpu = get_cpu();
   2.326 +	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
   2.327 +	s64 off;
   2.328 +	unsigned long flags;
   2.329 +	
   2.330 +	for ( ; ; ) {
   2.331 +		off = get_nsec_offset(shadow);
   2.332 +		if (time_values_up_to_date(cpu))
   2.333 +			break;
   2.334 +		write_seqlock_irqsave(&xtime_lock, flags);
   2.335 +		__get_time_values_from_xen();
   2.336 +		write_sequnlock_irqrestore(&xtime_lock, flags);
   2.337 +	}
   2.338 +
   2.339 +	put_cpu();
   2.340 +
   2.341 +	return shadow->system_timestamp + off;
   2.342  }
   2.343  EXPORT_SYMBOL(monotonic_clock);
   2.344  
   2.345 +unsigned long long sched_clock(void)
   2.346 +{
   2.347 +	return monotonic_clock();
   2.348 +}
   2.349 +
   2.350  #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
   2.351  unsigned long profile_pc(struct pt_regs *regs)
   2.352  {
   2.353 @@ -427,27 +559,26 @@ EXPORT_SYMBOL(profile_pc);
   2.354  static inline void do_timer_interrupt(int irq, void *dev_id,
   2.355  					struct pt_regs *regs)
   2.356  {
   2.357 -	time_t wtm_sec, sec;
   2.358 -	s64 delta, delta_cpu, nsec;
   2.359 -	long sec_diff, wtm_nsec;
   2.360 +	s64 delta, delta_cpu;
   2.361  	int cpu = smp_processor_id();
   2.362 +	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
   2.363  
   2.364  	do {
   2.365  		__get_time_values_from_xen();
   2.366  
   2.367 -		delta = delta_cpu = (s64)shadow_system_time +
   2.368 -			((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
   2.369 +		delta = delta_cpu = 
   2.370 +			shadow->system_timestamp + get_nsec_offset(shadow);
   2.371  		delta     -= processed_system_time;
   2.372  		delta_cpu -= per_cpu(processed_system_time, cpu);
   2.373  	}
   2.374 -	while (!TIME_VALUES_UP_TO_DATE);
   2.375 +	while (!time_values_up_to_date(cpu));
   2.376  
   2.377  	if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
   2.378  		printk("Timer ISR/%d: Time went backwards: "
   2.379  		       "delta=%lld cpu_delta=%lld shadow=%lld "
   2.380  		       "off=%lld processed=%lld cpu_processed=%lld\n",
   2.381 -		       cpu, delta, delta_cpu, shadow_system_time,
   2.382 -		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
   2.383 +		       cpu, delta, delta_cpu, shadow->system_timestamp,
   2.384 +		       (s64)get_nsec_offset(shadow),
   2.385  		       processed_system_time,
   2.386  		       per_cpu(processed_system_time, cpu));
   2.387  		for (cpu = 0; cpu < num_online_cpus(); cpu++)
   2.388 @@ -470,76 +601,6 @@ static inline void do_timer_interrupt(in
   2.389  		update_process_times(user_mode(regs));
   2.390  		profile_tick(CPU_PROFILING, regs);
   2.391  	}
   2.392 -
   2.393 -	if (cpu != 0)
   2.394 -		return;
   2.395 -
   2.396 -	/*
   2.397 -	 * Take synchronised time from Xen once a minute if we're not
   2.398 -	 * synchronised ourselves, and we haven't chosen to keep an independent
   2.399 -	 * time base.
   2.400 -	 */
   2.401 -	if (!INDEPENDENT_WALLCLOCK() &&
   2.402 -	    ((time_status & STA_UNSYNC) != 0) &&
   2.403 -	    (xtime.tv_sec > (last_update_from_xen + 60))) {
   2.404 -		/* Adjust shadow for jiffies that haven't updated xtime yet. */
   2.405 -		shadow_tv.tv_usec -= 
   2.406 -			(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
   2.407 -		HANDLE_USEC_UNDERFLOW(shadow_tv);
   2.408 -
   2.409 -		/*
   2.410 -		 * Reset our running time counts if they are invalidated by
   2.411 -		 * a warp backwards of more than 500ms.
   2.412 -		 */
   2.413 -		sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
   2.414 -		if (unlikely(abs(sec_diff) > 1) ||
   2.415 -		    unlikely(((sec_diff * USEC_PER_SEC) +
   2.416 -			      (xtime.tv_nsec / NSEC_PER_USEC) -
   2.417 -			      shadow_tv.tv_usec) > 500000)) {
   2.418 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   2.419 -			last_rtc_update = last_update_to_xen = 0;
   2.420 -#endif
   2.421 -			last_seen_tv.tv_sec = 0;
   2.422 -		}
   2.423 -
   2.424 -		/* Update our unsynchronised xtime appropriately. */
   2.425 -		sec = shadow_tv.tv_sec;
   2.426 -		nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   2.427 -
   2.428 -		__normalize_time(&sec, &nsec);
   2.429 -		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.430 -		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   2.431 -
   2.432 -		set_normalized_timespec(&xtime, sec, nsec);
   2.433 -		set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   2.434 -
   2.435 -		last_update_from_xen = sec;
   2.436 -	}
   2.437 -
   2.438 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   2.439 -	if (!(xen_start_info.flags & SIF_INITDOMAIN))
   2.440 -		return;
   2.441 -
   2.442 -	/* Send synchronised time to Xen approximately every minute. */
   2.443 -	if (((time_status & STA_UNSYNC) == 0) &&
   2.444 -	    (xtime.tv_sec > (last_update_to_xen + 60))) {
   2.445 -		dom0_op_t op;
   2.446 -		struct timeval tv;
   2.447 -
   2.448 -		tv.tv_sec   = xtime.tv_sec;
   2.449 -		tv.tv_usec  = xtime.tv_nsec / NSEC_PER_USEC;
   2.450 -		tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
   2.451 -		HANDLE_USEC_OVERFLOW(tv);
   2.452 -
   2.453 -		op.cmd = DOM0_SETTIME;
   2.454 -		op.u.settime.secs        = tv.tv_sec;
   2.455 -		op.u.settime.usecs       = tv.tv_usec;
   2.456 -		op.u.settime.system_time = shadow_system_time;
   2.457 -		HYPERVISOR_dom0_op(&op);
   2.458 -
   2.459 -		last_update_to_xen = xtime.tv_sec;
   2.460 -	}
   2.461 -#endif
   2.462  }
   2.463  
   2.464  /*
   2.465 @@ -731,12 +792,10 @@ void __init time_init(void)
   2.466  	xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   2.467  	set_normalized_timespec(&wall_to_monotonic,
   2.468  		-xtime.tv_sec, -xtime.tv_nsec);
   2.469 -	processed_system_time = shadow_system_time;
   2.470 +	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
   2.471  	per_cpu(processed_system_time, 0) = processed_system_time;
   2.472  
   2.473 -	if (timer_tsc_init.init(NULL) != 0)
   2.474 -		BUG();
   2.475 -	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
   2.476 +	init_cpu_khz();
   2.477  
   2.478  #if defined(__x86_64__)
   2.479  	vxtime.mode = VXTIME_TSC;
   2.480 @@ -807,21 +866,15 @@ void time_suspend(void)
   2.481  /* No locking required. We are only CPU running, and interrupts are off. */
   2.482  void time_resume(void)
   2.483  {
   2.484 -	if (timer_tsc_init.init(NULL) != 0)
   2.485 -		BUG();
   2.486 +	init_cpu_khz();
   2.487  
   2.488  	/* Get timebases for new environment. */ 
   2.489  	__get_time_values_from_xen();
   2.490  
   2.491  	/* Reset our own concept of passage of system time. */
   2.492 -	processed_system_time = shadow_system_time;
   2.493 +	processed_system_time =
   2.494 +		per_cpu(shadow_time, smp_processor_id()).system_timestamp;
   2.495  	per_cpu(processed_system_time, 0) = processed_system_time;
   2.496 -
   2.497 -	/* Accept a warp in UTC (wall-clock) time. */
   2.498 -	last_seen_tv.tv_sec = 0;
   2.499 -
   2.500 -	/* Make sure we resync UTC time with Xen on next timer interrupt. */
   2.501 -	last_update_from_xen = 0;
   2.502  }
   2.503  
   2.504  #ifdef CONFIG_SMP
   2.505 @@ -832,7 +885,8 @@ void local_setup_timer(void)
   2.506  
   2.507  	do {
   2.508  		seq = read_seqbegin(&xtime_lock);
   2.509 -		per_cpu(processed_system_time, cpu) = shadow_system_time;
   2.510 +		per_cpu(processed_system_time, cpu) = 
   2.511 +			per_cpu(shadow_time, cpu).system_timestamp;
   2.512  	} while (read_seqretry(&xtime_lock, seq));
   2.513  
   2.514  	per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
   2.515 @@ -861,3 +915,13 @@ static int __init xen_sysctl_init(void)
   2.516  	return 0;
   2.517  }
   2.518  __initcall(xen_sysctl_init);
   2.519 +
   2.520 +/*
   2.521 + * Local variables:
   2.522 + *  c-file-style: "linux"
   2.523 + *  indent-tabs-mode: t
   2.524 + *  c-indent-level: 8
   2.525 + *  c-basic-offset: 8
   2.526 + *  tab-width: 8
   2.527 + * End:
   2.528 + */
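
A note on the fixed-point arithmetic introduced above: get_nsec_offset()
converts a TSC delta to nanoseconds by first shifting the delta by tsc_shift
so it fits in 32 bits, then taking the integer part of a 32x32-bit multiply
with the 0.32 fixed-point fraction tsc_to_nsec_mul. A minimal stand-alone
sketch (plain C in place of the "mul" inline assembly; tsc_delta_to_ns() is
our name for illustration, not part of the patch):

#include <stdint.h>

static uint32_t down_shift(uint64_t time, int shift)
{
	if (shift < 0)
		return (uint32_t)(time >> -shift);
	return (uint32_t)((uint32_t)time << shift); /* note: 32-bit truncation */
}

/* Integer part of multiplicand * (multiplier / 2^32). */
static uint32_t mul_frac(uint32_t multiplicand, uint32_t multiplier)
{
	return (uint32_t)(((uint64_t)multiplicand * multiplier) >> 32);
}

static uint64_t tsc_delta_to_ns(uint64_t tsc_now, uint64_t tsc_stamp,
				int tsc_shift, uint32_t tsc_to_nsec_mul)
{
	uint32_t delta = down_shift(tsc_now - tsc_stamp, tsc_shift);
	return mul_frac(delta, tsc_to_nsec_mul);
}

For a hypothetical 2GHz TSC, set_time_scale() (see xen/arch/x86/time.c below)
yields tsc_shift = 0 and tsc_to_nsec_mul = 0x80000000 (0.5 in 0.32 fixed
point), so 3000 elapsed ticks map to 1500ns. The truncation to 32 bits before
the left shift in down_shift() is what TODO item 5 ("Scale 64-bit TSC diffs
instead of 32-bit") refers to.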
     3.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Sun Jul 17 14:16:21 2005 +0000
     3.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Mon Jul 18 20:22:11 2005 +0000
     3.3 @@ -15,7 +15,7 @@ c-obj-y	:= semaphore.o i387.o sys_x86_64
     3.4  		ptrace.o quirks.o syscall.o bootflag.o
     3.5  
     3.6  i386-obj-y			:= time.o
     3.7 -obj-y				+= ../../i386/kernel/timers/
     3.8 +#obj-y				+= ../../i386/kernel/timers/
     3.9  
    3.10  s-obj-y	:=
    3.11  
     4.1 --- a/xen/arch/x86/apic.c	Sun Jul 17 14:16:21 2005 +0000
     4.2 +++ b/xen/arch/x86/apic.c	Mon Jul 18 20:22:11 2005 +0000
     4.3 @@ -723,16 +723,8 @@ void __setup_APIC_LVTT(unsigned int cloc
     4.4  static void __init setup_APIC_timer(unsigned int clocks)
     4.5  {
     4.6      unsigned long flags;
     4.7 -    
     4.8      local_irq_save(flags);
     4.9 -
    4.10 -    /*
    4.11 -     * Wait for IRQ0's slice:
    4.12 -     */
    4.13 -    wait_timer_tick();
    4.14 -
    4.15      __setup_APIC_LVTT(clocks);
    4.16 -
    4.17      local_irq_restore(flags);
    4.18  }
    4.19  
     5.1 --- a/xen/arch/x86/i8259.c	Sun Jul 17 14:16:21 2005 +0000
     5.2 +++ b/xen/arch/x86/i8259.c	Mon Jul 18 20:22:11 2005 +0000
     5.3 @@ -19,7 +19,7 @@
     5.4  #include <asm/bitops.h>
     5.5  #include <xen/delay.h>
     5.6  #include <asm/apic.h>
     5.7 -
     5.8 +#include <io_ports.h>
     5.9  
    5.10  /*
    5.11   * Common place to define all x86 IRQ vectors
    5.12 @@ -395,9 +395,9 @@ void __init init_IRQ(void)
    5.13      /* Set the clock to HZ Hz */
    5.14  #define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
    5.15  #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
    5.16 -    outb_p(0x34,0x43);           /* binary, mode 2, LSB/MSB, ch 0 */
    5.17 -    outb_p(LATCH & 0xff , 0x40); /* LSB */
    5.18 -    outb(LATCH >> 8 , 0x40);     /* MSB */
    5.19 +    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
    5.20 +    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
    5.21 +    outb(LATCH >> 8, PIT_CH0);     /* MSB */
    5.22  
    5.23      setup_irq(2, &cascade);
    5.24  }
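
For reference, the LATCH value programmed here rounds CLOCK_TICK_RATE/HZ to
the nearest integer. A worked example, assuming HZ = 100 purely for
illustration (the actual HZ value is defined elsewhere in the tree):

#include <stdio.h>

#define CLOCK_TICK_RATE 1193180 /* PIT crystal frequency (Hz) */
#define HZ 100                  /* assumed tick rate, for illustration */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)

int main(void)
{
    /* (1193180 + 50) / 100 = 11932, well within the PIT's 16-bit counter. */
    printf("LATCH = %d\n", LATCH);
    return 0;
}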
     7.1 --- a/xen/arch/x86/smpboot.c	Sun Jul 17 14:16:21 2005 +0000
     7.2 +++ b/xen/arch/x86/smpboot.c	Mon Jul 18 20:22:11 2005 +0000
     7.3 @@ -40,6 +40,7 @@
     7.4  #include <xen/sched.h>
     7.5  #include <xen/irq.h>
     7.6  #include <xen/delay.h>
     7.7 +#include <xen/softirq.h>
     7.8  #include <asm/current.h>
     7.9  #include <asm/mc146818rtc.h>
    7.10  #include <asm/desc.h>
    7.11 @@ -406,6 +407,7 @@ void __init smp_callin(void)
    7.12  	 */
    7.13  	if (cpu_has_tsc && cpu_khz)
    7.14  		synchronize_tsc_ap();
    7.15 +	calibrate_tsc_ap();
    7.16  }
    7.17  
    7.18  int cpucount;
    7.19 @@ -465,6 +467,8 @@ void __init start_secondary(void *unused
    7.20  	/* We can take interrupts now: we're officially "up". */
    7.21  	local_irq_enable();
    7.22  
    7.23 +        init_percpu_time();
    7.24 +
    7.25  	wmb();
    7.26  	startup_cpu_idle_loop();
    7.27  }
    7.28 @@ -1149,6 +1153,7 @@ static void __init smp_boot_cpus(unsigne
    7.29  	 */
    7.30  	if (cpu_has_tsc && cpucount && cpu_khz)
    7.31  		synchronize_tsc_bp();
    7.32 +	calibrate_tsc_bp();
    7.33  }
    7.34  
    7.35  /* These are wrappers to interface to the new boot process.  Someone
    7.36 @@ -1167,22 +1172,21 @@ void __devinit smp_prepare_boot_cpu(void
    7.37  int __devinit __cpu_up(unsigned int cpu)
    7.38  {
    7.39  	/* This only works at boot for x86.  See "rewrite" above. */
    7.40 -	if (cpu_isset(cpu, smp_commenced_mask)) {
    7.41 -		local_irq_enable();
    7.42 +	if (cpu_isset(cpu, smp_commenced_mask))
    7.43  		return -ENOSYS;
    7.44 -	}
    7.45  
    7.46  	/* In case one didn't come up */
    7.47 -	if (!cpu_isset(cpu, cpu_callin_map)) {
    7.48 -		local_irq_enable();
    7.49 +	if (!cpu_isset(cpu, cpu_callin_map))
    7.50  		return -EIO;
    7.51 +
    7.52 +	/* Unleash the CPU! */
    7.53 +	cpu_set(cpu, smp_commenced_mask);
    7.54 +	while (!cpu_isset(cpu, cpu_online_map)) {
    7.55 +		mb();
    7.56 +		if (softirq_pending(0))
    7.57 +			do_softirq();
    7.58  	}
    7.59  
    7.60 -	local_irq_enable();
    7.61 -	/* Unleash the CPU! */
    7.62 -	cpu_set(cpu, smp_commenced_mask);
    7.63 -	while (!cpu_isset(cpu, cpu_online_map))
    7.64 -		mb();
    7.65  	return 0;
    7.66  }
    7.67  
     8.1 --- a/xen/arch/x86/time.c	Sun Jul 17 14:16:21 2005 +0000
     8.2 +++ b/xen/arch/x86/time.c	Mon Jul 18 20:22:11 2005 +0000
     8.3 @@ -1,16 +1,12 @@
     8.4 -/****************************************************************************
     8.5 - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
     8.6 - * (C) 2002-2003 University of Cambridge
     8.7 - ****************************************************************************
     8.8 - *
     8.9 - *        File: i386/time.c
    8.10 - *      Author: Rolf Neugebar & Keir Fraser
    8.11 - */
    8.12 -
    8.13 -/*
    8.14 - *  linux/arch/i386/kernel/time.c
    8.15 - *
    8.16 - *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
    8.17 +/******************************************************************************
    8.18 + * arch/x86/time.c
    8.19 + * 
    8.20 + * Per-CPU time calibration and management.
    8.21 + * 
    8.22 + * Copyright (c) 2002-2005, K A Fraser
    8.23 + * 
    8.24 + * Portions from Linux are:
    8.25 + * Copyright (c) 1991, 1992, 1995  Linus Torvalds
    8.26   */
    8.27  
    8.28  #include <xen/config.h>
    8.29 @@ -31,29 +27,74 @@
    8.30  #include <asm/processor.h>
    8.31  #include <asm/fixmap.h>
    8.32  #include <asm/mc146818rtc.h>
    8.33 +#include <asm/div64.h>
    8.34 +#include <io_ports.h>
    8.35  
    8.36 -/* GLOBAL */
    8.37  unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
    8.38  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
    8.39  int timer_ack = 0;
    8.40  unsigned long volatile jiffies;
    8.41 +static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
    8.42  
    8.43 -/* PRIVATE */
    8.44 -static unsigned int    rdtsc_bitshift;  /* Which 32 bits of TSC do we use?   */
    8.45 -static u64             cpu_freq;        /* CPU frequency (Hz)                */
    8.46 -static u32             st_scale_f;      /* Cycles -> ns, fractional part     */
    8.47 -static u32             st_scale_i;      /* Cycles -> ns, integer part        */
    8.48 -static u32             shifted_tsc_irq; /* CPU0's TSC at last 'time update'  */
    8.49 -static u64             full_tsc_irq;    /* ...ditto, but all 64 bits         */
    8.50 -static s_time_t        stime_irq;       /* System time at last 'time update' */
    8.51 -static unsigned long   wc_sec, wc_usec; /* UTC time at last 'time update'.   */
    8.52 -static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
    8.53 +struct time_scale {
    8.54 +    int shift;
    8.55 +    u32 mul_frac;
    8.56 +};
    8.57 +
    8.58 +struct cpu_time {
    8.59 +    u64 local_tsc_stamp;
    8.60 +    s_time_t stime_local_stamp;
    8.61 +    s_time_t stime_master_stamp;
    8.62 +    struct time_scale tsc_scale;
    8.63 +    struct ac_timer calibration_timer;
    8.64 +} __cacheline_aligned;
    8.65 +
    8.66 +static struct cpu_time cpu_time[NR_CPUS];
    8.67 +
    8.68 +/* Protected by platform_timer_lock. */
    8.69 +static s_time_t stime_platform_stamp;
    8.70 +static u64 platform_timer_stamp;
    8.71 +static struct time_scale platform_timer_scale;
    8.72 +static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
    8.73 +
    8.74 +static inline u32 down_shift(u64 time, int shift)
    8.75 +{
    8.76 +    if ( shift < 0 )
    8.77 +        return (u32)(time >> -shift);
    8.78 +    return (u32)((u32)time << shift);
    8.79 +}
    8.80 +
    8.81 +/*
    8.82 + * 32-bit division of integer dividend and integer divisor yielding
    8.83 + * 32-bit fractional quotient.
    8.84 + */
    8.85 +static inline u32 div_frac(u32 dividend, u32 divisor)
    8.86 +{
    8.87 +    u32 quotient, remainder;
    8.88 +    ASSERT(dividend < divisor);
    8.89 +    __asm__ ( 
    8.90 +        "div %4"
    8.91 +        : "=a" (quotient), "=d" (remainder)
    8.92 +        : "0" (0), "1" (dividend), "r" (divisor) );
    8.93 +    return quotient;
    8.94 +}
    8.95 +
    8.96 +/*
    8.97 + * 32-bit multiplication of integer multiplicand and fractional multiplier
    8.98 + * yielding 32-bit integer product.
    8.99 + */
   8.100 +static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
   8.101 +{
   8.102 +    u32 product_int, product_frac;
   8.103 +    __asm__ (
   8.104 +        "mul %3"
   8.105 +        : "=a" (product_frac), "=d" (product_int)
   8.106 +        : "0" (multiplicand), "r" (multiplier) );
   8.107 +    return product_int;
   8.108 +}
   8.109  
   8.110  void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
   8.111  {
   8.112 -    write_lock_irq(&time_lock);
   8.113 -
   8.114 -#ifdef CONFIG_X86_IO_APIC
   8.115      if ( timer_ack ) 
   8.116      {
   8.117          extern spinlock_t i8259A_lock;
   8.118 @@ -63,31 +104,10 @@ void timer_interrupt(int irq, void *dev_
   8.119          inb(0x20);
   8.120          spin_unlock(&i8259A_lock);
   8.121      }
   8.122 -#endif
   8.123      
   8.124 -    /*
   8.125 -     * Updates TSC timestamp (used to interpolate passage of time between
   8.126 -     * interrupts).
   8.127 -     */
   8.128 -    rdtscll(full_tsc_irq);
   8.129 -    shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
   8.130 -
   8.131      /* Update jiffies counter. */
   8.132      (*(unsigned long *)&jiffies)++;
   8.133  
   8.134 -    /* Update wall time. */
   8.135 -    wc_usec += 1000000/HZ;
   8.136 -    if ( wc_usec >= 1000000 )
   8.137 -    {
   8.138 -        wc_usec -= 1000000;
   8.139 -        wc_sec++;
   8.140 -    }
   8.141 -
   8.142 -    /* Updates system time (nanoseconds since boot). */
   8.143 -    stime_irq += MILLISECS(1000/HZ);
   8.144 -
   8.145 -    write_unlock_irq(&time_lock);
   8.146 -
   8.147      /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
   8.148      if ( !cpu_has_apic )
   8.149          raise_softirq(AC_TIMER_SOFTIRQ);
   8.150 @@ -103,9 +123,9 @@ static struct irqaction irq0 = { timer_i
   8.151  #define CALIBRATE_FRAC  20      /* calibrate over 50ms */
   8.152  #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
   8.153  
   8.154 -static unsigned long __init calibrate_tsc(void)
   8.155 +static u64 calibrate_boot_tsc(void)
   8.156  {
   8.157 -    u64 start, end, diff;
   8.158 +    u64 start, end;
   8.159      unsigned long count;
   8.160  
   8.161      /* Set the Gate high, disable speaker */
   8.162 @@ -118,9 +138,9 @@ static unsigned long __init calibrate_ts
   8.163       * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
   8.164       * to begin countdown.
   8.165       */
   8.166 -    outb(0xb0, 0x43);           /* binary, mode 0, LSB/MSB, Ch 2 */
   8.167 -    outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
   8.168 -    outb(CALIBRATE_LATCH >> 8, 0x42);   /* MSB of count */
   8.169 +    outb(0xb0, PIT_MODE);           /* binary, mode 0, LSB/MSB, Ch 2 */
   8.170 +    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
   8.171 +    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
   8.172  
   8.173      rdtscll(start);
   8.174      for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
   8.175 @@ -131,15 +151,147 @@ static unsigned long __init calibrate_ts
   8.176      if ( count == 0 )
   8.177          return 0;
   8.178  
   8.179 -    diff = end - start;
   8.180 +    return ((end - start) * (u64)CALIBRATE_FRAC);
   8.181 +}
   8.182 +
   8.183 +static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
   8.184 +{
   8.185 +    u64 tps64 = ticks_per_sec;
   8.186 +    u32 tps32;
   8.187 +    int shift = 0;
   8.188 +
   8.189 +    while ( tps64 > (MILLISECS(1000)*2) )
   8.190 +    {
   8.191 +        tps64 >>= 1;
   8.192 +        shift--;
   8.193 +    }
   8.194 +
   8.195 +    tps32 = (u32)tps64;
   8.196 +    while ( tps32 < (u32)MILLISECS(1000) )
   8.197 +    {
   8.198 +        tps32 <<= 1;
   8.199 +        shift++;
   8.200 +    }
   8.201 +
   8.202 +    ts->mul_frac = div_frac(MILLISECS(1000), tps32);
   8.203 +    ts->shift    = shift;
   8.204 +}
   8.205 +
   8.206 +static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
   8.207 +static unsigned int tsc_calibrate_status = 0;
   8.208 +
   8.209 +void calibrate_tsc_bp(void)
   8.210 +{
   8.211 +    while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
   8.212 +        mb();
   8.213 +
   8.214 +    outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
   8.215 +    outb(CALIBRATE_LATCH >> 8, PIT_CH2);
   8.216 +
   8.217 +    tsc_calibrate_status = 1;
    8.218 +    wmb();
   8.219 +
   8.220 +    while ( (inb(0x61) & 0x20) == 0 )
   8.221 +        continue;
   8.222 +
   8.223 +    tsc_calibrate_status = 2;
    8.224 +    wmb();
   8.225 +
   8.226 +    while ( atomic_read(&tsc_calibrate_gang) != 0 )
   8.227 +        mb();
   8.228 +}
   8.229 +
   8.230 +void calibrate_tsc_ap(void)
   8.231 +{
   8.232 +    u64 t1, t2, ticks_per_sec;
   8.233 +
   8.234 +    atomic_inc(&tsc_calibrate_gang);
   8.235 +
   8.236 +    while ( tsc_calibrate_status < 1 )
   8.237 +        mb();
   8.238 +
   8.239 +    rdtscll(t1);
   8.240 +
   8.241 +    while ( tsc_calibrate_status < 2 )
   8.242 +        mb();
   8.243 +
   8.244 +    rdtscll(t2);
   8.245 +
   8.246 +    ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
   8.247 +    set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec);
   8.248  
   8.249 -#if defined(__i386__)
   8.250 -    /* If quotient doesn't fit in 32 bits then we return error (zero). */
   8.251 -    if ( diff & ~0xffffffffULL )
   8.252 -        return 0;
   8.253 -#endif
   8.254 +    atomic_dec(&tsc_calibrate_gang);
   8.255 +}
   8.256 +
   8.257 +/* Protected by platform_timer_lock. */
   8.258 +static u64 platform_pit_counter;
   8.259 +static u16 pit_stamp;
   8.260 +static struct ac_timer pit_overflow_timer;
   8.261 +
   8.262 +static u16 pit_read_counter(void)
   8.263 +{
   8.264 +    u16 count;
   8.265 +    ASSERT(spin_is_locked(&platform_timer_lock));
   8.266 +    outb(0x80, PIT_MODE);
   8.267 +    count  = inb(PIT_CH2);
   8.268 +    count |= inb(PIT_CH2) << 8;
   8.269 +    return count;
   8.270 +}
   8.271 +
   8.272 +static void pit_overflow(void *unused)
   8.273 +{
   8.274 +    u16 counter;
   8.275 +
   8.276 +    spin_lock(&platform_timer_lock);
   8.277 +    counter = pit_read_counter();
   8.278 +    platform_pit_counter += (u16)(pit_stamp - counter);
   8.279 +    pit_stamp = counter;
   8.280 +    spin_unlock(&platform_timer_lock);
   8.281 +
   8.282 +    set_ac_timer(&pit_overflow_timer, NOW() + MILLISECS(20));
   8.283 +}
   8.284  
   8.285 -    return (unsigned long)diff;
   8.286 +static void init_platform_timer(void)
   8.287 +{
   8.288 +    init_ac_timer(&pit_overflow_timer, pit_overflow, NULL, 0);
   8.289 +    pit_overflow(NULL);
   8.290 +    platform_timer_stamp = platform_pit_counter;
   8.291 +    set_time_scale(&platform_timer_scale, CLOCK_TICK_RATE);
   8.292 +}
   8.293 +
   8.294 +static s_time_t __read_platform_stime(u64 platform_time)
   8.295 +{
   8.296 +    u64 diff64 = platform_time - platform_timer_stamp;
   8.297 +    u32 diff   = down_shift(diff64, platform_timer_scale.shift);
   8.298 +    ASSERT(spin_is_locked(&platform_timer_lock));
   8.299 +    return (stime_platform_stamp + 
   8.300 +            (u64)mul_frac(diff, platform_timer_scale.mul_frac));
   8.301 +}
   8.302 +
   8.303 +static s_time_t read_platform_stime(void)
   8.304 +{
   8.305 +    u64 counter;
   8.306 +    s_time_t stime;
   8.307 +
   8.308 +    spin_lock(&platform_timer_lock);
   8.309 +    counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
   8.310 +    stime   = __read_platform_stime(counter);
   8.311 +    spin_unlock(&platform_timer_lock);
   8.312 +
   8.313 +    return stime;
   8.314 +}
   8.315 +
   8.316 +static void platform_time_calibration(void)
   8.317 +{
   8.318 +    u64 counter;
   8.319 +    s_time_t stamp;
   8.320 +
   8.321 +    spin_lock(&platform_timer_lock);
   8.322 +    counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
   8.323 +    stamp   = __read_platform_stime(counter);
   8.324 +    stime_platform_stamp = stamp;
   8.325 +    platform_timer_stamp = counter;
   8.326 +    spin_unlock(&platform_timer_lock);
   8.327  }
   8.328  
   8.329  
   8.330 @@ -233,141 +385,215 @@ static unsigned long get_cmos_time(void)
   8.331   * System Time
   8.332   ***************************************************************************/
   8.333  
   8.334 -static inline u64 get_time_delta(void)
   8.335 +s_time_t get_s_time(void)
   8.336  {
   8.337 -    s32      delta_tsc;
   8.338 -    u32      low;
   8.339 -    u64      delta, tsc;
   8.340 -
   8.341 -    ASSERT(st_scale_f || st_scale_i);
   8.342 +    struct cpu_time *t = &cpu_time[smp_processor_id()];
   8.343 +    u64 tsc;
   8.344 +    u32 delta;
   8.345 +    s_time_t now;
   8.346  
   8.347      rdtscll(tsc);
   8.348 -    low = (u32)(tsc >> rdtsc_bitshift);
   8.349 -    delta_tsc = (s32)(low - shifted_tsc_irq);
   8.350 -    if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
   8.351 -    delta = ((u64)delta_tsc * st_scale_f);
   8.352 -    delta >>= 32;
   8.353 -    delta += ((u64)delta_tsc * st_scale_i);
   8.354 -
   8.355 -    return delta;
   8.356 -}
   8.357 -
   8.358 -s_time_t get_s_time(void)
   8.359 -{
   8.360 -    s_time_t now;
   8.361 -    unsigned long flags;
   8.362 +    delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
   8.363 +    now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
   8.364  
   8.365 -    read_lock_irqsave(&time_lock, flags);
   8.366 -
   8.367 -    now = stime_irq + get_time_delta();
   8.368 -
   8.369 -    /* Ensure that the returned system time is monotonically increasing. */
   8.370 -    {
   8.371 -        static s_time_t prev_now = 0;
   8.372 -        if ( unlikely(now < prev_now) )
   8.373 -            now = prev_now;
   8.374 -        prev_now = now;
   8.375 -    }
   8.376 -
   8.377 -    read_unlock_irqrestore(&time_lock, flags);
   8.378 -
   8.379 -    return now; 
   8.380 +    return now;
   8.381  }
   8.382  
   8.383  static inline void __update_dom_time(struct vcpu *v)
   8.384  {
   8.385 -    struct domain *d  = v->domain;
   8.386 -    shared_info_t *si = d->shared_info;
   8.387 +    struct cpu_time       *t = &cpu_time[smp_processor_id()];
   8.388 +    struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
   8.389  
   8.390 -    spin_lock(&d->time_lock);
   8.391 -
   8.392 -    si->time_version1++;
   8.393 +    u->time_version1++;
   8.394      wmb();
   8.395  
   8.396 -    si->cpu_freq       = cpu_freq;
   8.397 -    si->tsc_timestamp  = full_tsc_irq;
   8.398 -    si->system_time    = stime_irq;
   8.399 -    si->wc_sec         = wc_sec;
   8.400 -    si->wc_usec        = wc_usec;
   8.401 +    u->tsc_timestamp     = t->local_tsc_stamp;
   8.402 +    u->system_time       = t->stime_local_stamp;
   8.403 +    u->tsc_to_system_mul = t->tsc_scale.mul_frac;
   8.404 +    u->tsc_shift         = (s8)t->tsc_scale.shift;
   8.405  
   8.406      wmb();
   8.407 -    si->time_version2++;
   8.408 +    u->time_version2++;
   8.409  
   8.410 -    spin_unlock(&d->time_lock);
   8.411 +    /* Should only do this during do_settime(). */
   8.412 +    v->domain->shared_info->wc_sec  = wc_sec;
   8.413 +    v->domain->shared_info->wc_usec = wc_usec;
   8.414  }
   8.415  
   8.416  void update_dom_time(struct vcpu *v)
   8.417  {
   8.418 -    unsigned long flags;
   8.419 -
   8.420 -    if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
   8.421 -    {
   8.422 -        read_lock_irqsave(&time_lock, flags);
   8.423 +    if ( v->domain->shared_info->vcpu_time[v->vcpu_id].tsc_timestamp != 
   8.424 +         cpu_time[smp_processor_id()].local_tsc_stamp )
   8.425          __update_dom_time(v);
   8.426 -        read_unlock_irqrestore(&time_lock, flags);
   8.427 -    }
   8.428  }
   8.429  
   8.430  /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
   8.431  void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
   8.432  {
   8.433 -    s64 delta;
   8.434 -    long _usecs = (long)usecs;
   8.435 +    u64 x, base_usecs;
   8.436 +    u32 y;
   8.437 +
   8.438 +    base_usecs = system_time_base;
   8.439 +    do_div(base_usecs, 1000);
   8.440 +
   8.441 +    x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
   8.442 +    y = do_div(x, 1000000);
   8.443 +
   8.444 +    wc_sec  = (unsigned long)x;
   8.445 +    wc_usec = (unsigned long)y;
   8.446 +
   8.447 +    __update_dom_time(current);
   8.448 +}
   8.449 +
   8.450 +static void local_time_calibration(void *unused)
   8.451 +{
   8.452 +    unsigned int cpu = smp_processor_id();
   8.453  
   8.454 -    write_lock_irq(&time_lock);
   8.455 +    /*
   8.456 +     * System timestamps, extrapolated from local and master oscillators,
   8.457 +     * taken during this calibration and the previous calibration.
   8.458 +     */
   8.459 +    s_time_t prev_local_stime, curr_local_stime;
   8.460 +    s_time_t prev_master_stime, curr_master_stime;
   8.461 +
   8.462 +    /* TSC timestamps taken during this calibration and prev calibration. */
   8.463 +    u64 prev_tsc, curr_tsc;
   8.464 +
   8.465 +    /*
   8.466 +     * System time and TSC ticks elapsed during the previous calibration
   8.467 +     * 'epoch'. Also the accumulated error in the local estimate. All these
   8.468 +     * values end up down-shifted to fit in 32 bits.
   8.469 +     */
   8.470 +    u64 stime_elapsed64, tsc_elapsed64, local_stime_error64;
   8.471 +    u32 stime_elapsed32, tsc_elapsed32, local_stime_error32;
   8.472 +
   8.473 +    /* Calculated TSC shift to ensure 32-bit scale multiplier. */
   8.474 +    int tsc_shift = 0;
   8.475  
   8.476 -    delta = (s64)(stime_irq - system_time_base);
   8.477 +    prev_tsc          = cpu_time[cpu].local_tsc_stamp;
   8.478 +    prev_local_stime  = cpu_time[cpu].stime_local_stamp;
   8.479 +    prev_master_stime = cpu_time[cpu].stime_master_stamp;
   8.480 +
   8.481 +    /* Disable IRQs to get 'instantaneous' current timestamps. */
   8.482 +    local_irq_disable();
   8.483 +    rdtscll(curr_tsc);
   8.484 +    curr_local_stime  = get_s_time();
   8.485 +    curr_master_stime = read_platform_stime();
   8.486 +    local_irq_enable();
   8.487 +
   8.488 +#if 0
   8.489 +    printk("PRE%d: tsc=%lld stime=%lld master=%lld\n",
   8.490 +           cpu, prev_tsc, prev_local_stime, prev_master_stime);
   8.491 +    printk("CUR%d: tsc=%lld stime=%lld master=%lld %lld\n",
   8.492 +           cpu, curr_tsc, curr_local_stime, curr_master_stime,
   8.493 +           platform_pit_counter);
   8.494 +#endif
   8.495 +
   8.496 +    /* Local time warps forward if it lags behind master time. */
   8.497 +    if ( curr_local_stime < curr_master_stime )
   8.498 +        curr_local_stime = curr_master_stime;
   8.499  
   8.500 -    _usecs += (long)(delta/1000);
   8.501 -    while ( _usecs >= 1000000 ) 
   8.502 +    stime_elapsed64 = curr_master_stime - prev_master_stime;
   8.503 +    tsc_elapsed64   = curr_tsc - prev_tsc;
   8.504 +
   8.505 +    /*
   8.506 +     * Error in the local system time estimate. Clamp to epoch time period, or
   8.507 +     * we could end up with a negative scale factor (time going backwards!).
   8.508 +     * This effectively clamps the scale factor to >= 0.
   8.509 +     */
   8.510 +    local_stime_error64 = curr_local_stime - curr_master_stime;
   8.511 +    if ( local_stime_error64 > stime_elapsed64 )
   8.512 +        local_stime_error64 = stime_elapsed64;
   8.513 +
   8.514 +    /*
   8.515 +     * We require 0 < stime_elapsed < 2^31.
   8.516 +     * This allows us to binary shift a 32-bit tsc_elapsed such that:
   8.517 +     * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
   8.518 +     */
   8.519 +    while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
   8.520 +            ((s32)stime_elapsed64 < 0) )
   8.521      {
   8.522 -        _usecs -= 1000000;
   8.523 -        secs++;
   8.524 +        stime_elapsed64     >>= 1;
   8.525 +        tsc_elapsed64       >>= 1;
   8.526 +        local_stime_error64 >>= 1;
   8.527      }
   8.528  
   8.529 -    wc_sec  = secs;
   8.530 -    wc_usec = _usecs;
    8.531 +    /* stime_elapsed (and hence local_stime_error) now fit in a 32-bit word. */
   8.532 +    stime_elapsed32     = (u32)stime_elapsed64;
   8.533 +    local_stime_error32 = (u32)local_stime_error64;
   8.534 +
   8.535 +    /* tsc_elapsed <= 2*stime_elapsed */
   8.536 +    while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
   8.537 +    {
   8.538 +        tsc_elapsed64 >>= 1;
   8.539 +        tsc_shift--;
   8.540 +    }
   8.541 +
   8.542 +    /* Local difference must now fit in 32 bits. */
   8.543 +    ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
   8.544 +    tsc_elapsed32 = (u32)tsc_elapsed64;
   8.545  
   8.546 -    /* Others will pick up the change at the next tick. */
   8.547 -    __update_dom_time(current);
   8.548 -    send_guest_virq(current, VIRQ_TIMER);
   8.549 +    /* tsc_elapsed > stime_elapsed */
   8.550 +    ASSERT(tsc_elapsed32 != 0);
   8.551 +    while ( tsc_elapsed32 <= stime_elapsed32 )
   8.552 +    {
   8.553 +        tsc_elapsed32 <<= 1;
   8.554 +        tsc_shift++;
   8.555 +    }
   8.556  
   8.557 -    write_unlock_irq(&time_lock);
   8.558 +#if 0
   8.559 +    printk("---%d: %08x %d\n", cpu, 
   8.560 +           div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32),
   8.561 +           tsc_shift);
   8.562 +#endif
   8.563 +
   8.564 +    /* Record new timestamp information. */
   8.565 +    cpu_time[cpu].tsc_scale.mul_frac = 
   8.566 +        div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32);
   8.567 +    cpu_time[cpu].tsc_scale.shift    = tsc_shift;
   8.568 +    cpu_time[cpu].local_tsc_stamp    = curr_tsc;
   8.569 +    cpu_time[cpu].stime_local_stamp  = curr_local_stime;
   8.570 +    cpu_time[cpu].stime_master_stamp = curr_master_stime;
   8.571 +
   8.572 +    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
   8.573 +
   8.574 +    if ( cpu == 0 )
   8.575 +        platform_time_calibration();
   8.576  }
   8.577  
   8.578 +void init_percpu_time(void)
   8.579 +{
   8.580 +    unsigned int cpu = smp_processor_id();
   8.581 +    unsigned long flags;
   8.582 +    s_time_t now;
   8.583 +
   8.584 +    local_irq_save(flags);
   8.585 +    rdtscll(cpu_time[cpu].local_tsc_stamp);
   8.586 +    now = (cpu == 0) ? 0 : read_platform_stime();
   8.587 +    local_irq_restore(flags);
   8.588 +
   8.589 +    cpu_time[cpu].stime_master_stamp = now;
   8.590 +    cpu_time[cpu].stime_local_stamp  = now;
   8.591 +
   8.592 +    init_ac_timer(&cpu_time[cpu].calibration_timer,
   8.593 +                  local_time_calibration, NULL, cpu);
   8.594 +    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
   8.595 +}
   8.596  
   8.597  /* Late init function (after all CPUs are booted). */
   8.598 -int __init init_xen_time()
   8.599 +int __init init_xen_time(void)
   8.600  {
   8.601 -    u64      scale;
   8.602 -    unsigned int cpu_ghz;
   8.603 -
   8.604 -    cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
   8.605 -    for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
   8.606 -        continue;
   8.607 -
   8.608 -    scale  = 1000000000LL << (32 + rdtsc_bitshift);
   8.609 -    scale /= cpu_freq;
   8.610 -    st_scale_f = scale & 0xffffffff;
   8.611 -    st_scale_i = scale >> 32;
   8.612 +    wc_sec = get_cmos_time();
   8.613  
   8.614      local_irq_disable();
   8.615  
   8.616 -    /* System time ticks from zero. */
   8.617 -    rdtscll(full_tsc_irq);
   8.618 -    stime_irq = (s_time_t)0;
   8.619 -    shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
   8.620 +    init_percpu_time();
   8.621  
   8.622 -    /* Wallclock time starts as the initial RTC time. */
   8.623 -    wc_sec = get_cmos_time();
   8.624 +    stime_platform_stamp = 0;
   8.625 +    init_platform_timer();
   8.626  
   8.627      local_irq_enable();
   8.628  
   8.629 -    printk("Time init:\n");
   8.630 -    printk(".... cpu_freq:    %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);
   8.631 -    printk(".... scale:       %08X:%08X\n", (u32)(scale>>32),(u32)scale);
   8.632 -    printk(".... Wall Clock:  %lds %ldus\n", wc_sec, wc_usec);
   8.633 -
   8.634      return 0;
   8.635  }
   8.636  
   8.637 @@ -375,15 +601,12 @@ int __init init_xen_time()
   8.638  /* Early init function. */
   8.639  void __init early_time_init(void)
   8.640  {
   8.641 -    unsigned long ticks_per_frac = calibrate_tsc();
   8.642 -
   8.643 -    if ( !ticks_per_frac )
   8.644 -        panic("Error calibrating TSC\n");
   8.645 +    u64 tmp = calibrate_boot_tsc();
   8.646  
   8.647 -    cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
   8.648 +    set_time_scale(&cpu_time[0].tsc_scale, tmp);
   8.649  
   8.650 -    cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
   8.651 -
   8.652 +    do_div(tmp, 1000);
   8.653 +    cpu_khz = (unsigned long)tmp;
   8.654      printk("Detected %lu.%03lu MHz processor.\n", 
   8.655             cpu_khz / 1000, cpu_khz % 1000);
   8.656  
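Two points worth drawing out of the new calibration code above. First, once a
second local_time_calibration() compares the locally extrapolated system time
against the platform (PIT) time and recomputes the per-CPU scale factor as
(stime_elapsed - local_stime_error) / tsc_elapsed, so the next epoch runs
slightly fast or slow to absorb the accumulated error; TODO item 6 notes this
error-correcting scale factor is still slightly wrong. Second, the boot-time
scale comes from set_time_scale(); its normalisation is sketched below in
portable C (plain 64-bit division in place of the div_frac() inline assembly):

#include <stdint.h>

struct time_scale {
    int shift;
    uint32_t mul_frac;
};

/*
 * Find (shift, mul_frac) such that ns = ((ticks << shift) * mul_frac) >> 32,
 * by shifting ticks_per_sec into [10^9, 2*10^9) so that the fraction
 * 10^9 / tps32 fits in 0.32 fixed point.
 */
static void set_time_scale(struct time_scale *ts, uint64_t ticks_per_sec)
{
    uint64_t tps64 = ticks_per_sec;
    uint32_t tps32;
    int shift = 0;

    while ( tps64 > 2000000000ULL )
    {
        tps64 >>= 1;
        shift--;
    }

    tps32 = (uint32_t)tps64;
    while ( tps32 < 1000000000UL )
    {
        tps32 <<= 1;
        shift++;
    }

    ts->mul_frac = (uint32_t)((1000000000ULL << 32) / tps32);
    ts->shift    = shift;
}

The SMP path is similar: calibrate_tsc_bp()/calibrate_tsc_ap() time each AP's
TSC over the same 50ms PIT countdown, using tsc_calibrate_gang and
tsc_calibrate_status as a rendezvous, and feed the result to set_time_scale()
so every CPU gets its own tsc_scale.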
     9.1 --- a/xen/arch/x86/vmx_intercept.c	Sun Jul 17 14:16:21 2005 +0000
     9.2 +++ b/xen/arch/x86/vmx_intercept.c	Mon Jul 18 20:22:11 2005 +0000
     9.3 @@ -24,10 +24,10 @@
     9.4  #include <asm/vmx_virpit.h>
     9.5  #include <asm/vmx_intercept.h>
     9.6  #include <public/io/ioreq.h>
     9.7 -
     9.8  #include <xen/lib.h>
     9.9  #include <xen/sched.h>
    9.10  #include <asm/current.h>
    9.11 +#include <io_ports.h>
    9.12  
    9.13  #ifdef CONFIG_VMX
    9.14  
    9.15 @@ -175,7 +175,7 @@ int intercept_pit_io(ioreq_t *p)
    9.16          p->port_mm)
    9.17          return 0;
    9.18      
    9.19 -    if (p->addr == 0x43 &&
    9.20 +    if (p->addr == PIT_MODE &&
    9.21  	p->dir == 0 &&				/* write */
    9.22          ((p->u.data >> 4) & 0x3) == 0 &&	/* latch command */
    9.23          ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
    9.24 @@ -183,7 +183,7 @@ int intercept_pit_io(ioreq_t *p)
    9.25  	return 1;
    9.26      }
    9.27  
    9.28 -    if (p->addr == (0x40 + vpit->channel) &&
    9.29 +    if (p->addr == (PIT_CH0 + vpit->channel) &&
    9.30  	p->dir == 1) {	/* read */
    9.31          p->u.data = pit_read_io(vpit);
    9.32          resume_pit_io(p);
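
The intercept above decodes the command byte a guest writes to PIT_MODE (port
0x43): on the 8254, bits 7:6 select the channel and bits 5:4 the access mode,
with 00 meaning a counter-latch command. A hedged decoding sketch (the macro
and function names are ours, for illustration only):

#include <stdint.h>

/* Fields of a byte written to the PIT mode/command port (0x43). */
#define PIT_CMD_CHANNEL(b) (((b) >> 6) & 0x3)
#define PIT_CMD_ACCESS(b)  (((b) >> 4) & 0x3)   /* 0 => latch command */

static int is_latch_for_channel(uint8_t cmd, int channel)
{
    return (PIT_CMD_ACCESS(cmd) == 0) && (PIT_CMD_CHANNEL(cmd) == channel);
}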
    10.1 --- a/xen/common/ac_timer.c	Sun Jul 17 14:16:21 2005 +0000
    10.2 +++ b/xen/common/ac_timer.c	Mon Jul 18 20:22:11 2005 +0000
    10.3 @@ -202,7 +202,7 @@ static void ac_timer_softirq_action(void
    10.4      do {
    10.5          heap = ac_timers[cpu].heap;
    10.6          now  = NOW();
    10.7 -        
    10.8 +
    10.9          while ( (GET_HEAP_SIZE(heap) != 0) &&
   10.10                  ((t = heap[1])->expires < (now + TIMER_SLOP)) )
   10.11          {
    11.1 --- a/xen/common/domain.c	Sun Jul 17 14:16:21 2005 +0000
    11.2 +++ b/xen/common/domain.c	Mon Jul 18 20:22:11 2005 +0000
    11.3 @@ -42,8 +42,6 @@ struct domain *do_createdomain(domid_t d
    11.4      d->domain_id   = dom_id;
    11.5      v->processor  = cpu;
    11.6   
    11.7 -    spin_lock_init(&d->time_lock);
    11.8 -
    11.9      spin_lock_init(&d->big_lock);
   11.10  
   11.11      spin_lock_init(&d->page_alloc_lock);
    12.1 --- a/xen/common/page_alloc.c	Sun Jul 17 14:16:21 2005 +0000
    12.2 +++ b/xen/common/page_alloc.c	Mon Jul 18 20:22:11 2005 +0000
    12.3 @@ -351,10 +351,10 @@ void free_heap_pages(
    12.4  void scrub_heap_pages(void)
    12.5  {
    12.6      void *p;
    12.7 -    unsigned long pfn, flags;
    12.8 +    unsigned long pfn;
    12.9 +    int cpu = smp_processor_id();
   12.10  
   12.11      printk("Scrubbing Free RAM: ");
   12.12 -    watchdog_disable();
   12.13  
   12.14      for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
   12.15      {
   12.16 @@ -362,12 +362,15 @@ void scrub_heap_pages(void)
   12.17          if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
   12.18              printk(".");
   12.19  
   12.20 +        if ( unlikely(softirq_pending(cpu)) )
   12.21 +            do_softirq();
   12.22 +
   12.23          /* Quick lock-free check. */
   12.24          if ( allocated_in_map(pfn) )
   12.25              continue;
   12.26 -        
   12.27 -        spin_lock_irqsave(&heap_lock, flags);
   12.28 -        
   12.29 +
   12.30 +        spin_lock_irq(&heap_lock);
   12.31 +
   12.32          /* Re-check page status with lock held. */
   12.33          if ( !allocated_in_map(pfn) )
   12.34          {
   12.35 @@ -385,11 +388,10 @@ void scrub_heap_pages(void)
   12.36                  unmap_domain_page(p);
   12.37              }
   12.38          }
   12.39 -        
   12.40 -        spin_unlock_irqrestore(&heap_lock, flags);
   12.41 +
   12.42 +        spin_unlock_irq(&heap_lock);
   12.43      }
   12.44  
   12.45 -    watchdog_enable();
   12.46      printk("done.\n");
   12.47  }
   12.48  
    13.1 --- a/xen/drivers/char/console.c	Sun Jul 17 14:16:21 2005 +0000
    13.2 +++ b/xen/drivers/char/console.c	Mon Jul 18 20:22:11 2005 +0000
    13.3 @@ -635,8 +635,6 @@ static int __init debugtrace_init(void)
    13.4  
    13.5      debugtrace_bytes = bytes;
    13.6  
    13.7 -    memset(debugtrace_buf, '\0', debugtrace_bytes);
    13.8 -
    13.9      return 0;
   13.10  }
   13.11  __initcall(debugtrace_init);
    14.1 --- a/xen/include/asm-x86/time.h	Sun Jul 17 14:16:21 2005 +0000
    14.2 +++ b/xen/include/asm-x86/time.h	Mon Jul 18 20:22:11 2005 +0000
    14.3 @@ -4,4 +4,7 @@
    14.4  
    14.5  extern int timer_ack;
    14.6  
    14.7 +extern void calibrate_tsc_bp(void);
    14.8 +extern void calibrate_tsc_ap(void);
    14.9 +
   14.10  #endif /* __X86_TIME_H__ */
    15.1 --- a/xen/include/public/xen.h	Sun Jul 17 14:16:21 2005 +0000
    15.2 +++ b/xen/include/public/xen.h	Mon Jul 18 20:22:11 2005 +0000
    15.3 @@ -329,6 +329,28 @@ typedef struct vcpu_info {
    15.4  #endif
    15.5  } vcpu_info_t;
    15.6  
    15.7 +typedef struct vcpu_time_info {
    15.8 +    /*
    15.9 +     * The following values are updated periodically (and not necessarily
   15.10 +     * atomically!). The guest OS detects this because 'time_version1' is
   15.11 +     * incremented just before updating these values, and 'time_version2' is
   15.12 +     * incremented immediately after. See the Xen-specific Linux code for an
   15.13 +     * example of how to read these values safely (arch/xen/kernel/time.c).
   15.14 +     */
   15.15 +    u32 time_version1;
   15.16 +    u32 time_version2;
   15.17 +    u64 tsc_timestamp;   /* TSC at last update of time vals.  */
   15.18 +    u64 system_time;     /* Time, in nanosecs, since boot.    */
   15.19 +    /*
   15.20 +     * Current system time:
   15.21 +     *   system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
   15.22 +     * CPU frequency (Hz):
   15.23 +     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
   15.24 +     */
   15.25 +    u32 tsc_to_system_mul;
   15.26 +    s8  tsc_shift;
   15.27 +} vcpu_time_info_t;
   15.28 +
   15.29  /*
   15.30   * Xen/kernel shared data -- pointer provided in start_info.
   15.31   * NB. We expect that this struct is smaller than a page.
   15.32 @@ -336,6 +358,8 @@ typedef struct vcpu_info {
   15.33  typedef struct shared_info {
   15.34      vcpu_info_t vcpu_data[MAX_VIRT_CPUS];
   15.35  
   15.36 +    vcpu_time_info_t vcpu_time[MAX_VIRT_CPUS];
   15.37 +
   15.38      u32 n_vcpu;
   15.39  
   15.40      /*
   15.41 @@ -373,33 +397,11 @@ typedef struct shared_info {
   15.42      u32 evtchn_mask[32];
   15.43  
   15.44      /*
   15.45 -     * Time: The following abstractions are exposed: System Time, Clock Time,
   15.46 -     * Domain Virtual Time. Domains can access Cycle counter time directly.
   15.47 +     * Wallclock time: updated only by control software. Guests should base
   15.48 +     * their gettimeofday() syscall on this wallclock-base value.
   15.49       */
   15.50 -    u64                cpu_freq;        /* CPU frequency (Hz).          */
   15.51 -
   15.52 -    /*
   15.53 -     * The following values are updated periodically (and not necessarily
   15.54 -     * atomically!). The guest OS detects this because 'time_version1' is
   15.55 -     * incremented just before updating these values, and 'time_version2' is
   15.56 -     * incremented immediately after. See the Xen-specific Linux code for an
   15.57 -     * example of how to read these values safely (arch/xen/kernel/time.c).
   15.58 -     */
   15.59 -    u32                time_version1;
   15.60 -    u32                time_version2;
   15.61 -    tsc_timestamp_t    tsc_timestamp;   /* TSC at last update of time vals.  */
   15.62 -    u64                system_time;     /* Time, in nanosecs, since boot.    */
   15.63      u32                wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
   15.64      u32                wc_usec;         /* Usecs 00:00:00 UTC, Jan 1, 1970.  */
   15.65 -    u64                domain_time;     /* Domain virtual time, in nanosecs. */
   15.66 -
   15.67 -    /*
   15.68 -     * Timeout values:
   15.69 -     * Allow a domain to specify a timeout value in system time and 
   15.70 -     * domain virtual time.
   15.71 -     */
   15.72 -    u64                wall_timeout;
   15.73 -    u64                domain_timeout;
   15.74  
   15.75      arch_shared_info_t arch;
   15.76  
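To make the version protocol and the formula documented above concrete, a
guest-side reader might look as follows. This is a hedged sketch only:
rdtsc64() and rmb() are placeholders for the guest's own primitives, and the
Linux code in linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c above is the
authoritative example.

#include <stdint.h>

extern uint64_t rdtsc64(void); /* placeholder: full 64-bit TSC read */
extern void rmb(void);         /* placeholder: read memory barrier  */

struct vcpu_time_info {
    uint32_t time_version1;
    uint32_t time_version2;
    uint64_t tsc_timestamp;
    uint64_t system_time;
    uint32_t tsc_to_system_mul;
    int8_t   tsc_shift;
};

uint64_t read_system_time_ns(volatile struct vcpu_time_info *t)
{
    uint32_t version, mul, delta;
    uint64_t stamp, sys, tsc;
    int8_t shift;

    /* Snapshot between matching version reads, as described above. */
    do {
        version = t->time_version2;
        rmb();
        stamp = t->tsc_timestamp;
        sys   = t->system_time;
        mul   = t->tsc_to_system_mul;
        shift = t->tsc_shift;
        rmb();
    } while ( version != t->time_version1 );

    tsc   = rdtsc64();
    delta = (shift < 0) ? (uint32_t)((tsc - stamp) >> -shift)
                        : ((uint32_t)(tsc - stamp) << shift);

    /* system_time + ((tsc - tsc_timestamp) << tsc_shift) * mul / 2^32 */
    return sys + (((uint64_t)delta * mul) >> 32);
}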
    16.1 --- a/xen/include/xen/sched.h	Sun Jul 17 14:16:21 2005 +0000
    16.2 +++ b/xen/include/xen/sched.h	Mon Jul 18 20:22:11 2005 +0000
    16.3 @@ -92,7 +92,6 @@ struct domain
    16.4      domid_t          domain_id;
    16.5  
    16.6      shared_info_t   *shared_info;     /* shared data area */
    16.7 -    spinlock_t       time_lock;
    16.8  
    16.9      spinlock_t       big_lock;
   16.10  
    17.1 --- a/xen/include/xen/time.h	Sun Jul 17 14:16:21 2005 +0000
    17.2 +++ b/xen/include/xen/time.h	Mon Jul 18 20:22:11 2005 +0000
    17.3 @@ -30,7 +30,8 @@
    17.4  #include <public/xen.h>
    17.5  #include <asm/time.h>
    17.6  
    17.7 -extern int init_xen_time();
    17.8 +extern int init_xen_time(void);
    17.9 +extern void init_percpu_time(void);
   17.10  
   17.11  extern unsigned long cpu_khz;
   17.12