direct-io.hg

changeset 4475:ab31a05fc9e6

bitkeeper revision 1.1236.58.1 (42527f89zgr9JJ0KbdZkIWCfo_KBfA)

SMP timer and irq fixes for 2.6. Merge x86/64 time.c with i386.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Apr 05 12:07:37 2005 +0000 (2005-04-05)
parents 38d7ac8f210f
children a474e06bc485
files .rootkeys linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c
line diff
     1.1 --- a/.rootkeys	Tue Apr 05 10:41:39 2005 +0000
     1.2 +++ b/.rootkeys	Tue Apr 05 12:07:37 2005 +0000
     1.3 @@ -282,7 +282,6 @@ 424efaa7DIVTR1U4waPGHucha9Xilg linux-2.6
     1.4  424efaa6L1lrzwCIadTNxogSvljFwg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c
     1.5  424efaa61XzweJyW3v5Lb9egpe3rtw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
     1.6  424efaa778MkpdkAIq0An1FjQENN_Q linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
     1.7 -424efaa7vzbNdhwhkQPhs1V7LrAH4Q linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c
     1.8  424efaa7szEu90xkjpXk5TufZxxa4g linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
     1.9  424efaa6sJsuHdGIGxm0r-ugsss3OQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
    1.10  424efaa6xbX9LkKyaXvgbL9s_39Trw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
     2.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c	Tue Apr 05 10:41:39 2005 +0000
     2.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c	Tue Apr 05 12:07:37 2005 +0000
     2.3 @@ -437,21 +437,23 @@ void __init smp_callin(void)
     2.4  int cpucount;
     2.5  
     2.6  
     2.7 -static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
     2.8 -					 struct pt_regs *regs)
     2.9 +static irqreturn_t ldebug_interrupt(
    2.10 +	int irq, void *dev_id, struct pt_regs *regs)
    2.11  {
    2.12 -
    2.13  	return IRQ_HANDLED;
    2.14  }
    2.15  
    2.16 -static struct irqaction local_irq_debug = {
    2.17 -	local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
    2.18 -	NULL, NULL
    2.19 -};
    2.20 +static DEFINE_PER_CPU(int, ldebug_irq);
    2.21 +static char ldebug_name[NR_IRQS][15];
    2.22  
    2.23 -void local_setup_debug(void)
    2.24 +void ldebug_setup(void)
    2.25  {
    2.26 -	(void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
    2.27 +	int cpu = smp_processor_id();
    2.28 +
    2.29 +	per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
    2.30 +	sprintf(ldebug_name[cpu], "ldebug%d", cpu);
    2.31 +	BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
    2.32 +	                   SA_INTERRUPT, ldebug_name[cpu], NULL));
    2.33  }
    2.34  
    2.35  
    2.36 @@ -472,7 +474,7 @@ static int __init start_secondary(void *
    2.37  	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
    2.38  		rep_nop();
    2.39  	local_setup_timer();
    2.40 -	local_setup_debug();	/* XXX */
    2.41 +	ldebug_setup();
    2.42  	smp_intr_init();
    2.43  	local_irq_enable();
    2.44  	/*
    2.45 @@ -1329,36 +1331,27 @@ void __init smp_cpus_done(unsigned int m
    2.46  }
    2.47  
    2.48  extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
    2.49 -
    2.50 -static struct irqaction reschedule_irq = {
    2.51 -	smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
    2.52 -	NULL, NULL
    2.53 -};
    2.54 -
    2.55 -extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
    2.56 -
    2.57 -static struct irqaction invalidate_irq = {
    2.58 -	smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
    2.59 -	NULL, NULL
    2.60 -};
    2.61 -
    2.62  extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
    2.63  
    2.64 -static struct irqaction call_function_irq = {
    2.65 -	smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
    2.66 -	"call_function", NULL, NULL
    2.67 -};
    2.68 +static DEFINE_PER_CPU(int, resched_irq);
    2.69 +static DEFINE_PER_CPU(int, callfunc_irq);
    2.70 +static char resched_name[NR_IRQS][15];
    2.71 +static char callfunc_name[NR_IRQS][15];
    2.72  
    2.73  void __init smp_intr_init(void)
    2.74  {
    2.75 +	int cpu = smp_processor_id();
    2.76  
    2.77 -	(void)setup_irq(
    2.78 -	    bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
    2.79 -	    &reschedule_irq);
    2.80 -	(void)setup_irq(
    2.81 -	    bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
    2.82 -	    &invalidate_irq);
    2.83 -	(void)setup_irq(
    2.84 -	    bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
    2.85 -	    &call_function_irq);
    2.86 +	per_cpu(resched_irq, cpu) =
    2.87 +		bind_ipi_on_cpu_to_irq(cpu, RESCHEDULE_VECTOR);
    2.88 +	sprintf(resched_name[cpu], "resched%d", cpu);
    2.89 +	BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
    2.90 +	                   SA_INTERRUPT, resched_name[cpu], NULL));
    2.91 +
    2.92 +	per_cpu(callfunc_irq, cpu) =
    2.93 +		bind_ipi_on_cpu_to_irq(cpu, CALL_FUNCTION_VECTOR);
    2.94 +	sprintf(callfunc_name[cpu], "callfunc%d", cpu);
    2.95 +	BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
    2.96 +	                   smp_call_function_interrupt,
    2.97 +	                   SA_INTERRUPT, callfunc_name[cpu], NULL));
    2.98  }
     3.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c	Tue Apr 05 10:41:39 2005 +0000
     3.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c	Tue Apr 05 12:07:37 2005 +0000
     3.3 @@ -77,6 +77,15 @@ u64 jiffies_64 = INITIAL_JIFFIES;
     3.4  
     3.5  EXPORT_SYMBOL(jiffies_64);
     3.6  
     3.7 +#if defined(__x86_64__)
     3.8 +unsigned long vxtime_hz = PIT_TICK_RATE;
     3.9 +struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
    3.10 +volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
    3.11 +unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
    3.12 +struct timespec __xtime __section_xtime;
    3.13 +struct timezone __sys_tz __section_sys_tz;
    3.14 +#endif
    3.15 +
    3.16  unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
    3.17  
    3.18  extern unsigned long wall_jiffies;
    3.19 @@ -111,8 +120,8 @@ static long last_rtc_update, last_update
    3.20  static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
    3.21  
    3.22  /* Keep track of last time we did processing/updating of jiffies and xtime. */
    3.23 -u64 processed_system_time;   /* System time (ns) at last processing. */
    3.24 -DEFINE_PER_CPU(u64, processed_system_time);
    3.25 +static u64 processed_system_time;   /* System time (ns) at last processing. */
    3.26 +static DEFINE_PER_CPU(u64, processed_system_time);
    3.27  
    3.28  #define NS_PER_TICK (1000000000ULL/HZ)
    3.29  
    3.30 @@ -379,37 +388,49 @@ static inline void do_timer_interrupt(in
    3.31  					struct pt_regs *regs)
    3.32  {
    3.33  	time_t wtm_sec, sec;
    3.34 -	s64 delta, nsec;
    3.35 +	s64 delta, delta_cpu, nsec;
    3.36  	long sec_diff, wtm_nsec;
    3.37 +	int cpu = smp_processor_id();
    3.38  
    3.39  	do {
    3.40  		__get_time_values_from_xen();
    3.41  
    3.42 -		delta = (s64)(shadow_system_time +
    3.43 -			      ((s64)cur_timer->get_offset() * 
    3.44 -			       (s64)NSEC_PER_USEC) -
    3.45 -			      processed_system_time);
    3.46 +		delta = delta_cpu = (s64)shadow_system_time +
    3.47 +			((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
    3.48 +		delta     -= processed_system_time;
    3.49 +		delta_cpu -= per_cpu(processed_system_time, cpu);
    3.50  	}
    3.51  	while (!TIME_VALUES_UP_TO_DATE);
    3.52  
    3.53 -	if (unlikely(delta < 0)) {
    3.54 -		printk("Timer ISR: Time went backwards: %lld %lld %lld %lld\n",
    3.55 -		       delta, shadow_system_time,
    3.56 +	if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
    3.57 +		printk("Timer ISR/%d: Time went backwards: "
    3.58 +		       "delta=%lld cpu_delta=%lld shadow=%lld "
    3.59 +		       "off=%lld processed=%lld cpu_processed=%lld\n",
    3.60 +		       cpu, delta, delta_cpu, shadow_system_time,
    3.61  		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
    3.62 -		       processed_system_time);
    3.63 +		       processed_system_time,
    3.64 +		       per_cpu(processed_system_time, cpu));
    3.65  		return;
    3.66  	}
    3.67  
    3.68 -	/* Process elapsed jiffies since last call. */
    3.69 +	/* System-wide jiffy work. */
    3.70  	while (delta >= NS_PER_TICK) {
    3.71  		delta -= NS_PER_TICK;
    3.72  		processed_system_time += NS_PER_TICK;
    3.73  		do_timer(regs);
    3.74 +	}
    3.75 +
    3.76 +	/* Local CPU jiffy work. */
    3.77 +	while (delta_cpu >= NS_PER_TICK) {
    3.78 +		delta_cpu -= NS_PER_TICK;
    3.79 +		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
    3.80  		update_process_times(user_mode(regs));
    3.81 -		if (regs)
    3.82 -			profile_tick(CPU_PROFILING, regs);
    3.83 +		profile_tick(CPU_PROFILING, regs);
    3.84  	}
    3.85  
    3.86 +	if (cpu != 0)
    3.87 +		return;
    3.88 +
    3.89  	/*
    3.90  	 * Take synchronised time from Xen once a minute if we're not
    3.91  	 * synchronised ourselves, and we haven't chosen to keep an independent
    3.92 @@ -617,10 +638,10 @@ void __init hpet_time_init(void)
    3.93  #endif
    3.94  
    3.95  /* Dynamically-mapped IRQ. */
    3.96 -static int TIMER_IRQ;
    3.97 +static DEFINE_PER_CPU(int, timer_irq);
    3.98  
    3.99  static struct irqaction irq_timer = {
   3.100 -	timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer",
   3.101 +	timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
   3.102  	NULL, NULL
   3.103  };
   3.104  
   3.105 @@ -642,14 +663,23 @@ void __init time_init(void)
   3.106  	set_normalized_timespec(&wall_to_monotonic,
   3.107  		-xtime.tv_sec, -xtime.tv_nsec);
   3.108  	processed_system_time = shadow_system_time;
   3.109 +	per_cpu(processed_system_time, 0) = processed_system_time;
   3.110  
   3.111  	if (timer_tsc_init.init(NULL) != 0)
   3.112  		BUG();
   3.113  	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
   3.114  
   3.115 -	TIMER_IRQ = bind_virq_to_irq(VIRQ_TIMER);
   3.116 +#if defined(__x86_64__)
   3.117 +	vxtime.mode = VXTIME_TSC;
   3.118 +	vxtime.quot = (1000000L << 32) / vxtime_hz;
   3.119 +	vxtime.tsc_quot = (1000L << 32) / cpu_khz;
   3.120 +	vxtime.hz = vxtime_hz;
   3.121 +	sync_core();
   3.122 +	rdtscll(vxtime.last_tsc);
   3.123 +#endif
   3.124  
   3.125 -	(void)setup_irq(TIMER_IRQ, &irq_timer);
   3.126 +	per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
   3.127 +	(void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
   3.128  }
   3.129  
   3.130  /* Convert jiffies to system time. Call with xtime_lock held for reading. */
   3.131 @@ -719,6 +749,7 @@ void time_resume(void)
   3.132  
   3.133  	/* Reset our own concept of passage of system time. */
   3.134  	processed_system_time = shadow_system_time;
   3.135 +	per_cpu(processed_system_time, 0) = processed_system_time;
   3.136  
   3.137  	/* Accept a warp in UTC (wall-clock) time. */
   3.138  	last_seen_tv.tv_sec = 0;
   3.139 @@ -728,63 +759,20 @@ void time_resume(void)
   3.140  }
   3.141  
   3.142  #ifdef CONFIG_SMP
   3.143 -
   3.144 -static irqreturn_t local_timer_interrupt(int irq, void *dev_id,
   3.145 -					 struct pt_regs *regs)
   3.146 +static char timer_name[NR_IRQS][15];
   3.147 +void local_setup_timer(void)
   3.148  {
   3.149 -	s64 delta;
   3.150 -	int cpu = smp_processor_id();
   3.151 +	int seq, cpu = smp_processor_id();
   3.152  
   3.153  	do {
   3.154 -		__get_time_values_from_xen();
   3.155 -
   3.156 -		delta = (s64)(shadow_system_time +
   3.157 -			      ((s64)cur_timer->get_offset() * 
   3.158 -			       (s64)NSEC_PER_USEC) -
   3.159 -			      per_cpu(processed_system_time, cpu));
   3.160 -	}
   3.161 -	while (!TIME_VALUES_UP_TO_DATE);
   3.162 -
   3.163 -	if (unlikely(delta < 0)) {
   3.164 -		printk("Timer ISR/%d: Time went backwards: %lld %lld %lld %lld\n",
   3.165 -		       cpu, delta, shadow_system_time,
   3.166 -		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
   3.167 -		       processed_system_time);
   3.168 -		return IRQ_HANDLED;
   3.169 -	}
   3.170 -
   3.171 -	/* Process elapsed jiffies since last call. */
   3.172 -	while (delta >= NS_PER_TICK) {
   3.173 -		delta -= NS_PER_TICK;
   3.174 -		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
   3.175 -		if (regs)
   3.176 -			update_process_times(user_mode(regs));
   3.177 -#if 0
   3.178 -		if (regs)
   3.179 -			profile_tick(CPU_PROFILING, regs);
   3.180 -#endif
   3.181 -	}
   3.182 -
   3.183 -	return IRQ_HANDLED;
   3.184 -}
   3.185 -
   3.186 -static struct irqaction local_irq_timer = {
   3.187 -	local_timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ltimer",
   3.188 -	NULL, NULL
   3.189 -};
   3.190 -
   3.191 -void local_setup_timer(void)
   3.192 -{
   3.193 -	int seq, time_irq;
   3.194 -	int cpu = smp_processor_id();
   3.195 -
   3.196 -	do {
   3.197 -	    seq = read_seqbegin(&xtime_lock);
   3.198 -	    per_cpu(processed_system_time, cpu) = shadow_system_time;
   3.199 +		seq = read_seqbegin(&xtime_lock);
   3.200 +		per_cpu(processed_system_time, cpu) = shadow_system_time;
   3.201  	} while (read_seqretry(&xtime_lock, seq));
   3.202  
   3.203 -	time_irq = bind_virq_to_irq(VIRQ_TIMER);
   3.204 -	(void)setup_irq(time_irq, &local_irq_timer);
   3.205 +	per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
   3.206 +	sprintf(timer_name[cpu], "timer%d", cpu);
   3.207 +	BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
   3.208 +	                   SA_INTERRUPT, timer_name[cpu], NULL));
   3.209  }
   3.210  #endif
   3.211  
     4.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Tue Apr 05 10:41:39 2005 +0000
     4.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Tue Apr 05 12:07:37 2005 +0000
     4.3 @@ -108,7 +108,7 @@ void xen_tlb_flush_mask(cpumask_t mask)
     4.4  {
     4.5      struct mmuext_op op;
     4.6      op.cmd = MMUEXT_TLB_FLUSH_MULTI;
     4.7 -    op.cpuset = (unsigned long)mask.bits;
     4.8 +    op.cpuset = mask.bits;
     4.9      BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
    4.10  }
    4.11  
    4.12 @@ -124,7 +124,7 @@ void xen_invlpg_mask(cpumask_t mask, uns
    4.13  {
    4.14      struct mmuext_op op;
    4.15      op.cmd = MMUEXT_INVLPG_MULTI;
    4.16 -    op.cpuset = (unsigned long)mask.bits;
    4.17 +    op.cpuset = mask.bits;
    4.18      op.linear_addr = ptr & PAGE_MASK;
    4.19      BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
    4.20  }
     5.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile	Tue Apr 05 10:41:39 2005 +0000
     5.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile	Tue Apr 05 12:07:37 2005 +0000
     5.3 @@ -8,11 +8,13 @@ CFLAGS	+= -Iarch/$(XENARCH)/kernel
     5.4  extra-y 	:= head.o head64.o init_task.o
     5.5  
     5.6  obj-y	:= process.o signal.o entry.o traps.o  \
     5.7 -		time.o ioport.o ldt.o setup.o \
     5.8 +		ioport.o ldt.o setup.o \
     5.9  		x8664_ksyms.o vsyscall.o \
    5.10  		setup64.o e820.o irq.o early_printk.o
    5.11  c-obj-y	:= semaphore.o i387.o sys_x86_64.o \
    5.12  		ptrace.o quirks.o syscall.o
    5.13 +
    5.14 +obj-y				+= ../../i386/time.o
    5.15  obj-y				+= ../../i386/kernel/timers/
    5.16  
    5.17  s-obj-y	:=
     6.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c	Tue Apr 05 10:41:39 2005 +0000
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,840 +0,0 @@
     6.4 -/*
     6.5 - *  linux/arch/i386/kernel/time.c
     6.6 - *
     6.7 - *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
     6.8 - *
     6.9 - * This file contains the PC-specific time handling details:
    6.10 - * reading the RTC at bootup, etc..
    6.11 - * 1994-07-02    Alan Modra
    6.12 - *	fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
    6.13 - * 1995-03-26    Markus Kuhn
    6.14 - *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
    6.15 - *      precision CMOS clock update
    6.16 - * 1996-05-03    Ingo Molnar
    6.17 - *      fixed time warps in do_[slow|fast]_gettimeoffset()
    6.18 - * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
    6.19 - *		"A Kernel Model for Precision Timekeeping" by Dave Mills
    6.20 - * 1998-09-05    (Various)
    6.21 - *	More robust do_fast_gettimeoffset() algorithm implemented
    6.22 - *	(works with APM, Cyrix 6x86MX and Centaur C6),
    6.23 - *	monotonic gettimeofday() with fast_get_timeoffset(),
    6.24 - *	drift-proof precision TSC calibration on boot
    6.25 - *	(C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
    6.26 - *	Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
    6.27 - *	ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
    6.28 - * 1998-12-16    Andrea Arcangeli
    6.29 - *	Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
    6.30 - *	because was not accounting lost_ticks.
    6.31 - * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
    6.32 - *	Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
    6.33 - *	serialize accesses to xtime/lost_ticks).
    6.34 - */
    6.35 -
    6.36 -#include <linux/errno.h>
    6.37 -#include <linux/sched.h>
    6.38 -#include <linux/kernel.h>
    6.39 -#include <linux/param.h>
    6.40 -#include <linux/string.h>
    6.41 -#include <linux/mm.h>
    6.42 -#include <linux/interrupt.h>
    6.43 -#include <linux/time.h>
    6.44 -#include <linux/delay.h>
    6.45 -#include <linux/init.h>
    6.46 -#include <linux/smp.h>
    6.47 -#include <linux/module.h>
    6.48 -#include <linux/sysdev.h>
    6.49 -#include <linux/bcd.h>
    6.50 -#include <linux/efi.h>
    6.51 -#include <linux/mca.h>
    6.52 -#include <linux/sysctl.h>
    6.53 -#include <linux/percpu.h>
    6.54 -
    6.55 -#include <asm/io.h>
    6.56 -#include <asm/smp.h>
    6.57 -#include <asm/irq.h>
    6.58 -#include <asm/msr.h>
    6.59 -#include <asm/delay.h>
    6.60 -#include <asm/mpspec.h>
    6.61 -#include <asm/uaccess.h>
    6.62 -#include <asm/processor.h>
    6.63 -#include <asm/timer.h>
    6.64 -
    6.65 -#include "mach_time.h"
    6.66 -
    6.67 -#include <linux/timex.h>
    6.68 -#include <linux/config.h>
    6.69 -
    6.70 -#include <asm/hpet.h>
    6.71 -
    6.72 -#include <asm/arch_hooks.h>
    6.73 -
    6.74 -#include "io_ports.h"
    6.75 -
    6.76 -extern spinlock_t i8259A_lock;
    6.77 -int pit_latch_buggy;              /* extern */
    6.78 -
    6.79 -u64 jiffies_64 = INITIAL_JIFFIES;
    6.80 -
    6.81 -EXPORT_SYMBOL(jiffies_64);
    6.82 -
    6.83 -#if defined(__x86_64__)
    6.84 -unsigned long vxtime_hz = PIT_TICK_RATE;
    6.85 -
    6.86 -struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
    6.87 -
    6.88 -volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
    6.89 -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
    6.90 -struct timespec __xtime __section_xtime;
    6.91 -struct timezone __sys_tz __section_sys_tz;
    6.92 -
    6.93 -static inline void rdtscll_sync(unsigned long *tsc)
    6.94 -{
    6.95 -#ifdef CONFIG_SMP
    6.96 -        sync_core();
    6.97 -#endif
    6.98 -        rdtscll(*tsc);
    6.99 -}
   6.100 -#endif
   6.101 -
   6.102 -u32 cpu_khz;	/* Detected as we calibrate the TSC */
   6.103 -
   6.104 -extern unsigned long wall_jiffies;
   6.105 -
   6.106 -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
   6.107 -
   6.108 -DEFINE_SPINLOCK(i8253_lock);
   6.109 -EXPORT_SYMBOL(i8253_lock);
   6.110 -
   6.111 -extern struct init_timer_opts timer_tsc_init;
   6.112 -extern struct timer_opts timer_tsc;
   6.113 -struct timer_opts *cur_timer = &timer_tsc;
   6.114 -
   6.115 -/* These are peridically updated in shared_info, and then copied here. */
   6.116 -u32 shadow_tsc_stamp;
   6.117 -u64 shadow_system_time;
   6.118 -static u32 shadow_time_version;
   6.119 -static struct timeval shadow_tv;
   6.120 -
   6.121 -/*
   6.122 - * We use this to ensure that gettimeofday() is monotonically increasing. We
   6.123 - * only break this guarantee if the wall clock jumps backwards "a long way".
   6.124 - */
   6.125 -static struct timeval last_seen_tv = {0,0};
   6.126 -
   6.127 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   6.128 -/* Periodically propagate synchronised time base to the RTC and to Xen. */
   6.129 -static long last_rtc_update, last_update_to_xen;
   6.130 -#endif
   6.131 -
   6.132 -/* Periodically take synchronised time base from Xen, if we need it. */
   6.133 -static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
   6.134 -
   6.135 -/* Keep track of last time we did processing/updating of jiffies and xtime. */
   6.136 -u64 processed_system_time;   /* System time (ns) at last processing. */
   6.137 -DEFINE_PER_CPU(u64, processed_system_time);
   6.138 -
   6.139 -#define NS_PER_TICK (1000000000ULL/HZ)
   6.140 -
   6.141 -#define HANDLE_USEC_UNDERFLOW(_tv) do {		\
   6.142 -	while ((_tv).tv_usec < 0) {		\
   6.143 -		(_tv).tv_usec += USEC_PER_SEC;	\
   6.144 -		(_tv).tv_sec--;			\
   6.145 -	}					\
   6.146 -} while (0)
   6.147 -#define HANDLE_USEC_OVERFLOW(_tv) do {		\
   6.148 -	while ((_tv).tv_usec >= USEC_PER_SEC) {	\
   6.149 -		(_tv).tv_usec -= USEC_PER_SEC;	\
   6.150 -		(_tv).tv_sec++;			\
   6.151 -	}					\
   6.152 -} while (0)
   6.153 -static inline void __normalize_time(time_t *sec, s64 *nsec)
   6.154 -{
   6.155 -	while (*nsec >= NSEC_PER_SEC) {
   6.156 -		(*nsec) -= NSEC_PER_SEC;
   6.157 -		(*sec)++;
   6.158 -	}
   6.159 -	while (*nsec < 0) {
   6.160 -		(*nsec) += NSEC_PER_SEC;
   6.161 -		(*sec)--;
   6.162 -	}
   6.163 -}
   6.164 -
   6.165 -/* Does this guest OS track Xen time, or set its wall clock independently? */
   6.166 -static int independent_wallclock = 0;
   6.167 -static int __init __independent_wallclock(char *str)
   6.168 -{
   6.169 -	independent_wallclock = 1;
   6.170 -	return 1;
   6.171 -}
   6.172 -__setup("independent_wallclock", __independent_wallclock);
   6.173 -#define INDEPENDENT_WALLCLOCK() \
   6.174 -    (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
   6.175 -
   6.176 -/*
   6.177 - * Reads a consistent set of time-base values from Xen, into a shadow data
   6.178 - * area. Must be called with the xtime_lock held for writing.
   6.179 - */
   6.180 -static void __get_time_values_from_xen(void)
   6.181 -{
   6.182 -	shared_info_t *s = HYPERVISOR_shared_info;
   6.183 -
   6.184 -	do {
   6.185 -		shadow_time_version = s->time_version2;
   6.186 -		rmb();
   6.187 -		shadow_tv.tv_sec    = s->wc_sec;
   6.188 -		shadow_tv.tv_usec   = s->wc_usec;
   6.189 -		shadow_tsc_stamp    = (u32)s->tsc_timestamp;
   6.190 -		shadow_system_time  = s->system_time;
   6.191 -		rmb();
   6.192 -	}
   6.193 -	while (shadow_time_version != s->time_version1);
   6.194 -
   6.195 -	cur_timer->mark_offset();
   6.196 -}
   6.197 -
   6.198 -#define TIME_VALUES_UP_TO_DATE \
   6.199 - ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
   6.200 -
   6.201 -/*
   6.202 - * This version of gettimeofday has microsecond resolution
   6.203 - * and better than microsecond precision on fast x86 machines with TSC.
   6.204 - */
   6.205 -void do_gettimeofday(struct timeval *tv)
   6.206 -{
   6.207 -	unsigned long seq;
   6.208 -	unsigned long usec, sec;
   6.209 -	unsigned long max_ntp_tick;
   6.210 -	unsigned long flags;
   6.211 -	s64 nsec;
   6.212 -
   6.213 -	do {
   6.214 -		unsigned long lost;
   6.215 -
   6.216 -		seq = read_seqbegin(&xtime_lock);
   6.217 -
   6.218 -		usec = cur_timer->get_offset();
   6.219 -		lost = jiffies - wall_jiffies;
   6.220 -
   6.221 -		/*
   6.222 -		 * If time_adjust is negative then NTP is slowing the clock
   6.223 -		 * so make sure not to go into next possible interval.
   6.224 -		 * Better to lose some accuracy than have time go backwards..
   6.225 -		 */
   6.226 -		if (unlikely(time_adjust < 0)) {
   6.227 -			max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
   6.228 -			usec = min(usec, max_ntp_tick);
   6.229 -
   6.230 -			if (lost)
   6.231 -				usec += lost * max_ntp_tick;
   6.232 -		}
   6.233 -		else if (unlikely(lost))
   6.234 -			usec += lost * (USEC_PER_SEC / HZ);
   6.235 -
   6.236 -		sec = xtime.tv_sec;
   6.237 -		usec += (xtime.tv_nsec / NSEC_PER_USEC);
   6.238 -
   6.239 -		nsec = shadow_system_time - processed_system_time;
   6.240 -		__normalize_time(&sec, &nsec);
   6.241 -		usec += (long)nsec / NSEC_PER_USEC;
   6.242 -
   6.243 -		if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
   6.244 -			/*
   6.245 -			 * We may have blocked for a long time,
   6.246 -			 * rendering our calculations invalid
   6.247 -			 * (e.g. the time delta may have
   6.248 -			 * overflowed). Detect that and recalculate
   6.249 -			 * with fresh values.
   6.250 -			 */
   6.251 -			write_seqlock_irqsave(&xtime_lock, flags);
   6.252 -			__get_time_values_from_xen();
   6.253 -			write_sequnlock_irqrestore(&xtime_lock, flags);
   6.254 -			continue;
   6.255 -		}
   6.256 -	} while (read_seqretry(&xtime_lock, seq));
   6.257 -
   6.258 -	while (usec >= USEC_PER_SEC) {
   6.259 -		usec -= USEC_PER_SEC;
   6.260 -		sec++;
   6.261 -	}
   6.262 -
   6.263 -	/* Ensure that time-of-day is monotonically increasing. */
   6.264 -	if ((sec < last_seen_tv.tv_sec) ||
   6.265 -	    ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) {
   6.266 -		sec = last_seen_tv.tv_sec;
   6.267 -		usec = last_seen_tv.tv_usec;
   6.268 -	} else {
   6.269 -		last_seen_tv.tv_sec = sec;
   6.270 -		last_seen_tv.tv_usec = usec;
   6.271 -	}
   6.272 -
   6.273 -	tv->tv_sec = sec;
   6.274 -	tv->tv_usec = usec;
   6.275 -}
   6.276 -
   6.277 -EXPORT_SYMBOL(do_gettimeofday);
   6.278 -
   6.279 -int do_settimeofday(struct timespec *tv)
   6.280 -{
   6.281 -	time_t wtm_sec, sec = tv->tv_sec;
   6.282 -	long wtm_nsec;
   6.283 -	s64 nsec;
   6.284 -	struct timespec xentime;
   6.285 -
   6.286 -	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
   6.287 -		return -EINVAL;
   6.288 -
   6.289 -	if (!INDEPENDENT_WALLCLOCK())
   6.290 -		return 0; /* Silent failure? */
   6.291 -
   6.292 -	write_seqlock_irq(&xtime_lock);
   6.293 -
   6.294 -	/*
   6.295 -	 * Ensure we don't get blocked for a long time so that our time delta
   6.296 -	 * overflows. If that were to happen then our shadow time values would
   6.297 -	 * be stale, so we can retry with fresh ones.
   6.298 -	 */
   6.299 - again:
   6.300 -	nsec = (s64)tv->tv_nsec -
   6.301 -	    ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
   6.302 -	if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
   6.303 -		__get_time_values_from_xen();
   6.304 -		goto again;
   6.305 -	}
   6.306 -
   6.307 -	__normalize_time(&sec, &nsec);
   6.308 -	set_normalized_timespec(&xentime, sec, nsec);
   6.309 -
   6.310 -	/*
   6.311 -	 * This is revolting. We need to set "xtime" correctly. However, the
   6.312 -	 * value in this location is the value at the most recent update of
   6.313 -	 * wall time.  Discover what correction gettimeofday() would have
   6.314 -	 * made, and then undo it!
   6.315 -	 */
   6.316 -	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
   6.317 -
   6.318 -	nsec -= (shadow_system_time - processed_system_time);
   6.319 -
   6.320 -	__normalize_time(&sec, &nsec);
   6.321 -	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   6.322 -	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   6.323 -
   6.324 -	set_normalized_timespec(&xtime, sec, nsec);
   6.325 -	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   6.326 -
   6.327 -	time_adjust = 0;		/* stop active adjtime() */
   6.328 -	time_status |= STA_UNSYNC;
   6.329 -	time_maxerror = NTP_PHASE_LIMIT;
   6.330 -	time_esterror = NTP_PHASE_LIMIT;
   6.331 -
   6.332 -	/* Reset all our running time counts. They make no sense now. */
   6.333 -	last_seen_tv.tv_sec = 0;
   6.334 -	last_update_from_xen = 0;
   6.335 -
   6.336 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   6.337 -	if (xen_start_info.flags & SIF_INITDOMAIN) {
   6.338 -		dom0_op_t op;
   6.339 -		last_rtc_update = last_update_to_xen = 0;
   6.340 -		op.cmd = DOM0_SETTIME;
   6.341 -		op.u.settime.secs        = xentime.tv_sec;
   6.342 -		op.u.settime.usecs       = xentime.tv_nsec / NSEC_PER_USEC;
   6.343 -		op.u.settime.system_time = shadow_system_time;
   6.344 -		write_sequnlock_irq(&xtime_lock);
   6.345 -		HYPERVISOR_dom0_op(&op);
   6.346 -	} else
   6.347 -#endif
   6.348 -		write_sequnlock_irq(&xtime_lock);
   6.349 -
   6.350 -	clock_was_set();
   6.351 -	return 0;
   6.352 -}
   6.353 -
   6.354 -EXPORT_SYMBOL(do_settimeofday);
   6.355 -
   6.356 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   6.357 -static int set_rtc_mmss(unsigned long nowtime)
   6.358 -{
   6.359 -	int retval;
   6.360 -
   6.361 -	/* gets recalled with irq locally disabled */
   6.362 -	spin_lock(&rtc_lock);
   6.363 -	if (efi_enabled)
   6.364 -		retval = efi_set_rtc_mmss(nowtime);
   6.365 -	else
   6.366 -		retval = mach_set_rtc_mmss(nowtime);
   6.367 -	spin_unlock(&rtc_lock);
   6.368 -
   6.369 -	return retval;
   6.370 -}
   6.371 -#endif
   6.372 -
   6.373 -/* monotonic_clock(): returns # of nanoseconds passed since time_init()
   6.374 - *		Note: This function is required to return accurate
   6.375 - *		time even in the absence of multiple timer ticks.
   6.376 - */
   6.377 -unsigned long long monotonic_clock(void)
   6.378 -{
   6.379 -	return cur_timer->monotonic_clock();
   6.380 -}
   6.381 -EXPORT_SYMBOL(monotonic_clock);
   6.382 -
   6.383 -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
   6.384 -unsigned long profile_pc(struct pt_regs *regs)
   6.385 -{
   6.386 -	unsigned long pc = instruction_pointer(regs);
   6.387 -
   6.388 -	if (in_lock_functions(pc))
   6.389 -		return *(unsigned long *)(regs->ebp + 4);
   6.390 -
   6.391 -	return pc;
   6.392 -}
   6.393 -EXPORT_SYMBOL(profile_pc);
   6.394 -#endif
   6.395 -
   6.396 -/*
   6.397 - * timer_interrupt() needs to keep up the real-time clock,
   6.398 - * as well as call the "do_timer()" routine every clocktick
   6.399 - */
   6.400 -static inline void do_timer_interrupt(int irq, void *dev_id,
   6.401 -					struct pt_regs *regs)
   6.402 -{
   6.403 -	time_t wtm_sec, sec;
   6.404 -	s64 delta, nsec;
   6.405 -	long sec_diff, wtm_nsec;
   6.406 -
   6.407 -	do {
   6.408 -		__get_time_values_from_xen();
   6.409 -
   6.410 -		delta = (s64)(shadow_system_time +
   6.411 -			      ((s64)cur_timer->get_offset() * 
   6.412 -			       (s64)NSEC_PER_USEC) -
   6.413 -			      processed_system_time);
   6.414 -	}
   6.415 -	while (!TIME_VALUES_UP_TO_DATE);
   6.416 -
   6.417 -	if (unlikely(delta < 0)) {
   6.418 -		printk("Timer ISR: Time went backwards: %lld %lld %lld %lld\n",
   6.419 -		       delta, shadow_system_time,
   6.420 -		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
   6.421 -		       processed_system_time);
   6.422 -		return;
   6.423 -	}
   6.424 -
   6.425 -	/* Process elapsed jiffies since last call. */
   6.426 -	while (delta >= NS_PER_TICK) {
   6.427 -		delta -= NS_PER_TICK;
   6.428 -		processed_system_time += NS_PER_TICK;
   6.429 -		do_timer(regs);
   6.430 -		update_process_times(user_mode(regs));
   6.431 -		if (regs)
   6.432 -			profile_tick(CPU_PROFILING, regs);
   6.433 -	}
   6.434 -
   6.435 -	/*
   6.436 -	 * Take synchronised time from Xen once a minute if we're not
   6.437 -	 * synchronised ourselves, and we haven't chosen to keep an independent
   6.438 -	 * time base.
   6.439 -	 */
   6.440 -	if (!INDEPENDENT_WALLCLOCK() &&
   6.441 -	    ((time_status & STA_UNSYNC) != 0) &&
   6.442 -	    (xtime.tv_sec > (last_update_from_xen + 60))) {
   6.443 -		/* Adjust shadow for jiffies that haven't updated xtime yet. */
   6.444 -		shadow_tv.tv_usec -= 
   6.445 -			(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
   6.446 -		HANDLE_USEC_UNDERFLOW(shadow_tv);
   6.447 -
   6.448 -		/*
   6.449 -		 * Reset our running time counts if they are invalidated by
   6.450 -		 * a warp backwards of more than 500ms.
   6.451 -		 */
   6.452 -		sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
   6.453 -		if (unlikely(abs(sec_diff) > 1) ||
   6.454 -		    unlikely(((sec_diff * USEC_PER_SEC) +
   6.455 -			      (xtime.tv_nsec / NSEC_PER_USEC) -
   6.456 -			      shadow_tv.tv_usec) > 500000)) {
   6.457 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   6.458 -			last_rtc_update = last_update_to_xen = 0;
   6.459 -#endif
   6.460 -			last_seen_tv.tv_sec = 0;
   6.461 -		}
   6.462 -
   6.463 -		/* Update our unsynchronised xtime appropriately. */
   6.464 -		sec = shadow_tv.tv_sec;
   6.465 -		nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   6.466 -
   6.467 -		__normalize_time(&sec, &nsec);
   6.468 -		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   6.469 -		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   6.470 -
   6.471 -		set_normalized_timespec(&xtime, sec, nsec);
   6.472 -		set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   6.473 -
   6.474 -		last_update_from_xen = sec;
   6.475 -	}
   6.476 -
   6.477 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   6.478 -	if (!(xen_start_info.flags & SIF_INITDOMAIN))
   6.479 -		return;
   6.480 -
   6.481 -	/* Send synchronised time to Xen approximately every minute. */
   6.482 -	if (((time_status & STA_UNSYNC) == 0) &&
   6.483 -	    (xtime.tv_sec > (last_update_to_xen + 60))) {
   6.484 -		dom0_op_t op;
   6.485 -		struct timeval tv;
   6.486 -
   6.487 -		tv.tv_sec   = xtime.tv_sec;
   6.488 -		tv.tv_usec  = xtime.tv_nsec / NSEC_PER_USEC;
   6.489 -		tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
   6.490 -		HANDLE_USEC_OVERFLOW(tv);
   6.491 -
   6.492 -		op.cmd = DOM0_SETTIME;
   6.493 -		op.u.settime.secs        = tv.tv_sec;
   6.494 -		op.u.settime.usecs       = tv.tv_usec;
   6.495 -		op.u.settime.system_time = shadow_system_time;
   6.496 -		HYPERVISOR_dom0_op(&op);
   6.497 -
   6.498 -		last_update_to_xen = xtime.tv_sec;
   6.499 -	}
   6.500 -
   6.501 -	/*
   6.502 -	 * If we have an externally synchronized Linux clock, then update
   6.503 -	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
   6.504 -	 * called as close as possible to 500 ms before the new second starts.
   6.505 -	 */
   6.506 -	if ((time_status & STA_UNSYNC) == 0 &&
   6.507 -	    xtime.tv_sec > last_rtc_update + 660 &&
   6.508 -	    (xtime.tv_nsec / 1000)
   6.509 -			>= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
   6.510 -	    (xtime.tv_nsec / 1000)
   6.511 -			<= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
   6.512 -		/* horrible...FIXME */
   6.513 -		if (efi_enabled) {
   6.514 -	 		if (efi_set_rtc_mmss(xtime.tv_sec) == 0)
   6.515 -				last_rtc_update = xtime.tv_sec;
   6.516 -			else
   6.517 -				last_rtc_update = xtime.tv_sec - 600;
   6.518 -		} else if (set_rtc_mmss(xtime.tv_sec) == 0)
   6.519 -			last_rtc_update = xtime.tv_sec;
   6.520 -		else
   6.521 -			last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
   6.522 -	}
   6.523 -#endif
   6.524 -}
   6.525 -
   6.526 -/*
   6.527 - * This is the same as the above, except we _also_ save the current
   6.528 - * Time Stamp Counter value at the time of the timer interrupt, so that
   6.529 - * we later on can estimate the time of day more exactly.
   6.530 - */
   6.531 -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
   6.532 -{
   6.533 -	/*
   6.534 -	 * Here we are in the timer irq handler. We just have irqs locally
   6.535 -	 * disabled but we don't know if the timer_bh is running on the other
   6.536 -	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
   6.537 -	 * the irq version of write_lock because as just said we have irq
   6.538 -	 * locally disabled. -arca
   6.539 -	 */
   6.540 -	write_seqlock(&xtime_lock);
   6.541 -	do_timer_interrupt(irq, NULL, regs);
   6.542 -	write_sequnlock(&xtime_lock);
   6.543 -	return IRQ_HANDLED;
   6.544 -}
   6.545 -
   6.546 -/* not static: needed by APM */
   6.547 -unsigned long get_cmos_time(void)
   6.548 -{
   6.549 -	unsigned long retval;
   6.550 -
   6.551 -	spin_lock(&rtc_lock);
   6.552 -
   6.553 -	if (efi_enabled)
   6.554 -		retval = efi_get_time();
   6.555 -	else
   6.556 -		retval = mach_get_cmos_time();
   6.557 -
   6.558 -	spin_unlock(&rtc_lock);
   6.559 -
   6.560 -	return retval;
   6.561 -}
   6.562 -
   6.563 -static long clock_cmos_diff, sleep_start;
   6.564 -
   6.565 -static int timer_suspend(struct sys_device *dev, u32 state)
   6.566 -{
   6.567 -	/*
   6.568 -	 * Estimate time zone so that set_time can update the clock
   6.569 -	 */
   6.570 -	clock_cmos_diff = -get_cmos_time();
   6.571 -	clock_cmos_diff += get_seconds();
   6.572 -	sleep_start = get_cmos_time();
   6.573 -	return 0;
   6.574 -}
   6.575 -
   6.576 -static int timer_resume(struct sys_device *dev)
   6.577 -{
   6.578 -	unsigned long flags;
   6.579 -	unsigned long sec;
   6.580 -	unsigned long sleep_length;
   6.581 -
   6.582 -#ifdef CONFIG_HPET_TIMER
   6.583 -	if (is_hpet_enabled())
   6.584 -		hpet_reenable();
   6.585 -#endif
   6.586 -	sec = get_cmos_time() + clock_cmos_diff;
   6.587 -	sleep_length = (get_cmos_time() - sleep_start) * HZ;
   6.588 -	write_seqlock_irqsave(&xtime_lock, flags);
   6.589 -	xtime.tv_sec = sec;
   6.590 -	xtime.tv_nsec = 0;
   6.591 -	write_sequnlock_irqrestore(&xtime_lock, flags);
   6.592 -	jiffies += sleep_length;
   6.593 -	wall_jiffies += sleep_length;
   6.594 -	return 0;
   6.595 -}
   6.596 -
   6.597 -static struct sysdev_class timer_sysclass = {
   6.598 -	.resume = timer_resume,
   6.599 -	.suspend = timer_suspend,
   6.600 -	set_kset_name("timer"),
   6.601 -};
   6.602 -
   6.603 -
   6.604 -/* XXX this driverfs stuff should probably go elsewhere later -john */
   6.605 -static struct sys_device device_timer = {
   6.606 -	.id	= 0,
   6.607 -	.cls	= &timer_sysclass,
   6.608 -};
   6.609 -
   6.610 -static int time_init_device(void)
   6.611 -{
   6.612 -	int error = sysdev_class_register(&timer_sysclass);
   6.613 -	if (!error)
   6.614 -		error = sysdev_register(&device_timer);
   6.615 -	return error;
   6.616 -}
   6.617 -
   6.618 -device_initcall(time_init_device);
   6.619 -
   6.620 -#ifdef CONFIG_HPET_TIMER
   6.621 -extern void (*late_time_init)(void);
   6.622 -/* Duplicate of time_init() below, with hpet_enable part added */
   6.623 -void __init hpet_time_init(void)
   6.624 -{
   6.625 -	xtime.tv_sec = get_cmos_time();
   6.626 -	xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
   6.627 -	set_normalized_timespec(&wall_to_monotonic,
   6.628 -		-xtime.tv_sec, -xtime.tv_nsec);
   6.629 -
   6.630 -	if (hpet_enable() >= 0) {
   6.631 -		printk("Using HPET for base-timer\n");
   6.632 -	}
   6.633 -
   6.634 -	cur_timer = select_timer();
   6.635 -	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
   6.636 -
   6.637 -	time_init_hook();
   6.638 -}
   6.639 -#endif
   6.640 -
   6.641 -/* Dynamically-mapped IRQ. */
   6.642 -static int TIMER_IRQ;
   6.643 -
   6.644 -static struct irqaction irq_timer = {
   6.645 -	timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer",
   6.646 -	NULL, NULL
   6.647 -};
   6.648 -
   6.649 -void __init time_init(void)
   6.650 -{
   6.651 -#ifdef CONFIG_HPET_TIMER
   6.652 -	if (is_hpet_capable()) {
   6.653 -		/*
   6.654 -		 * HPET initialization needs to do memory-mapped io. So, let
   6.655 -		 * us do a late initialization after mem_init().
   6.656 -		 */
   6.657 -		late_time_init = hpet_time_init;
   6.658 -		return;
   6.659 -	}
   6.660 -#endif
   6.661 -	__get_time_values_from_xen();
   6.662 -	xtime.tv_sec = shadow_tv.tv_sec;
   6.663 -	xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
   6.664 -	set_normalized_timespec(&wall_to_monotonic,
   6.665 -		-xtime.tv_sec, -xtime.tv_nsec);
   6.666 -	processed_system_time = shadow_system_time;
   6.667 -
   6.668 -	if (timer_tsc_init.init(NULL) != 0)
   6.669 -		BUG();
   6.670 -	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
   6.671 -
   6.672 -#if defined(__x86_64__)
   6.673 -        vxtime.mode = VXTIME_TSC;
   6.674 -	vxtime.quot = (1000000L << 32) / vxtime_hz;
   6.675 -	vxtime.tsc_quot = (1000L << 32) / cpu_khz;
   6.676 -	vxtime.hz = vxtime_hz;
   6.677 -	rdtscll_sync(&vxtime.last_tsc);
   6.678 -#endif
   6.679 -
   6.680 -	TIMER_IRQ = bind_virq_to_irq(VIRQ_TIMER);
   6.681 -
   6.682 -	(void)setup_irq(TIMER_IRQ, &irq_timer);
   6.683 -}
   6.684 -
   6.685 -/* Convert jiffies to system time. Call with xtime_lock held for reading. */
   6.686 -static inline u64 __jiffies_to_st(unsigned long j) 
   6.687 -{
   6.688 -	return processed_system_time + ((j - jiffies) * NS_PER_TICK);
   6.689 -}
   6.690 -
   6.691 -/*
   6.692 - * This function works out when the the next timer function has to be
   6.693 - * executed (by looking at the timer list) and sets the Xen one-shot
   6.694 - * domain timer to the appropriate value. This is typically called in
   6.695 - * cpu_idle() before the domain blocks.
   6.696 - * 
   6.697 - * The function returns a non-0 value on error conditions.
   6.698 - * 
   6.699 - * It must be called with interrupts disabled.
   6.700 - */
   6.701 -int set_timeout_timer(void)
   6.702 -{
   6.703 -	u64 alarm = 0;
   6.704 -	int ret = 0;
   6.705 -#ifdef CONFIG_SMP
   6.706 -	unsigned long seq;
   6.707 -#endif
   6.708 -
   6.709 -	/*
   6.710 -	 * This is safe against long blocking (since calculations are
   6.711 -	 * not based on TSC deltas). It is also safe against warped
   6.712 -	 * system time since suspend-resume is cooperative and we
   6.713 -	 * would first get locked out.
   6.714 -	 */
   6.715 -#ifdef CONFIG_SMP
   6.716 -	do {
   6.717 -		seq = read_seqbegin(&xtime_lock);
   6.718 -		if (smp_processor_id())
   6.719 -			alarm = __jiffies_to_st(jiffies + 1);
   6.720 -		else
   6.721 -			alarm = __jiffies_to_st(jiffies + 1);
   6.722 -	} while (read_seqretry(&xtime_lock, seq));
   6.723 -#else
   6.724 -	alarm = __jiffies_to_st(next_timer_interrupt());
   6.725 -#endif
   6.726 -
   6.727 -	/* Failure is pretty bad, but we'd best soldier on. */
   6.728 -	if ( HYPERVISOR_set_timer_op(alarm) != 0 )
   6.729 -		ret = -1;
   6.730 -
   6.731 -	return ret;
   6.732 -}
   6.733 -
   6.734 -void time_suspend(void)
   6.735 -{
   6.736 -	/* nothing */
   6.737 -}
   6.738 -
   6.739 -/* No locking required. We are only CPU running, and interrupts are off. */
   6.740 -void time_resume(void)
   6.741 -{
   6.742 -	if (timer_tsc_init.init(NULL) != 0)
   6.743 -		BUG();
   6.744 -
   6.745 -	/* Get timebases for new environment. */ 
   6.746 -	__get_time_values_from_xen();
   6.747 -
   6.748 -	/* Reset our own concept of passage of system time. */
   6.749 -	processed_system_time = shadow_system_time;
   6.750 -
   6.751 -	/* Accept a warp in UTC (wall-clock) time. */
   6.752 -	last_seen_tv.tv_sec = 0;
   6.753 -
   6.754 -	/* Make sure we resync UTC time with Xen on next timer interrupt. */
   6.755 -	last_update_from_xen = 0;
   6.756 -}
   6.757 -
   6.758 -#ifdef CONFIG_SMP
   6.759 -#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
   6.760 -
   6.761 -static irqreturn_t local_timer_interrupt(int irq, void *dev_id,
   6.762 -					 struct pt_regs *regs)
   6.763 -{
   6.764 -	s64 delta;
   6.765 -	int cpu = smp_processor_id();
   6.766 -
   6.767 -	do {
   6.768 -		__get_time_values_from_xen();
   6.769 -
   6.770 -		delta = (s64)(shadow_system_time +
   6.771 -			      ((s64)cur_timer->get_offset() * 
   6.772 -			       (s64)NSEC_PER_USEC) -
   6.773 -			      per_cpu(processed_system_time, cpu));
   6.774 -	}
   6.775 -	while (!TIME_VALUES_UP_TO_DATE);
   6.776 -
   6.777 -	if (unlikely(delta < 0)) {
   6.778 -		printk("Timer ISR/%d: Time went backwards: %lld %lld %lld %lld\n",
   6.779 -		       cpu, delta, shadow_system_time,
   6.780 -		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
   6.781 -		       processed_system_time);
   6.782 -		return IRQ_HANDLED;
   6.783 -	}
   6.784 -
   6.785 -	/* Process elapsed jiffies since last call. */
   6.786 -	while (delta >= NS_PER_TICK) {
   6.787 -		delta -= NS_PER_TICK;
   6.788 -		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
   6.789 -		if (regs)
   6.790 -			update_process_times(user_mode(regs));
   6.791 -#if 0
   6.792 -		if (regs)
   6.793 -			profile_tick(CPU_PROFILING, regs);
   6.794 -#endif
   6.795 -	}
   6.796 -
   6.797 -	if (smp_processor_id() == 0) {
   6.798 -	    xxprint("bug bug\n");
   6.799 -	    BUG();
   6.800 -	}
   6.801 -
   6.802 -	return IRQ_HANDLED;
   6.803 -}
   6.804 -
   6.805 -static struct irqaction local_irq_timer = {
   6.806 -	local_timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ltimer",
   6.807 -	NULL, NULL
   6.808 -};
   6.809 -
   6.810 -void local_setup_timer(void)
   6.811 -{
   6.812 -	int seq, time_irq;
   6.813 -	int cpu = smp_processor_id();
   6.814 -
   6.815 -	do {
   6.816 -	    seq = read_seqbegin(&xtime_lock);
   6.817 -	    per_cpu(processed_system_time, cpu) = shadow_system_time;
   6.818 -	} while (read_seqretry(&xtime_lock, seq));
   6.819 -
   6.820 -	time_irq = bind_virq_to_irq(VIRQ_TIMER);
   6.821 -	(void)setup_irq(time_irq, &local_irq_timer);
   6.822 -}
   6.823 -#endif
   6.824 -
   6.825 -/*
   6.826 - * /proc/sys/xen: This really belongs in another file. It can stay here for
   6.827 - * now however.
   6.828 - */
   6.829 -static ctl_table xen_subtable[] = {
   6.830 -	{1, "independent_wallclock", &independent_wallclock,
   6.831 -	 sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
   6.832 -	{0}
   6.833 -};
   6.834 -static ctl_table xen_table[] = {
   6.835 -	{123, "xen", NULL, 0, 0555, xen_subtable},
   6.836 -	{0}
   6.837 -};
   6.838 -static int __init xen_sysctl_init(void)
   6.839 -{
   6.840 -	(void)register_sysctl_table(xen_table, 0);
   6.841 -	return 0;
   6.842 -}
   6.843 -__initcall(xen_sysctl_init);