ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c @ 6002:565cec5b9cc2

merge?
author cl349@firebug.cl.cam.ac.uk
date Tue Aug 02 23:13:50 2005 +0000 (2005-08-02)
parents 1032271a0abf 0db6a59abb95
children d4fd332df775 04dfb5158f3a f294acb25858
line source
1 /*
2 * linux/arch/i386/kernel/time.c
3 *
4 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
5 *
6 * This file contains the PC-specific time handling details:
7 * reading the RTC at bootup, etc..
8 * 1994-07-02 Alan Modra
9 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
10 * 1995-03-26 Markus Kuhn
11 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
12 * precision CMOS clock update
13 * 1996-05-03 Ingo Molnar
14 * fixed time warps in do_[slow|fast]_gettimeoffset()
15 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
16 * "A Kernel Model for Precision Timekeeping" by Dave Mills
17 * 1998-09-05 (Various)
18 * More robust do_fast_gettimeoffset() algorithm implemented
19 * (works with APM, Cyrix 6x86MX and Centaur C6),
20 * monotonic gettimeofday() with fast_get_timeoffset(),
21 * drift-proof precision TSC calibration on boot
22 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
23 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
24 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
25 * 1998-12-16 Andrea Arcangeli
26 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
27 * because it was not accounting for lost_ticks.
28 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
29 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
30 * serialize accesses to xtime/lost_ticks).
31 */
33 #include <linux/errno.h>
34 #include <linux/sched.h>
35 #include <linux/kernel.h>
36 #include <linux/param.h>
37 #include <linux/string.h>
38 #include <linux/mm.h>
39 #include <linux/interrupt.h>
40 #include <linux/time.h>
41 #include <linux/delay.h>
42 #include <linux/init.h>
43 #include <linux/smp.h>
44 #include <linux/module.h>
45 #include <linux/sysdev.h>
46 #include <linux/bcd.h>
47 #include <linux/efi.h>
48 #include <linux/mca.h>
49 #include <linux/sysctl.h>
50 #include <linux/percpu.h>
52 #include <asm/io.h>
53 #include <asm/smp.h>
54 #include <asm/irq.h>
55 #include <asm/msr.h>
56 #include <asm/delay.h>
57 #include <asm/mpspec.h>
58 #include <asm/uaccess.h>
59 #include <asm/processor.h>
60 #include <asm/timer.h>
62 #include "mach_time.h"
64 #include <linux/timex.h>
65 #include <linux/config.h>
67 #include <asm/hpet.h>
69 #include <asm/arch_hooks.h>
71 #include "io_ports.h"
73 extern spinlock_t i8259A_lock;
74 int pit_latch_buggy; /* extern */
76 u64 jiffies_64 = INITIAL_JIFFIES;
78 EXPORT_SYMBOL(jiffies_64);
80 #if defined(__x86_64__)
81 unsigned long vxtime_hz = PIT_TICK_RATE;
82 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
83 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
84 unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
85 struct timespec __xtime __section_xtime;
86 struct timezone __sys_tz __section_sys_tz;
87 #endif
89 #if defined(__x86_64__)
90 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
91 #else
92 unsigned long cpu_khz; /* Detected as we calibrate the TSC */
93 #endif
95 extern unsigned long wall_jiffies;
97 DEFINE_SPINLOCK(rtc_lock);
99 DEFINE_SPINLOCK(i8253_lock);
100 EXPORT_SYMBOL(i8253_lock);
102 extern struct init_timer_opts timer_tsc_init;
103 extern struct timer_opts timer_tsc;
104 struct timer_opts *cur_timer = &timer_tsc;
106 /* These are periodically updated in shared_info, and then copied here. */
107 struct shadow_time_info {
108 u64 tsc_timestamp; /* TSC at last update of time vals. */
109 u64 system_timestamp; /* Time, in nanosecs, since boot. */
110 u32 tsc_to_nsec_mul;
111 u32 tsc_to_usec_mul;
112 int tsc_shift;
113 u32 version;
114 };
115 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
116 static struct timeval shadow_tv;
118 /* Keep track of last time we did processing/updating of jiffies and xtime. */
119 static u64 processed_system_time; /* System time (ns) at last processing. */
120 static DEFINE_PER_CPU(u64, processed_system_time);
122 #define NS_PER_TICK (1000000000ULL/HZ)
124 #define HANDLE_USEC_UNDERFLOW(_tv) do { \
125 while ((_tv).tv_usec < 0) { \
126 (_tv).tv_usec += USEC_PER_SEC; \
127 (_tv).tv_sec--; \
128 } \
129 } while (0)
130 #define HANDLE_USEC_OVERFLOW(_tv) do { \
131 while ((_tv).tv_usec >= USEC_PER_SEC) { \
132 (_tv).tv_usec -= USEC_PER_SEC; \
133 (_tv).tv_sec++; \
134 } \
135 } while (0)
136 static inline void __normalize_time(time_t *sec, s64 *nsec)
137 {
138 while (*nsec >= NSEC_PER_SEC) {
139 (*nsec) -= NSEC_PER_SEC;
140 (*sec)++;
141 }
142 while (*nsec < 0) {
143 (*nsec) += NSEC_PER_SEC;
144 (*sec)--;
145 }
146 }
148 /* Does this guest OS track Xen time, or set its wall clock independently? */
149 static int independent_wallclock = 0;
150 static int __init __independent_wallclock(char *str)
151 {
152 independent_wallclock = 1;
153 return 1;
154 }
155 __setup("independent_wallclock", __independent_wallclock);
156 #define INDEPENDENT_WALLCLOCK() \
157 (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
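/*
 * Added usage note: the wall clock follows Xen unless this is the initial
 * domain or the guest was booted with "independent_wallclock" on its
 * command line; the same flag is also writable at run time through the
 * sysctl registered at the bottom of this file
 * (/proc/sys/xen/independent_wallclock).
 */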
159 int tsc_disable __initdata = 0;
161 static void delay_tsc(unsigned long loops)
162 {
163 unsigned long bclock, now;
165 rdtscl(bclock);
166 do
167 {
168 rep_nop();
169 rdtscl(now);
170 } while ((now-bclock) < loops);
171 }
173 struct timer_opts timer_tsc = {
174 .name = "tsc",
175 .delay = delay_tsc,
176 };
178 static inline u32 down_shift(u64 time, int shift)
179 {
180 if ( shift < 0 )
181 return (u32)(time >> -shift);
182 return (u32)((u32)time << shift);
183 }
185 /*
186 * 32-bit multiplication of integer multiplicand and fractional multiplier
187 * yielding 32-bit integer product.
188 */
189 static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
190 {
191 u32 product_int, product_frac;
192 __asm__ (
193 "mul %3"
194 : "=a" (product_frac), "=d" (product_int)
195 : "0" (multiplicand), "r" (multiplier) );
196 return product_int;
197 }
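/*
 * Illustrative sketch (added, not in the original changeset): together,
 * down_shift() and mul_frac() scale a TSC delta by a 32.32 fixed-point
 * multiplier, i.e.  ns = ((delta << tsc_shift) * tsc_to_nsec_mul) >> 32.
 * A portable equivalent using plain 64-bit arithmetic would look roughly
 * like this (hypothetical helper, for explanation only):
 */
#if 0	/* example only -- not compiled */
static inline u32 scale_delta_example(u64 delta, u32 mul, int shift)
{
	u32 d = (shift < 0) ? (u32)(delta >> -shift) : ((u32)delta << shift);
	return (u32)(((u64)d * mul) >> 32);
}
#endif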
199 void init_cpu_khz(void)
200 {
201 u64 __cpu_khz = 1000000ULL << 32;
202 struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
203 do_div(__cpu_khz, info->tsc_to_system_mul);
204 cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
205 printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
206 cpu_khz / 1000, cpu_khz % 1000);
207 }
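/*
 * Added derivation note: Xen converts TSC deltas to nanoseconds as
 *   ns = ((delta << tsc_shift) * tsc_to_system_mul) >> 32,
 * so the TSC frequency in kHz is
 *   cpu_khz = (10^6 * 2^32 / tsc_to_system_mul) / 2^tsc_shift,
 * which is what the do_div()/down_shift() pair above computes.
 */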
209 static u64 get_nsec_offset(struct shadow_time_info *shadow)
210 {
211 u64 now;
212 u32 delta;
213 rdtscll(now);
214 delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
215 return mul_frac(delta, shadow->tsc_to_nsec_mul);
216 }
218 static unsigned long get_usec_offset(struct shadow_time_info *shadow)
219 {
220 u64 now;
221 u32 delta;
222 rdtscll(now);
223 delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
224 return mul_frac(delta, shadow->tsc_to_usec_mul);
225 }
227 static void update_wallclock(void)
228 {
229 shared_info_t *s = HYPERVISOR_shared_info;
230 long wtm_nsec, xtime_nsec;
231 time_t wtm_sec, xtime_sec;
232 u64 tmp, usec;
234 shadow_tv.tv_sec = s->wc_sec;
235 shadow_tv.tv_usec = s->wc_usec;
237 if (INDEPENDENT_WALLCLOCK())
238 return;
240 if ((time_status & STA_UNSYNC) != 0)
241 return;
243 /* Adjust wall-clock time base based on wall_jiffies ticks. */
244 usec = processed_system_time;
245 do_div(usec, 1000);
246 usec += (u64)shadow_tv.tv_sec * 1000000ULL;
247 usec += (u64)shadow_tv.tv_usec;
248 usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
250 /* Split wallclock base into seconds and nanoseconds. */
251 tmp = usec;
252 xtime_nsec = do_div(tmp, 1000000) * 1000ULL;
253 xtime_sec = (time_t)tmp;
255 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
256 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
258 set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
259 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
260 }
262 /*
263 * Reads a consistent set of time-base values from Xen, into a shadow data
264 * area. Must be called with the xtime_lock held for writing.
265 */
266 static void __get_time_values_from_xen(void)
267 {
268 shared_info_t *s = HYPERVISOR_shared_info;
269 struct vcpu_time_info *src;
270 struct shadow_time_info *dst;
272 src = &s->vcpu_time[smp_processor_id()];
273 dst = &per_cpu(shadow_time, smp_processor_id());
275 do {
276 dst->version = src->time_version2;
277 rmb();
278 dst->tsc_timestamp = src->tsc_timestamp;
279 dst->system_timestamp = src->system_time;
280 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
281 dst->tsc_shift = src->tsc_shift;
282 rmb();
283 }
284 while (dst->version != src->time_version1);
286 dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
288 if ((shadow_tv.tv_sec != s->wc_sec) ||
289 (shadow_tv.tv_usec != s->wc_usec))
290 update_wallclock();
291 }
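/*
 * Added note on the retry loop above: Xen bumps time_version1 before it
 * rewrites the per-VCPU time fields and sets time_version2 to match only
 * after it has finished. Reading time_version2 first, then the fields, and
 * finally comparing against time_version1 therefore yields a consistent
 * snapshot without sharing a lock with the hypervisor -- essentially the
 * read side of a seqlock.
 */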
293 static inline int time_values_up_to_date(int cpu)
294 {
295 struct vcpu_time_info *src;
296 struct shadow_time_info *dst;
298 src = &HYPERVISOR_shared_info->vcpu_time[cpu];
299 dst = &per_cpu(shadow_time, cpu);
301 return (dst->version == src->time_version2);
302 }
304 /*
305 * This is a special lock that is owned by the CPU and holds the index
306 * register we are working with. It is required for NMI access to the
307 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
308 */
309 volatile unsigned long cmos_lock = 0;
310 EXPORT_SYMBOL(cmos_lock);
312 /* Routines for accessing the CMOS RAM/RTC. */
313 unsigned char rtc_cmos_read(unsigned char addr)
314 {
315 unsigned char val;
316 lock_cmos_prefix(addr);
317 outb_p(addr, RTC_PORT(0));
318 val = inb_p(RTC_PORT(1));
319 lock_cmos_suffix(addr);
320 return val;
321 }
322 EXPORT_SYMBOL(rtc_cmos_read);
324 void rtc_cmos_write(unsigned char val, unsigned char addr)
325 {
326 lock_cmos_prefix(addr);
327 outb_p(addr, RTC_PORT(0));
328 outb_p(val, RTC_PORT(1));
329 lock_cmos_suffix(addr);
330 }
331 EXPORT_SYMBOL(rtc_cmos_write);
333 /*
334 * This version of gettimeofday has microsecond resolution
335 * and better than microsecond precision on fast x86 machines with TSC.
336 */
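/*
 * Added summary of the calculation below: the returned time is assembled as
 *   xtime (wall time at the last xtime update)
 *   + ticks not yet folded into xtime (jiffies - wall_jiffies)
 *   + Xen system time accrued since the last processed tick
 *     (shadow->system_timestamp - processed_system_time)
 *   + the TSC-derived offset since the shadow snapshot (get_usec_offset()).
 * If the shadow values are found to be stale, fresh ones are fetched from
 * Xen and the whole calculation is retried.
 */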
337 void do_gettimeofday(struct timeval *tv)
338 {
339 unsigned long seq;
340 unsigned long usec, sec;
341 unsigned long max_ntp_tick;
342 unsigned long flags;
343 s64 nsec;
344 unsigned int cpu;
345 struct shadow_time_info *shadow;
347 cpu = get_cpu();
348 shadow = &per_cpu(shadow_time, cpu);
350 do {
351 unsigned long lost;
353 seq = read_seqbegin(&xtime_lock);
355 usec = get_usec_offset(shadow);
356 lost = jiffies - wall_jiffies;
358 /*
359 * If time_adjust is negative then NTP is slowing the clock,
360 * so make sure not to go into the next possible interval.
361 * Better to lose some accuracy than have time go backwards.
362 */
363 if (unlikely(time_adjust < 0)) {
364 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
365 usec = min(usec, max_ntp_tick);
367 if (lost)
368 usec += lost * max_ntp_tick;
369 }
370 else if (unlikely(lost))
371 usec += lost * (USEC_PER_SEC / HZ);
373 sec = xtime.tv_sec;
374 usec += (xtime.tv_nsec / NSEC_PER_USEC);
376 nsec = shadow->system_timestamp - processed_system_time;
377 __normalize_time(&sec, &nsec);
378 usec += (long)nsec / NSEC_PER_USEC;
380 if (unlikely(!time_values_up_to_date(cpu))) {
381 /*
382 * We may have blocked for a long time,
383 * rendering our calculations invalid
384 * (e.g. the time delta may have
385 * overflowed). Detect that and recalculate
386 * with fresh values.
387 */
388 write_seqlock_irqsave(&xtime_lock, flags);
389 __get_time_values_from_xen();
390 write_sequnlock_irqrestore(&xtime_lock, flags);
391 continue;
392 }
393 } while (read_seqretry(&xtime_lock, seq));
395 put_cpu();
397 while (usec >= USEC_PER_SEC) {
398 usec -= USEC_PER_SEC;
399 sec++;
400 }
402 tv->tv_sec = sec;
403 tv->tv_usec = usec;
404 }
406 EXPORT_SYMBOL(do_gettimeofday);
408 int do_settimeofday(struct timespec *tv)
409 {
410 time_t wtm_sec, sec = tv->tv_sec;
411 long wtm_nsec;
412 s64 nsec;
413 struct timespec xentime;
414 unsigned int cpu;
415 struct shadow_time_info *shadow;
417 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
418 return -EINVAL;
420 if (!INDEPENDENT_WALLCLOCK())
421 return 0; /* Silent failure? */
423 cpu = get_cpu();
424 shadow = &per_cpu(shadow_time, cpu);
426 write_seqlock_irq(&xtime_lock);
428 /*
429 * Ensure we don't get blocked for so long that our time delta overflows.
430 * If that were to happen then our shadow time values would be stale,
431 * so we retry with fresh ones.
432 */
433 again:
434 nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
435 if (unlikely(!time_values_up_to_date(cpu))) {
436 __get_time_values_from_xen();
437 goto again;
438 }
440 __normalize_time(&sec, &nsec);
441 set_normalized_timespec(&xentime, sec, nsec);
443 /*
444 * This is revolting. We need to set "xtime" correctly. However, the
445 * value in this location is the value at the most recent update of
446 * wall time. Discover what correction gettimeofday() would have
447 * made, and then undo it!
448 */
449 nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
451 nsec -= (shadow->system_timestamp - processed_system_time);
453 __normalize_time(&sec, &nsec);
454 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
455 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
457 set_normalized_timespec(&xtime, sec, nsec);
458 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
460 time_adjust = 0; /* stop active adjtime() */
461 time_status |= STA_UNSYNC;
462 time_maxerror = NTP_PHASE_LIMIT;
463 time_esterror = NTP_PHASE_LIMIT;
465 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
466 if (xen_start_info.flags & SIF_INITDOMAIN) {
467 dom0_op_t op;
468 op.cmd = DOM0_SETTIME;
469 op.u.settime.secs = xentime.tv_sec;
470 op.u.settime.usecs = xentime.tv_nsec / NSEC_PER_USEC;
471 op.u.settime.system_time = shadow->system_timestamp;
472 write_sequnlock_irq(&xtime_lock);
473 HYPERVISOR_dom0_op(&op);
474 } else
475 #endif
476 write_sequnlock_irq(&xtime_lock);
478 put_cpu();
480 clock_was_set();
481 return 0;
482 }
484 EXPORT_SYMBOL(do_settimeofday);
486 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
487 static int set_rtc_mmss(unsigned long nowtime)
488 {
489 int retval;
491 WARN_ON(irqs_disabled());
493 /* gets recalled with irq locally disabled */
494 spin_lock_irq(&rtc_lock);
495 if (efi_enabled)
496 retval = efi_set_rtc_mmss(nowtime);
497 else
498 retval = mach_set_rtc_mmss(nowtime);
499 spin_unlock_irq(&rtc_lock);
501 return retval;
502 }
503 #else
504 static int set_rtc_mmss(unsigned long nowtime)
505 {
506 return 0;
507 }
508 #endif
510 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
511 * Note: This function is required to return accurate
512 * time even in the absence of multiple timer ticks.
513 */
514 unsigned long long monotonic_clock(void)
515 {
516 int cpu = get_cpu();
517 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
518 s64 off;
519 unsigned long flags;
521 for ( ; ; ) {
522 off = get_nsec_offset(shadow);
523 if (time_values_up_to_date(cpu))
524 break;
525 write_seqlock_irqsave(&xtime_lock, flags);
526 __get_time_values_from_xen();
527 write_sequnlock_irqrestore(&xtime_lock, flags);
528 }
530 put_cpu();
532 return shadow->system_timestamp + off;
533 }
534 EXPORT_SYMBOL(monotonic_clock);
536 unsigned long long sched_clock(void)
537 {
538 return monotonic_clock();
539 }
541 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
542 unsigned long profile_pc(struct pt_regs *regs)
543 {
544 unsigned long pc = instruction_pointer(regs);
546 if (in_lock_functions(pc))
547 return *(unsigned long *)(regs->ebp + 4);
549 return pc;
550 }
551 EXPORT_SYMBOL(profile_pc);
552 #endif
554 /*
555 * timer_interrupt() needs to keep up the real-time clock,
556 * as well as call the "do_timer()" routine every clocktick
557 */
558 static inline void do_timer_interrupt(int irq, void *dev_id,
559 struct pt_regs *regs)
560 {
561 s64 delta, delta_cpu;
562 int cpu = smp_processor_id();
563 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
565 do {
566 __get_time_values_from_xen();
568 delta = delta_cpu =
569 shadow->system_timestamp + get_nsec_offset(shadow);
570 delta -= processed_system_time;
571 delta_cpu -= per_cpu(processed_system_time, cpu);
572 }
573 while (!time_values_up_to_date(cpu));
575 if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
576 printk("Timer ISR/%d: Time went backwards: "
577 "delta=%lld cpu_delta=%lld shadow=%lld "
578 "off=%lld processed=%lld cpu_processed=%lld\n",
579 cpu, delta, delta_cpu, shadow->system_timestamp,
580 (s64)get_nsec_offset(shadow),
581 processed_system_time,
582 per_cpu(processed_system_time, cpu));
583 for (cpu = 0; cpu < num_online_cpus(); cpu++)
584 printk(" %d: %lld\n", cpu,
585 per_cpu(processed_system_time, cpu));
586 return;
587 }
589 /* System-wide jiffy work. */
590 while (delta >= NS_PER_TICK) {
591 delta -= NS_PER_TICK;
592 processed_system_time += NS_PER_TICK;
593 do_timer(regs);
594 }
596 /* Local CPU jiffy work. */
597 while (delta_cpu >= NS_PER_TICK) {
598 delta_cpu -= NS_PER_TICK;
599 per_cpu(processed_system_time, cpu) += NS_PER_TICK;
600 update_process_times(user_mode(regs));
601 profile_tick(CPU_PROFILING, regs);
602 }
603 }
605 /*
606 * This is the same as the above, except we _also_ save the current
607 * Time Stamp Counter value at the time of the timer interrupt, so that
608 * we later on can estimate the time of day more exactly.
609 */
610 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
611 {
612 /*
613 * Here we are in the timer irq handler. We just have irqs locally
614 * disabled but we don't know if the timer_bh is running on the other
615 * CPU. We need to avoid an SMP race with it. NOTE: we don't need
616 * the irq version of write_lock because, as just said, we have irqs
617 * locally disabled. -arca
618 */
619 write_seqlock(&xtime_lock);
620 do_timer_interrupt(irq, NULL, regs);
621 write_sequnlock(&xtime_lock);
622 return IRQ_HANDLED;
623 }
625 /* not static: needed by APM */
626 unsigned long get_cmos_time(void)
627 {
628 unsigned long retval;
630 spin_lock(&rtc_lock);
632 if (efi_enabled)
633 retval = efi_get_time();
634 else
635 retval = mach_get_cmos_time();
637 spin_unlock(&rtc_lock);
639 return retval;
640 }
641 static void sync_cmos_clock(unsigned long dummy);
643 static struct timer_list sync_cmos_timer =
644 TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
646 static void sync_cmos_clock(unsigned long dummy)
647 {
648 struct timeval now, next;
649 int fail = 1;
651 /*
652 * If we have an externally synchronized Linux clock, then update
653 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
654 * called as close as possible to 500 ms before the new second starts.
655 * This code is run on a timer. If the clock is set, that timer
656 * may not expire at the correct time. Thus, we adjust...
657 */
658 if ((time_status & STA_UNSYNC) != 0)
659 /*
660 * Not synced, exit, do not restart a timer (if one is
661 * running, let it run out).
662 */
663 return;
665 do_gettimeofday(&now);
666 if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
667 now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
668 fail = set_rtc_mmss(now.tv_sec);
670 next.tv_usec = USEC_AFTER - now.tv_usec;
671 if (next.tv_usec <= 0)
672 next.tv_usec += USEC_PER_SEC;
674 if (!fail)
675 next.tv_sec = 659;
676 else
677 next.tv_sec = 0;
679 if (next.tv_usec >= USEC_PER_SEC) {
680 next.tv_sec++;
681 next.tv_usec -= USEC_PER_SEC;
682 }
683 mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
684 }
686 void notify_arch_cmos_timer(void)
687 {
688 mod_timer(&sync_cmos_timer, jiffies + 1);
689 }
691 static long clock_cmos_diff, sleep_start;
693 static int timer_suspend(struct sys_device *dev, pm_message_t state)
694 {
695 /*
696 * Estimate time zone so that set_time can update the clock
697 */
698 clock_cmos_diff = -get_cmos_time();
699 clock_cmos_diff += get_seconds();
700 sleep_start = get_cmos_time();
701 return 0;
702 }
704 static int timer_resume(struct sys_device *dev)
705 {
706 unsigned long flags;
707 unsigned long sec;
708 unsigned long sleep_length;
710 #ifdef CONFIG_HPET_TIMER
711 if (is_hpet_enabled())
712 hpet_reenable();
713 #endif
714 sec = get_cmos_time() + clock_cmos_diff;
715 sleep_length = (get_cmos_time() - sleep_start) * HZ;
716 write_seqlock_irqsave(&xtime_lock, flags);
717 xtime.tv_sec = sec;
718 xtime.tv_nsec = 0;
719 write_sequnlock_irqrestore(&xtime_lock, flags);
720 jiffies += sleep_length;
721 wall_jiffies += sleep_length;
722 return 0;
723 }
725 static struct sysdev_class timer_sysclass = {
726 .resume = timer_resume,
727 .suspend = timer_suspend,
728 set_kset_name("timer"),
729 };
732 /* XXX this driverfs stuff should probably go elsewhere later -john */
733 static struct sys_device device_timer = {
734 .id = 0,
735 .cls = &timer_sysclass,
736 };
738 static int time_init_device(void)
739 {
740 int error = sysdev_class_register(&timer_sysclass);
741 if (!error)
742 error = sysdev_register(&device_timer);
743 return error;
744 }
746 device_initcall(time_init_device);
748 #ifdef CONFIG_HPET_TIMER
749 extern void (*late_time_init)(void);
750 /* Duplicate of time_init() below, with hpet_enable part added */
751 static void __init hpet_time_init(void)
752 {
753 xtime.tv_sec = get_cmos_time();
754 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
755 set_normalized_timespec(&wall_to_monotonic,
756 -xtime.tv_sec, -xtime.tv_nsec);
758 if ((hpet_enable() >= 0) && hpet_use_timer) {
759 printk("Using HPET for base-timer\n");
760 }
762 cur_timer = select_timer();
763 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
765 time_init_hook();
766 }
767 #endif
769 /* Dynamically-mapped IRQ. */
770 static DEFINE_PER_CPU(int, timer_irq);
772 static struct irqaction irq_timer = {
773 timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
774 NULL, NULL
775 };
777 void __init time_init(void)
778 {
779 #ifdef CONFIG_HPET_TIMER
780 if (is_hpet_capable()) {
781 /*
782 * HPET initialization needs to do memory-mapped io. So, let
783 * us do a late initialization after mem_init().
784 */
785 late_time_init = hpet_time_init;
786 return;
787 }
788 #endif
789 __get_time_values_from_xen();
790 xtime.tv_sec = shadow_tv.tv_sec;
791 xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
792 set_normalized_timespec(&wall_to_monotonic,
793 -xtime.tv_sec, -xtime.tv_nsec);
794 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
795 per_cpu(processed_system_time, 0) = processed_system_time;
797 init_cpu_khz();
799 #if defined(__x86_64__)
800 vxtime.mode = VXTIME_TSC;
801 vxtime.quot = (1000000L << 32) / vxtime_hz;
802 vxtime.tsc_quot = (1000L << 32) / cpu_khz;
803 vxtime.hz = vxtime_hz;
804 sync_core();
805 rdtscll(vxtime.last_tsc);
806 #endif
808 per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
809 (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
810 }
812 /* Convert jiffies to system time. */
813 static inline u64 jiffies_to_st(unsigned long j)
814 {
815 unsigned long seq;
816 long delta;
817 u64 st;
819 do {
820 seq = read_seqbegin(&xtime_lock);
821 delta = j - jiffies;
822 /* NB. The next check can trigger in some wrap-around cases,
823 * but that's ok: we'll just end up with a shorter timeout. */
824 if (delta < 1)
825 delta = 1;
826 st = processed_system_time + (delta * NS_PER_TICK);
827 } while (read_seqretry(&xtime_lock, seq));
829 return st;
830 }
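/*
 * Added note: the value returned by jiffies_to_st() is an absolute time on
 * Xen's system-time axis (ns since boot), which is the form that
 * HYPERVISOR_set_timer_op() expects when stop_hz_timer() below programs a
 * one-shot wakeup for an idle CPU.
 */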
832 /*
833 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
834 * These functions are based on implementations from arch/s390/kernel/time.c
835 */
836 void stop_hz_timer(void)
837 {
838 unsigned int cpu = smp_processor_id();
839 unsigned long j;
841 /* s390 does this /before/ checking rcu_pending(). We do the same. */
842 cpu_set(cpu, nohz_cpu_mask);
844 /* Leave ourselves in 'tick mode' if rcu or softirq pending. */
845 if (rcu_pending(cpu) || local_softirq_pending()) {
846 cpu_clear(cpu, nohz_cpu_mask);
847 j = jiffies + 1;
848 } else {
849 j = next_timer_interrupt();
850 }
852 BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
853 }
855 void start_hz_timer(void)
856 {
857 cpu_clear(smp_processor_id(), nohz_cpu_mask);
858 }
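/*
 * Sketch of the expected caller (an assumption, not part of this file): the
 * idle loop is meant to bracket its blocking hypercall with these helpers,
 * along these lines:
 */
#if 0	/* illustration only */
static void idle_loop_example(void)
{
	for (;;) {
		stop_hz_timer();	/* program one-shot wakeup, go tickless */
		HYPERVISOR_block();	/* sleep until an event or timer fires */
		start_hz_timer();	/* resume normal periodic ticking */
	}
}
#endif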
860 void time_suspend(void)
861 {
862 /* nothing */
863 teardown_irq(per_cpu(timer_irq, 0), &irq_timer);
864 unbind_virq_from_irq(VIRQ_TIMER);
865 }
867 /* No locking required. We are the only CPU running, and interrupts are off. */
868 void time_resume(void)
869 {
870 init_cpu_khz();
872 /* Get timebases for new environment. */
873 __get_time_values_from_xen();
875 /* Reset our own concept of passage of system time. */
876 processed_system_time =
877 per_cpu(shadow_time, smp_processor_id()).system_timestamp;
878 per_cpu(processed_system_time, 0) = processed_system_time;
880 per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
881 (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
882 }
884 #ifdef CONFIG_SMP
885 static char timer_name[NR_CPUS][15];
886 void local_setup_timer_irq(void)
887 {
888 int cpu = smp_processor_id();
890 if (cpu == 0)
891 return;
892 per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
893 sprintf(timer_name[cpu], "timer%d", cpu);
894 BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
895 SA_INTERRUPT, timer_name[cpu], NULL));
896 }
898 void local_setup_timer(void)
899 {
900 int seq, cpu = smp_processor_id();
902 do {
903 seq = read_seqbegin(&xtime_lock);
904 per_cpu(processed_system_time, cpu) =
905 per_cpu(shadow_time, cpu).system_timestamp;
906 } while (read_seqretry(&xtime_lock, seq));
908 local_setup_timer_irq();
909 }
911 void local_teardown_timer_irq(void)
912 {
913 int cpu = smp_processor_id();
915 if (cpu == 0)
916 return;
917 free_irq(per_cpu(timer_irq, cpu), NULL);
918 unbind_virq_from_irq(VIRQ_TIMER);
919 }
920 #endif
922 /*
923 * /proc/sys/xen: This really belongs in another file. It can stay here
924 * for now, however.
925 */
926 static ctl_table xen_subtable[] = {
927 {1, "independent_wallclock", &independent_wallclock,
928 sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
929 {0}
930 };
931 static ctl_table xen_table[] = {
932 {123, "xen", NULL, 0, 0555, xen_subtable},
933 {0}
934 };
935 static int __init xen_sysctl_init(void)
936 {
937 (void)register_sysctl_table(xen_table, 0);
938 return 0;
939 }
940 __initcall(xen_sysctl_init);
942 /*
943 * Local variables:
944 * c-file-style: "linux"
945 * indent-tabs-mode: t
946 * c-indent-level: 8
947 * c-basic-offset: 8
948 * tab-width: 8
949 * End:
950 */