ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c @ 8206:5ff5117291ad

Using a slow serial console with sync_console on SMP leaves an
unusable system: it gets into a flood of

Timer ISR/0: Time went backwards: delta=-3566279 cpu_delta=16433721
shadow=2019998373 off=420435384 processed=2444000000
cpu_processed=2424000000
0: 2424000000
1: 2444000000

messages which swamps the console and leaves things almost, but not
quite, completely hung. It is a simple matter to ratelimit these
printks, which completely eliminates the problem.
author kaf24@firebug.cl.cam.ac.uk
date Sat Dec 03 10:44:01 2005 +0100 (2005-12-03)
parents 36ab34f1c31e
children 0880561d46d1
line source
1 /*
2 * linux/arch/i386/kernel/time.c
3 *
4 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
5 *
6 * This file contains the PC-specific time handling details:
7 * reading the RTC at bootup, etc..
8 * 1994-07-02 Alan Modra
9 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
10 * 1995-03-26 Markus Kuhn
11 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
12 * precision CMOS clock update
13 * 1996-05-03 Ingo Molnar
14 * fixed time warps in do_[slow|fast]_gettimeoffset()
15 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
16 * "A Kernel Model for Precision Timekeeping" by Dave Mills
17 * 1998-09-05 (Various)
18 * More robust do_fast_gettimeoffset() algorithm implemented
19 * (works with APM, Cyrix 6x86MX and Centaur C6),
20 * monotonic gettimeofday() with fast_get_timeoffset(),
21 * drift-proof precision TSC calibration on boot
22 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
23 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
24 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
25 * 1998-12-16 Andrea Arcangeli
26 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
27 * because was not accounting lost_ticks.
28 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
29 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
30 * serialize accesses to xtime/lost_ticks).
31 */
33 #include <linux/errno.h>
34 #include <linux/sched.h>
35 #include <linux/kernel.h>
36 #include <linux/param.h>
37 #include <linux/string.h>
38 #include <linux/mm.h>
39 #include <linux/interrupt.h>
40 #include <linux/time.h>
41 #include <linux/delay.h>
42 #include <linux/init.h>
43 #include <linux/smp.h>
44 #include <linux/module.h>
45 #include <linux/sysdev.h>
46 #include <linux/bcd.h>
47 #include <linux/efi.h>
48 #include <linux/mca.h>
49 #include <linux/sysctl.h>
50 #include <linux/percpu.h>
52 #include <asm/io.h>
53 #include <asm/smp.h>
54 #include <asm/irq.h>
55 #include <asm/msr.h>
56 #include <asm/delay.h>
57 #include <asm/mpspec.h>
58 #include <asm/uaccess.h>
59 #include <asm/processor.h>
60 #include <asm/timer.h>
62 #include "mach_time.h"
64 #include <linux/timex.h>
65 #include <linux/config.h>
67 #include <asm/hpet.h>
69 #include <asm/arch_hooks.h>
71 #include "io_ports.h"
73 #include <asm-xen/evtchn.h>
/* PIC lock, defined by the i8259 code; referenced by timer paths. */
75 extern spinlock_t i8259A_lock;
76 int pit_latch_buggy; /* extern */
/* 64-bit jiffies counter; INITIAL_JIFFIES makes wrap bugs show up early. */
78 u64 jiffies_64 = INITIAL_JIFFIES;
80 EXPORT_SYMBOL(jiffies_64);
/* x86-64 only: vsyscall-visible time state, placed in dedicated sections. */
82 #if defined(__x86_64__)
83 unsigned long vxtime_hz = PIT_TICK_RATE;
84 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
85 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
86 unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
87 struct timespec __xtime __section_xtime;
88 struct timezone __sys_tz __section_sys_tz;
89 #endif
/* NOTE(review): cpu_khz type differs per arch (uint vs ulong) to match the
 * arch's existing declaration -- do not unify without checking both headers. */
91 #if defined(__x86_64__)
92 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
93 #else
94 unsigned long cpu_khz; /* Detected as we calibrate the TSC */
95 #endif
97 extern unsigned long wall_jiffies;
/* Serializes CMOS/RTC register access (see rtc_cmos_read/write below). */
99 DEFINE_SPINLOCK(rtc_lock);
101 DEFINE_SPINLOCK(i8253_lock);
102 EXPORT_SYMBOL(i8253_lock);
/* Under Xen the TSC-based timer backend is used unconditionally. */
104 extern struct init_timer_opts timer_tsc_init;
105 extern struct timer_opts timer_tsc;
106 struct timer_opts *cur_timer = &timer_tsc;
/* Per-CPU snapshot of Xen's time parameters, refreshed by
 * get_time_values_from_xen() whenever the hypervisor bumps its version. */
108 /* These are periodically updated in shared_info, and then copied here. */
109 struct shadow_time_info {
110 u64 tsc_timestamp; /* TSC at last update of time vals. */
111 u64 system_timestamp; /* Time, in nanosecs, since boot. */
112 u32 tsc_to_nsec_mul;
113 u32 tsc_to_usec_mul;
114 int tsc_shift;
/* Copied from vcpu_time_info.version; mismatch with the live value (or an
 * odd value there) means this snapshot is stale. */
115 u32 version;
116 };
117 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
/* Last wallclock read from Xen, and the wc_version it was read under. */
118 static struct timespec shadow_tv;
119 static u32 shadow_tv_version;
121 /* Keep track of last time we did processing/updating of jiffies and xtime. */
122 static u64 processed_system_time; /* System time (ns) at last processing. */
123 static DEFINE_PER_CPU(u64, processed_system_time);
125 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
126 #define NS_PER_TICK (1000000000LL/HZ)
128 static inline void __normalize_time(time_t *sec, s64 *nsec)
129 {
130 while (*nsec >= NSEC_PER_SEC) {
131 (*nsec) -= NSEC_PER_SEC;
132 (*sec)++;
133 }
134 while (*nsec < 0) {
135 (*nsec) += NSEC_PER_SEC;
136 (*sec)--;
137 }
138 }
140 /* Does this guest OS track Xen time, or set its wall clock independently? */
141 static int independent_wallclock = 0;
/* Handler for the "independent_wallclock" kernel command-line option: decouple
 * this guest's wallclock from Xen's. The option takes no argument, so @str is
 * ignored; returning 1 tells the option parser it was consumed. */
142 static int __init __independent_wallclock(char *str)
143 {
144 independent_wallclock = 1;
145 return 1;
146 }
147 __setup("independent_wallclock", __independent_wallclock);
/* NOTE(review): not referenced in this file -- presumably consumed by common
 * i386 code; verify before removing. */
149 int tsc_disable __initdata = 0;
/*
 * Busy-wait until the low 32 bits of the TSC advance by @loops ticks.
 * The unsigned (now-bclock) subtraction stays correct across a 32-bit
 * TSC wrap; rep_nop() relaxes the CPU inside the spin.
 */
151 static void delay_tsc(unsigned long loops)
152 {
153 unsigned long bclock, now;
155 rdtscl(bclock);
156 do
157 {
158 rep_nop();
159 rdtscl(now);
160 } while ((now-bclock) < loops);
161 }
/* Timer backend descriptor: only the calibrated-delay hook is filled in. */
163 struct timer_opts timer_tsc = {
164 .name = "tsc",
165 .delay = delay_tsc,
166 };
168 /*
169 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
170 * yielding a 64-bit result.
171 */
/*
 * Computes (delta << shift) * mul_frac / 2^32, i.e. mul_frac is a 32.32
 * fixed-point multiplier (Xen's tsc_to_system_mul format). On i386 the
 * 64x32 product is built from two 32x32->64 mul instructions, keeping the
 * middle 64 bits of the 96-bit result; on x86-64 a single 64-bit mul plus
 * shrd extracts the same bits.
 */
172 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
173 {
174 u64 product;
175 #ifdef __i386__
176 u32 tmp1, tmp2;
177 #endif
/* A negative shift scales the TSC down before multiplying. */
179 if ( shift < 0 )
180 delta >>= -shift;
181 else
182 delta <<= shift;
184 #ifdef __i386__
185 __asm__ (
186 "mul %5 ; "
187 "mov %4,%%eax ; "
188 "mov %%edx,%4 ; "
189 "mul %5 ; "
190 "xor %5,%5 ; "
191 "add %4,%%eax ; "
192 "adc %5,%%edx ; "
193 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
194 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
195 #else
196 __asm__ (
197 "mul %%rdx ; shrd $32,%%rdx,%%rax"
198 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
199 #endif
201 return product;
202 }
/*
 * Derive cpu_khz from VCPU0's Xen time parameters: invert the 32.32
 * fixed-point ns-per-TSC-tick multiplier (2^32 * 10^6 / mul gives kHz)
 * and undo tsc_shift to recover the raw TSC frequency.
 */
204 void init_cpu_khz(void)
205 {
206 u64 __cpu_khz = 1000000ULL << 32;
207 struct vcpu_time_info *info;
208 info = &HYPERVISOR_shared_info->vcpu_info[0].time;
209 do_div(__cpu_khz, info->tsc_to_system_mul);
/* Shift direction is inverted relative to scale_delta(): we are undoing it. */
210 if ( info->tsc_shift < 0 )
211 cpu_khz = __cpu_khz << -info->tsc_shift;
212 else
213 cpu_khz = __cpu_khz >> info->tsc_shift;
214 }
/* Nanoseconds elapsed since @shadow's snapshot, from the current TSC. */
216 static u64 get_nsec_offset(struct shadow_time_info *shadow)
217 {
218 u64 now, delta;
219 rdtscll(now);
220 delta = now - shadow->tsc_timestamp;
221 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
222 }
/* Microseconds elapsed since @shadow's snapshot (usec variant of the above). */
224 static unsigned long get_usec_offset(struct shadow_time_info *shadow)
225 {
226 u64 now, delta;
227 rdtscll(now);
228 delta = now - shadow->tsc_timestamp;
229 return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
230 }
/*
 * Rebase xtime/wall_to_monotonic to the wallclock (sec, nsec), expressed
 * relative to system-time origin. Caller must hold xtime_lock for writing.
 */
232 static void __update_wallclock(time_t sec, long nsec)
233 {
234 long wtm_nsec, xtime_nsec;
235 time_t wtm_sec, xtime_sec;
236 u64 tmp, wc_nsec;
238 /* Adjust wall-clock time base based on wall_jiffies ticks. */
239 wc_nsec = processed_system_time;
240 wc_nsec += sec * (u64)NSEC_PER_SEC;
241 wc_nsec += nsec;
242 wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
244 /* Split wallclock base into seconds and nanoseconds. */
245 tmp = wc_nsec;
246 xtime_nsec = do_div(tmp, 1000000000);
247 xtime_sec = (time_t)tmp;
/* Shift wall_to_monotonic by the same amount xtime moves, keeping the
 * monotonic clock continuous across the wallclock step. */
249 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
250 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
252 set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
253 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
/* The clock just stepped: cancel in-flight adjtime and mark NTP unsynced. */
255 time_adjust = 0; /* stop active adjtime() */
256 time_status |= STA_UNSYNC;
257 time_maxerror = NTP_PHASE_LIMIT;
258 time_esterror = NTP_PHASE_LIMIT;
259 }
/*
 * Re-read Xen's wallclock into shadow_tv and, unless the guest keeps an
 * independent wallclock, fold it into xtime via __update_wallclock().
 */
261 static void update_wallclock(void)
262 {
263 shared_info_t *s = HYPERVISOR_shared_info;
/* Lockless snapshot: Xen holds wc_version odd while updating. Retry until
 * we read a consistent (even, unchanged) version around the fields. */
265 do {
266 shadow_tv_version = s->wc_version;
267 rmb();
268 shadow_tv.tv_sec = s->wc_sec;
269 shadow_tv.tv_nsec = s->wc_nsec;
270 rmb();
271 }
272 while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
274 if (!independent_wallclock)
275 __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
276 }
278 /*
279 * Reads a consistent set of time-base values from Xen, into a shadow data
280 * area.
281 */
282 static void get_time_values_from_xen(void)
283 {
284 shared_info_t *s = HYPERVISOR_shared_info;
285 struct vcpu_time_info *src;
286 struct shadow_time_info *dst;
288 src = &s->vcpu_info[smp_processor_id()].time;
289 dst = &per_cpu(shadow_time, smp_processor_id());
/* Same lockless version protocol as update_wallclock(): an odd version
 * means Xen is mid-update; a changed version means we raced one. */
291 do {
292 dst->version = src->version;
293 rmb();
294 dst->tsc_timestamp = src->tsc_timestamp;
295 dst->system_timestamp = src->system_time;
296 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
297 dst->tsc_shift = src->tsc_shift;
298 rmb();
299 }
300 while ((src->version & 1) | (dst->version ^ src->version));
/* Derived value: same 32.32 multiplier scaled from ns to us. */
302 dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
303 }
/* True iff @cpu's shadow snapshot still matches Xen's live version, i.e.
 * Xen has not published newer time parameters since we copied them. */
305 static inline int time_values_up_to_date(int cpu)
306 {
307 struct vcpu_time_info *src;
308 struct shadow_time_info *dst;
310 src = &HYPERVISOR_shared_info->vcpu_info[cpu].time;
311 dst = &per_cpu(shadow_time, cpu);
313 return (dst->version == src->version);
314 }
316 /*
317 * This is a special lock that is owned by the CPU and holds the index
318 * register we are working with. It is required for NMI access to the
319 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
320 */
321 volatile unsigned long cmos_lock = 0;
322 EXPORT_SYMBOL(cmos_lock);
324 /* Routines for accessing the CMOS RAM/RTC. */
/* Read one byte from CMOS register @addr (index port then data port),
 * holding cmos_lock so an NMI cannot clobber the selected index. */
325 unsigned char rtc_cmos_read(unsigned char addr)
326 {
327 unsigned char val;
328 lock_cmos_prefix(addr);
329 outb_p(addr, RTC_PORT(0));
330 val = inb_p(RTC_PORT(1));
331 lock_cmos_suffix(addr);
332 return val;
333 }
334 EXPORT_SYMBOL(rtc_cmos_read);
/* Write @val to CMOS register @addr; same index/data and locking protocol
 * as rtc_cmos_read(). */
336 void rtc_cmos_write(unsigned char val, unsigned char addr)
337 {
338 lock_cmos_prefix(addr);
339 outb_p(addr, RTC_PORT(0));
340 outb_p(val, RTC_PORT(1));
341 lock_cmos_suffix(addr);
342 }
343 EXPORT_SYMBOL(rtc_cmos_write);
345 /*
346 * This version of gettimeofday has microsecond resolution
347 * and better than microsecond precision on fast x86 machines with TSC.
348 */
/*
 * Fills @tv with wallclock time = xtime + jiffy backlog + TSC offset since
 * this CPU's shadow snapshot. Retries whenever the xtime seqlock or the
 * shadow version changes mid-read, so all inputs are mutually consistent.
 */
349 void do_gettimeofday(struct timeval *tv)
350 {
351 unsigned long seq;
352 unsigned long usec, sec;
353 unsigned long max_ntp_tick;
354 s64 nsec;
355 unsigned int cpu;
356 struct shadow_time_info *shadow;
357 u32 local_time_version;
/* Pin this CPU: shadow_time is per-CPU state. */
359 cpu = get_cpu();
360 shadow = &per_cpu(shadow_time, cpu);
362 do {
363 unsigned long lost;
365 local_time_version = shadow->version;
366 seq = read_seqbegin(&xtime_lock);
368 usec = get_usec_offset(shadow);
/* Ticks seen but not yet folded into xtime. */
369 lost = jiffies - wall_jiffies;
371 /*
372 * If time_adjust is negative then NTP is slowing the clock
373 * so make sure not to go into next possible interval.
374 * Better to lose some accuracy than have time go backwards..
375 */
376 if (unlikely(time_adjust < 0)) {
377 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
378 usec = min(usec, max_ntp_tick);
380 if (lost)
381 usec += lost * max_ntp_tick;
382 }
383 else if (unlikely(lost))
384 usec += lost * (USEC_PER_SEC / HZ);
386 sec = xtime.tv_sec;
387 usec += (xtime.tv_nsec / NSEC_PER_USEC);
/* Account system time that accrued between the last tick processing and
 * the shadow snapshot; can be negative, hence signed + normalize. */
389 nsec = shadow->system_timestamp - processed_system_time;
390 __normalize_time(&sec, &nsec);
391 usec += (long)nsec / NSEC_PER_USEC;
393 if (unlikely(!time_values_up_to_date(cpu))) {
394 /*
395 * We may have blocked for a long time,
396 * rendering our calculations invalid
397 * (e.g. the time delta may have
398 * overflowed). Detect that and recalculate
399 * with fresh values.
400 */
401 get_time_values_from_xen();
402 continue;
403 }
404 } while (read_seqretry(&xtime_lock, seq) ||
405 (local_time_version != shadow->version));
407 put_cpu();
/* Carry accumulated microseconds into whole seconds. */
409 while (usec >= USEC_PER_SEC) {
410 usec -= USEC_PER_SEC;
411 sec++;
412 }
414 tv->tv_sec = sec;
415 tv->tv_usec = usec;
416 }
418 EXPORT_SYMBOL(do_gettimeofday);
/*
 * Set the wallclock. In the initial domain (and when not independent) the
 * new time is pushed to Xen via DOM0_SETTIME and then read back; with an
 * independent wallclock only the local xtime is rebased. Returns 0 on
 * success, -EINVAL if tv_nsec is out of range.
 */
420 int do_settimeofday(struct timespec *tv)
421 {
422 time_t sec;
423 s64 nsec;
424 unsigned int cpu;
425 struct shadow_time_info *shadow;
426 dom0_op_t op;
428 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
429 return -EINVAL;
431 cpu = get_cpu();
432 shadow = &per_cpu(shadow_time, cpu);
434 write_seqlock_irq(&xtime_lock);
436 /*
437 * Ensure we don't get blocked for a long time so that our time delta
438 * overflows. If that were to happen then our shadow time values would
439 * be stale, so we can retry with fresh ones.
440 */
441 for ( ; ; ) {
442 nsec = tv->tv_nsec - get_nsec_offset(shadow);
443 if (time_values_up_to_date(cpu))
444 break;
445 get_time_values_from_xen();
446 }
447 sec = tv->tv_sec;
448 __normalize_time(&sec, &nsec);
/* Dom0 tracking Xen time: tell the hypervisor, then resync from it. */
450 if ((xen_start_info->flags & SIF_INITDOMAIN) &&
451 !independent_wallclock) {
452 op.cmd = DOM0_SETTIME;
453 op.u.settime.secs = sec;
454 op.u.settime.nsecs = nsec;
455 op.u.settime.system_time = shadow->system_timestamp;
456 HYPERVISOR_dom0_op(&op);
457 update_wallclock();
458 } else if (independent_wallclock) {
/* Rebase to system-time origin before updating the local wallclock. */
459 nsec -= shadow->system_timestamp;
460 __normalize_time(&sec, &nsec);
461 __update_wallclock(sec, nsec);
462 }
464 write_sequnlock_irq(&xtime_lock);
466 put_cpu();
468 clock_was_set();
469 return 0;
470 }
472 EXPORT_SYMBOL(do_settimeofday);
474 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
/*
 * Push @nowtime's minutes/seconds into the hardware RTC (EFI or CMOS).
 * Only the initial domain may touch the real RTC; other guests return 0.
 * Returns the underlying helper's status (0 on success).
 */
475 static int set_rtc_mmss(unsigned long nowtime)
476 {
477 int retval;
479 WARN_ON(irqs_disabled());
481 if (!(xen_start_info->flags & SIF_INITDOMAIN))
482 return 0;
484 /* gets recalled with irq locally disabled */
485 spin_lock_irq(&rtc_lock);
486 if (efi_enabled)
487 retval = efi_set_rtc_mmss(nowtime);
488 else
489 retval = mach_set_rtc_mmss(nowtime);
490 spin_unlock_irq(&rtc_lock);
492 return retval;
493 }
494 #else
/* Unprivileged build: no RTC access; report success so callers proceed. */
495 static int set_rtc_mmss(unsigned long nowtime)
496 {
497 return 0;
498 }
499 #endif
501 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
502 * Note: This function is required to return accurate
503 * time even in the absence of multiple timer ticks.
504 */
505 unsigned long long monotonic_clock(void)
506 {
507 int cpu = get_cpu();
508 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
509 u64 time;
510 u32 local_time_version;
/* Retry until a whole computation lands within one shadow version,
 * refreshing from Xen when the snapshot has gone stale. */
512 do {
513 local_time_version = shadow->version;
514 smp_rmb();
515 time = shadow->system_timestamp + get_nsec_offset(shadow);
516 if (!time_values_up_to_date(cpu))
517 get_time_values_from_xen();
518 smp_rmb();
519 } while (local_time_version != shadow->version);
521 put_cpu();
523 return time;
524 }
525 EXPORT_SYMBOL(monotonic_clock);
/* Scheduler clock: nanoseconds since boot, taken from the monotonic clock. */
527 unsigned long long sched_clock(void)
528 {
529 return monotonic_clock();
530 }
532 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
/* For profiling: if the interrupted PC is inside lock code, report the
 * caller instead, recovered from the frame pointer (hence the
 * CONFIG_FRAME_POINTER dependency -- ebp+4 is the return address). */
533 unsigned long profile_pc(struct pt_regs *regs)
534 {
535 unsigned long pc = instruction_pointer(regs);
537 if (in_lock_functions(pc))
538 return *(unsigned long *)(regs->ebp + 4);
540 return pc;
541 }
542 EXPORT_SYMBOL(profile_pc);
543 #endif
/*
 * VIRQ_TIMER handler. Computes how much Xen system time has passed since
 * the last global (delta) and per-CPU (delta_cpu) processing points, then
 * accounts whole ticks: global jiffies/xtime under xtime_lock, and per-CPU
 * process accounting outside it.
 */
545 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
546 {
547 s64 delta, delta_cpu;
548 int i, cpu = smp_processor_id();
549 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
551 write_seqlock(&xtime_lock);
/* Loop until the deltas were computed from an up-to-date snapshot. */
553 do {
554 get_time_values_from_xen();
556 delta = delta_cpu =
557 shadow->system_timestamp + get_nsec_offset(shadow);
558 delta -= processed_system_time;
559 delta_cpu -= per_cpu(processed_system_time, cpu);
560 }
561 while (!time_values_up_to_date(cpu));
/* Time apparently went backwards. Rate-limited report (this changeset's
 * fix: unlimited printks over a slow sync console could wedge the box). */
563 if ((unlikely(delta < -1000000LL) || unlikely(delta_cpu < 0))
564 && printk_ratelimit()) {
565 printk("Timer ISR/%d: Time went backwards: "
566 "delta=%lld cpu_delta=%lld shadow=%lld "
567 "off=%lld processed=%lld cpu_processed=%lld\n",
568 cpu, delta, delta_cpu, shadow->system_timestamp,
569 (s64)get_nsec_offset(shadow),
570 processed_system_time,
571 per_cpu(processed_system_time, cpu));
572 for (i = 0; i < num_online_cpus(); i++)
573 printk(" %d: %lld\n", i,
574 per_cpu(processed_system_time, i));
575 }
577 /* System-wide jiffy work. */
578 while (delta >= NS_PER_TICK) {
579 delta -= NS_PER_TICK;
580 processed_system_time += NS_PER_TICK;
581 do_timer(regs);
582 }
/* Pick up a wallclock change published by Xen since our last look. */
584 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
585 update_wallclock();
586 clock_was_set();
587 }
589 write_sequnlock(&xtime_lock);
591 /*
592 * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
593 * if there is risk of deadlock if we do (since update_process_times
594 * may do scheduler rebalancing work and thus acquire runqueue locks).
595 */
596 while (delta_cpu >= NS_PER_TICK) {
597 delta_cpu -= NS_PER_TICK;
598 per_cpu(processed_system_time, cpu) += NS_PER_TICK;
599 update_process_times(user_mode(regs));
600 profile_tick(CPU_PROFILING, regs);
601 }
603 return IRQ_HANDLED;
604 }
606 /* not static: needed by APM */
/* Read the current wallclock seconds from EFI or the CMOS RTC, under
 * rtc_lock. */
607 unsigned long get_cmos_time(void)
608 {
609 unsigned long retval;
611 spin_lock(&rtc_lock);
613 if (efi_enabled)
614 retval = efi_get_time();
615 else
616 retval = mach_get_cmos_time();
618 spin_unlock(&rtc_lock);
620 return retval;
621 }
622 static void sync_cmos_clock(unsigned long dummy);
/* Self-rearming timer that keeps the hardware RTC in sync with an
 * NTP-synchronized system clock. */
624 static struct timer_list sync_cmos_timer =
625 TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
627 static void sync_cmos_clock(unsigned long dummy)
628 {
629 struct timeval now, next;
630 int fail = 1;
632 /*
633 * If we have an externally synchronized Linux clock, then update
634 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
635 * called as close as possible to 500 ms before the new second starts.
636 * This code is run on a timer. If the clock is set, that timer
637 * may not expire at the correct time. Thus, we adjust...
638 */
639 if ((time_status & STA_UNSYNC) != 0)
640 /*
641 * Not synced, exit, do not restart a timer (if one is
642 * running, let it run out).
643 */
644 return;
/* Only write the RTC inside the window around the target sub-second
 * offset; otherwise just reschedule closer to it. */
646 do_gettimeofday(&now);
647 if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
648 now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
649 fail = set_rtc_mmss(now.tv_sec);
651 next.tv_usec = USEC_AFTER - now.tv_usec;
652 if (next.tv_usec <= 0)
653 next.tv_usec += USEC_PER_SEC;
/* On success, sleep ~11 minutes (659 s); on failure retry next second. */
655 if (!fail)
656 next.tv_sec = 659;
657 else
658 next.tv_sec = 0;
660 if (next.tv_usec >= USEC_PER_SEC) {
661 next.tv_sec++;
662 next.tv_usec -= USEC_PER_SEC;
663 }
664 mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
665 }
/* Called when the clock becomes NTP-synchronized: kick off the CMOS-sync
 * timer on the next jiffy. */
667 void notify_arch_cmos_timer(void)
668 {
669 mod_timer(&sync_cmos_timer, jiffies + 1);
670 }
/* State captured at suspend: system-vs-CMOS offset, and CMOS time at sleep. */
672 static long clock_cmos_diff, sleep_start;
/* Suspend hook: record the offsets needed to reconstruct time on resume. */
674 static int timer_suspend(struct sys_device *dev, pm_message_t state)
675 {
676 /*
677 * Estimate time zone so that set_time can update the clock
678 */
679 clock_cmos_diff = -get_cmos_time();
680 clock_cmos_diff += get_seconds();
681 sleep_start = get_cmos_time();
682 return 0;
683 }
/* Resume hook: restore xtime from the CMOS clock plus the saved offset,
 * and advance jiffies by the measured sleep length. */
685 static int timer_resume(struct sys_device *dev)
686 {
687 unsigned long flags;
688 unsigned long sec;
689 unsigned long sleep_length;
691 #ifdef CONFIG_HPET_TIMER
692 if (is_hpet_enabled())
693 hpet_reenable();
694 #endif
695 sec = get_cmos_time() + clock_cmos_diff;
/* Sleep length in jiffies, from CMOS-second granularity. */
696 sleep_length = (get_cmos_time() - sleep_start) * HZ;
697 write_seqlock_irqsave(&xtime_lock, flags);
698 xtime.tv_sec = sec;
699 xtime.tv_nsec = 0;
700 write_sequnlock_irqrestore(&xtime_lock, flags);
701 jiffies += sleep_length;
702 wall_jiffies += sleep_length;
703 return 0;
704 }
/* sysdev glue so the "timer" device participates in suspend/resume. */
706 static struct sysdev_class timer_sysclass = {
707 .resume = timer_resume,
708 .suspend = timer_suspend,
709 set_kset_name("timer"),
710 };
713 /* XXX this driverfs stuff should probably go elsewhere later -john */
714 static struct sys_device device_timer = {
715 .id = 0,
716 .cls = &timer_sysclass,
717 };
/* Register the timer sysdev class and device; returns 0 or sysdev error. */
719 static int time_init_device(void)
720 {
721 int error = sysdev_class_register(&timer_sysclass);
722 if (!error)
723 error = sysdev_register(&device_timer);
724 return error;
725 }
727 device_initcall(time_init_device);
729 #ifdef CONFIG_HPET_TIMER
730 extern void (*late_time_init)(void);
731 /* Duplicate of time_init() below, with hpet_enable part added */
/* Runs as late_time_init because HPET setup needs memory-mapped I/O,
 * which is only possible after mem_init(). */
732 static void __init hpet_time_init(void)
733 {
734 xtime.tv_sec = get_cmos_time();
735 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
736 set_normalized_timespec(&wall_to_monotonic,
737 -xtime.tv_sec, -xtime.tv_nsec);
739 if ((hpet_enable() >= 0) && hpet_use_timer) {
740 printk("Using HPET for base-timer\n");
741 }
743 cur_timer = select_timer();
744 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
746 time_init_hook();
747 }
748 #endif
750 /* Dynamically-mapped IRQ. */
751 DEFINE_PER_CPU(int, timer_irq);
753 extern void (*late_time_init)(void);
/* Bind CPU0's VIRQ_TIMER to timer_interrupt(). Deferred to late_time_init
 * because request_irq() needs kmem to be up (see time_init()). */
754 static void setup_cpu0_timer_irq(void)
755 {
756 per_cpu(timer_irq, 0) =
757 bind_virq_to_irqhandler(
758 VIRQ_TIMER,
759 0,
760 timer_interrupt,
761 SA_INTERRUPT,
762 "timer0",
763 NULL);
764 BUG_ON(per_cpu(timer_irq, 0) < 0);
765 }
/*
 * Boot-time clock initialization: seed the shadow time values and the
 * processed-time baselines from Xen, set the wallclock, and derive cpu_khz.
 * IRQ binding is deferred to late_time_init (kmem not yet initialized).
 */
767 void __init time_init(void)
768 {
769 #ifdef CONFIG_HPET_TIMER
770 if (is_hpet_capable()) {
771 /*
772 * HPET initialization needs to do memory-mapped io. So, let
773 * us do a late initialization after mem_init().
774 */
775 late_time_init = hpet_time_init;
776 return;
777 }
778 #endif
779 get_time_values_from_xen();
/* Start both global and CPU0 accounting from the same Xen timestamp. */
781 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
782 per_cpu(processed_system_time, 0) = processed_system_time;
784 update_wallclock();
786 init_cpu_khz();
787 printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
788 cpu_khz / 1000, cpu_khz % 1000);
/* x86-64: prime the vsyscall time state from the calibrated TSC. */
790 #if defined(__x86_64__)
791 vxtime.mode = VXTIME_TSC;
792 vxtime.quot = (1000000L << 32) / vxtime_hz;
793 vxtime.tsc_quot = (1000L << 32) / cpu_khz;
794 vxtime.hz = vxtime_hz;
795 sync_core();
796 rdtscll(vxtime.last_tsc);
797 #endif
799 /* Cannot request_irq() until kmem is initialised. */
800 late_time_init = setup_cpu0_timer_irq;
801 }
803 /* Convert jiffies to system time. */
/* Maps an absolute jiffy value @j to Xen system time (ns), clamping to at
 * least one tick in the future. Used to program one-shot timer hypercalls. */
804 static inline u64 jiffies_to_st(unsigned long j)
805 {
806 unsigned long seq;
807 long delta;
808 u64 st;
810 do {
811 seq = read_seqbegin(&xtime_lock);
812 delta = j - jiffies;
813 /* NB. The next check can trigger in some wrap-around cases,
814 * but that's ok: we'll just end up with a shorter timeout. */
815 if (delta < 1)
816 delta = 1;
817 st = processed_system_time + (delta * (u64)NS_PER_TICK);
818 } while (read_seqretry(&xtime_lock, seq));
820 return st;
821 }
823 /*
824 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
825 * These functions are based on implementations from arch/s390/kernel/time.c
826 */
/* Called on an idle CPU: program Xen's one-shot timer for the next pending
 * kernel timer instead of ticking every jiffy. */
827 void stop_hz_timer(void)
828 {
829 unsigned int cpu = smp_processor_id();
830 unsigned long j;
832 /* s390 does this /before/ checking rcu_pending(). We copy them. */
833 cpu_set(cpu, nohz_cpu_mask);
835 /* Leave ourselves in 'tick mode' if rcu or softirq pending. */
836 if (rcu_pending(cpu) || local_softirq_pending()) {
837 cpu_clear(cpu, nohz_cpu_mask);
838 j = jiffies + 1;
839 } else {
840 j = next_timer_interrupt();
841 }
843 BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
844 }
/* Leave tickless mode: clear ourselves from nohz_cpu_mask. */
846 void start_hz_timer(void)
847 {
848 cpu_clear(smp_processor_id(), nohz_cpu_mask);
849 }
851 /* No locking required. We are only CPU running, and interrupts are off. */
/* Re-sync all time state from Xen after save/restore or migration. */
852 void time_resume(void)
853 {
854 init_cpu_khz();
856 get_time_values_from_xen();
/* Reset accounting baselines so no phantom ticks are replayed. */
858 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
859 per_cpu(processed_system_time, 0) = processed_system_time;
861 update_wallclock();
862 }
864 #ifdef CONFIG_SMP
/* Per-CPU IRQ names must outlive the IRQ binding, hence static storage. */
865 static char timer_name[NR_CPUS][15];
/* Secondary-CPU bringup: seed this CPU's processed-time baseline and bind
 * its VIRQ_TIMER to timer_interrupt(). Never used for CPU0 (see
 * setup_cpu0_timer_irq). */
867 void local_setup_timer(unsigned int cpu)
868 {
869 int seq;
871 BUG_ON(cpu == 0);
873 do {
874 seq = read_seqbegin(&xtime_lock);
875 per_cpu(processed_system_time, cpu) =
876 per_cpu(shadow_time, cpu).system_timestamp;
877 } while (read_seqretry(&xtime_lock, seq));
879 sprintf(timer_name[cpu], "timer%d", cpu);
880 per_cpu(timer_irq, cpu) =
881 bind_virq_to_irqhandler(
882 VIRQ_TIMER,
883 cpu,
884 timer_interrupt,
885 SA_INTERRUPT,
886 timer_name[cpu],
887 NULL);
888 BUG_ON(per_cpu(timer_irq, cpu) < 0);
889 }
/* CPU teardown counterpart: release the per-CPU timer IRQ binding. */
891 void local_teardown_timer(unsigned int cpu)
892 {
893 BUG_ON(cpu == 0);
894 unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
895 }
896 #endif
898 /*
899 * /proc/sys/xen: This really belongs in another file. It can stay here for
900 * now however.
901 */
/* Exposes independent_wallclock as /proc/sys/xen/independent_wallclock. */
902 static ctl_table xen_subtable[] = {
903 {1, "independent_wallclock", &independent_wallclock,
904 sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
905 {0}
906 };
907 static ctl_table xen_table[] = {
908 {123, "xen", NULL, 0, 0555, xen_subtable},
909 {0}
910 };
/* Registration result is deliberately ignored: sysctl is best-effort. */
911 static int __init xen_sysctl_init(void)
912 {
913 (void)register_sysctl_table(xen_table, 0);
914 return 0;
915 }
916 __initcall(xen_sysctl_init);
918 /*
919 * Local variables:
920 * c-file-style: "linux"
921 * indent-tabs-mode: t
922 * c-indent-level: 8
923 * c-basic-offset: 8
924 * tab-width: 8
925 * End:
926 */