
linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c @ 8725:a12e08eb0209

Proper fix for profile_pc on x86_64.
Also enable CONFIG_FRAME_POINTER for dom0 and domU kernels.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Wed Feb 01 15:01:04 2006 +0000 (2006-02-01)
parents a3fbce644bcc
children 2c5873f99fe5
/*
 *  linux/arch/i386/kernel/time.c
 *
 *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
 *
 * This file contains the PC-specific time handling details:
 * reading the RTC at bootup, etc..
 * 1994-07-02    Alan Modra
 *      fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
 * 1995-03-26    Markus Kuhn
 *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
 *      precision CMOS clock update
 * 1996-05-03    Ingo Molnar
 *      fixed time warps in do_[slow|fast]_gettimeoffset()
 * 1997-09-10    Updated NTP code according to technical memorandum Jan '96
 *               "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-09-05    (Various)
 *      More robust do_fast_gettimeoffset() algorithm implemented
 *      (works with APM, Cyrix 6x86MX and Centaur C6),
 *      monotonic gettimeofday() with fast_get_timeoffset(),
 *      drift-proof precision TSC calibration on boot
 *      (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
 *      Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
 *      ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
 * 1998-12-16    Andrea Arcangeli
 *      Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
 *      because it was not accounting for lost_ticks.
 * 1998-12-24    Copyright (C) 1998  Andrea Arcangeli
 *      Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *      serialize accesses to xtime/lost_ticks).
 */
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
#include <linux/efi.h>
#include <linux/mca.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>

#include <asm/io.h>
#include <asm/smp.h>
#include <asm/irq.h>
#include <asm/msr.h>
#include <asm/delay.h>
#include <asm/mpspec.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/sections.h>

#include "mach_time.h"

#include <linux/timex.h>
#include <linux/config.h>

#include <asm/hpet.h>

#include <asm/arch_hooks.h>

#include "io_ports.h"

#include <xen/evtchn.h>

#if defined (__i386__)
#include <asm/i8259.h>
#endif

int pit_latch_buggy;            /* extern */

u64 jiffies_64 = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

#if defined(__x86_64__)
unsigned long vxtime_hz = PIT_TICK_RATE;
struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;
#endif

unsigned int cpu_khz;           /* Detected as we calibrate the TSC */
EXPORT_SYMBOL(cpu_khz);

extern unsigned long wall_jiffies;

DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);

#if defined (__i386__)
#include <asm/i8253.h>
#endif

DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);

extern struct init_timer_opts timer_tsc_init;
extern struct timer_opts timer_tsc;
#define timer_none timer_tsc
struct timer_opts *cur_timer __read_mostly = &timer_tsc;
/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
        u64 tsc_timestamp;     /* TSC at last update of time vals.  */
        u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
        u32 tsc_to_nsec_mul;
        u32 tsc_to_usec_mul;
        int tsc_shift;
        u32 version;
};
static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
static struct timespec shadow_tv;
static u32 shadow_tv_version;
/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time;   /* System time (ns) at last processing. */
static DEFINE_PER_CPU(u64, processed_system_time);

/* Must be signed, as it's compared with s64 quantities which can be -ve. */
#define NS_PER_TICK (1000000000LL/HZ)

static inline void __normalize_time(time_t *sec, s64 *nsec)
{
        while (*nsec >= NSEC_PER_SEC) {
                (*nsec) -= NSEC_PER_SEC;
                (*sec)++;
        }
        while (*nsec < 0) {
                (*nsec) += NSEC_PER_SEC;
                (*sec)--;
        }
}

/* Does this guest OS track Xen time, or set its wall clock independently? */
static int independent_wallclock = 0;
static int __init __independent_wallclock(char *str)
{
        independent_wallclock = 1;
        return 1;
}
__setup("independent_wallclock", __independent_wallclock);

int tsc_disable __initdata = 0;

static void delay_tsc(unsigned long loops)
{
        unsigned long bclock, now;

        rdtscl(bclock);
        do {
                rep_nop();
                rdtscl(now);
        } while ((now - bclock) < loops);
}

struct timer_opts timer_tsc = {
        .name = "tsc",
        .delay = delay_tsc,
};
/*
 * Scale a 64-bit delta by a 32-bit fraction: shift the delta by 'shift',
 * multiply by 'mul_frac', and keep the upper 64 bits of the 96-bit product.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
        u64 product;
#ifdef __i386__
        u32 tmp1, tmp2;
#endif

        if ( shift < 0 )
                delta >>= -shift;
        else
                delta <<= shift;

#ifdef __i386__
        __asm__ (
                "mul %5       ; "
                "mov %4,%%eax ; "
                "mov %%edx,%4 ; "
                "mul %5       ; "
                "xor %5,%5    ; "
                "add %4,%%eax ; "
                "adc %5,%%edx ; "
                : "=A" (product), "=r" (tmp1), "=r" (tmp2)
                : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#else
        __asm__ (
                "mul %%rdx ; shrd $32,%%rdx,%%rax"
                : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#endif

        return product;
}
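
/*
 * For reference, a portable C sketch of the same computation (illustrative
 * only, not part of the original file; it assumes a GCC-style 128-bit
 * integer type, which the i386 build does not have -- hence the asm above):
 */
#if 0
static inline u64 scale_delta_sketch(u64 delta, u32 mul_frac, int shift)
{
        if (shift < 0)
                delta >>= -shift;
        else
                delta <<= shift;
        /* 64x32 -> 96-bit product; the result is bits [32..95]. */
        return (u64)(((unsigned __int128)delta * mul_frac) >> 32);
}
#endif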
#if defined (__i386__)
int read_current_timer(unsigned long *timer_val)
{
        rdtscl(*timer_val);
        return 0;
}
#endif

void init_cpu_khz(void)
{
        u64 __cpu_khz = 1000000ULL << 32;
        struct vcpu_time_info *info;
        info = &HYPERVISOR_shared_info->vcpu_info[0].time;
        do_div(__cpu_khz, info->tsc_to_system_mul);
        if ( info->tsc_shift < 0 )
                cpu_khz = __cpu_khz << -info->tsc_shift;
        else
                cpu_khz = __cpu_khz >> info->tsc_shift;
}
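
/*
 * Why this recovers the TSC frequency: scale_delta() converts TSC ticks to
 * nanoseconds as ns = (ticks << tsc_shift) * tsc_to_system_mul / 2^32, so
 * one tick lasts tsc_to_system_mul * 2^(tsc_shift - 32) ns. The frequency
 * in kHz is then 10^6 / ns_per_tick = (10^6 * 2^32 / tsc_to_system_mul)
 * shifted right by tsc_shift -- exactly what init_cpu_khz() computes, with
 * the shift direction reversed when tsc_shift is negative.
 */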
static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
        u64 now, delta;
        rdtscll(now);
        delta = now - shadow->tsc_timestamp;
        return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

static unsigned long get_usec_offset(struct shadow_time_info *shadow)
{
        u64 now, delta;
        rdtscll(now);
        delta = now - shadow->tsc_timestamp;
        return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
}

static void __update_wallclock(time_t sec, long nsec)
{
        long wtm_nsec, xtime_nsec;
        time_t wtm_sec, xtime_sec;
        u64 tmp, wc_nsec;

        /* Adjust wall-clock time base based on wall_jiffies ticks. */
        wc_nsec = processed_system_time;
        wc_nsec += sec * (u64)NSEC_PER_SEC;
        wc_nsec += nsec;
        wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;

        /* Split wallclock base into seconds and nanoseconds. */
        tmp = wc_nsec;
        xtime_nsec = do_div(tmp, 1000000000);
        xtime_sec  = (time_t)tmp;

        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);

        set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

        ntp_clear();
}

static void update_wallclock(void)
{
        shared_info_t *s = HYPERVISOR_shared_info;

        do {
                shadow_tv_version = s->wc_version;
                rmb();
                shadow_tv.tv_sec  = s->wc_sec;
                shadow_tv.tv_nsec = s->wc_nsec;
                rmb();
        } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));

        if (!independent_wallclock)
                __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
}

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static void get_time_values_from_xen(void)
{
        shared_info_t *s = HYPERVISOR_shared_info;
        struct vcpu_time_info   *src;
        struct shadow_time_info *dst;

        src = &s->vcpu_info[smp_processor_id()].time;
        dst = &per_cpu(shadow_time, smp_processor_id());

        do {
                dst->version = src->version;
                rmb();
                dst->tsc_timestamp    = src->tsc_timestamp;
                dst->system_timestamp = src->system_time;
                dst->tsc_to_nsec_mul  = src->tsc_to_system_mul;
                dst->tsc_shift        = src->tsc_shift;
                rmb();
        } while ((src->version & 1) | (dst->version ^ src->version));

        dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
}
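
/*
 * The retry conditions above implement Xen's version-counter protocol: the
 * producer makes 'version' odd before updating the record and even again
 * afterwards, so a snapshot is consistent only if the version was even and
 * unchanged across the copy. A minimal reader sketch of the same pattern
 * (hypothetical names, illustrative only):
 */
#if 0
struct snapshot {
        u32 version;
        u64 payload;
};

static u64 read_consistent(volatile struct snapshot *s)
{
        u32 ver;
        u64 val;

        do {
                ver = s->version;       /* snapshot the version... */
                rmb();                  /* ...before reading the payload */
                val = s->payload;
                rmb();                  /* ...and before re-checking it */
        } while ((ver & 1) | (ver ^ s->version));

        return val;
}
#endif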
static inline int time_values_up_to_date(int cpu)
{
        struct vcpu_time_info   *src;
        struct shadow_time_info *dst;

        src = &HYPERVISOR_shared_info->vcpu_info[cpu].time;
        dst = &per_cpu(shadow_time, cpu);

        return (dst->version == src->version);
}

/*
 * This is a special lock that is owned by the CPU and holds the index
 * register we are working with.  It is required for NMI access to the
 * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
 */
volatile unsigned long cmos_lock = 0;
EXPORT_SYMBOL(cmos_lock);

/* Routines for accessing the CMOS RAM/RTC. */
unsigned char rtc_cmos_read(unsigned char addr)
{
        unsigned char val;
        lock_cmos_prefix(addr);
        outb_p(addr, RTC_PORT(0));
        val = inb_p(RTC_PORT(1));
        lock_cmos_suffix(addr);
        return val;
}
EXPORT_SYMBOL(rtc_cmos_read);

void rtc_cmos_write(unsigned char val, unsigned char addr)
{
        lock_cmos_prefix(addr);
        outb_p(addr, RTC_PORT(0));
        outb_p(val, RTC_PORT(1));
        lock_cmos_suffix(addr);
}
EXPORT_SYMBOL(rtc_cmos_write);
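
/*
 * Typical use of these accessors (a sketch only; RTC_SECONDS is the
 * MC146818 seconds register, defined in <linux/mc146818rtc.h>, and real
 * callers serialize on rtc_lock):
 */
#if 0
static unsigned char example_rtc_seconds(void)
{
        return rtc_cmos_read(RTC_SECONDS);
}
#endif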
/*
 * This version of gettimeofday has microsecond resolution
 * and better than microsecond precision on fast x86 machines with TSC.
 */
void do_gettimeofday(struct timeval *tv)
{
        unsigned long seq;
        unsigned long usec, sec;
        unsigned long max_ntp_tick;
        s64 nsec;
        unsigned int cpu;
        struct shadow_time_info *shadow;
        u32 local_time_version;

        cpu = get_cpu();
        shadow = &per_cpu(shadow_time, cpu);

        do {
                unsigned long lost;

                local_time_version = shadow->version;
                seq = read_seqbegin(&xtime_lock);

                usec = get_usec_offset(shadow);
                lost = jiffies - wall_jiffies;

                /*
                 * If time_adjust is negative then NTP is slowing the clock
                 * so make sure not to go into next possible interval.
                 * Better to lose some accuracy than have time go backwards..
                 */
                if (unlikely(time_adjust < 0)) {
                        max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
                        usec = min(usec, max_ntp_tick);

                        if (lost)
                                usec += lost * max_ntp_tick;
                }
                else if (unlikely(lost))
                        usec += lost * (USEC_PER_SEC / HZ);

                sec = xtime.tv_sec;
                usec += (xtime.tv_nsec / NSEC_PER_USEC);

                nsec = shadow->system_timestamp - processed_system_time;
                __normalize_time(&sec, &nsec);
                usec += (long)nsec / NSEC_PER_USEC;

                if (unlikely(!time_values_up_to_date(cpu))) {
                        /*
                         * We may have blocked for a long time,
                         * rendering our calculations invalid
                         * (e.g. the time delta may have
                         * overflowed). Detect that and recalculate
                         * with fresh values.
                         */
                        get_time_values_from_xen();
                        continue;
                }
        } while (read_seqretry(&xtime_lock, seq) ||
                 (local_time_version != shadow->version));

        put_cpu();

        while (usec >= USEC_PER_SEC) {
                usec -= USEC_PER_SEC;
                sec++;
        }

        tv->tv_sec = sec;
        tv->tv_usec = usec;
}

EXPORT_SYMBOL(do_gettimeofday);
int do_settimeofday(struct timespec *tv)
{
        time_t sec;
        s64 nsec;
        unsigned int cpu;
        struct shadow_time_info *shadow;
        dom0_op_t op;

        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;

        cpu = get_cpu();
        shadow = &per_cpu(shadow_time, cpu);

        write_seqlock_irq(&xtime_lock);

        /*
         * Ensure we don't get blocked for a long time so that our time delta
         * overflows. If that were to happen then our shadow time values would
         * be stale, so we can retry with fresh ones.
         */
        for ( ; ; ) {
                nsec = tv->tv_nsec - get_nsec_offset(shadow);
                if (time_values_up_to_date(cpu))
                        break;
                get_time_values_from_xen();
        }
        sec = tv->tv_sec;
        __normalize_time(&sec, &nsec);

        if ((xen_start_info->flags & SIF_INITDOMAIN) &&
            !independent_wallclock) {
                op.cmd = DOM0_SETTIME;
                op.u.settime.secs        = sec;
                op.u.settime.nsecs       = nsec;
                op.u.settime.system_time = shadow->system_timestamp;
                HYPERVISOR_dom0_op(&op);
                update_wallclock();
        } else if (independent_wallclock) {
                nsec -= shadow->system_timestamp;
                __normalize_time(&sec, &nsec);
                __update_wallclock(sec, nsec);
        }

        write_sequnlock_irq(&xtime_lock);

        put_cpu();

        clock_was_set();
        return 0;
}

EXPORT_SYMBOL(do_settimeofday);

#ifdef CONFIG_XEN_PRIVILEGED_GUEST
static int set_rtc_mmss(unsigned long nowtime)
{
        int retval;

        WARN_ON(irqs_disabled());

        if (!(xen_start_info->flags & SIF_INITDOMAIN))
                return 0;

        /* gets recalled with irq locally disabled */
        spin_lock_irq(&rtc_lock);
        if (efi_enabled)
                retval = efi_set_rtc_mmss(nowtime);
        else
                retval = mach_set_rtc_mmss(nowtime);
        spin_unlock_irq(&rtc_lock);

        return retval;
}
#else
static int set_rtc_mmss(unsigned long nowtime)
{
        return 0;
}
#endif
/* monotonic_clock(): returns # of nanoseconds passed since time_init()
 * Note: This function is required to return accurate
 * time even in the absence of multiple timer ticks.
 */
unsigned long long monotonic_clock(void)
{
        int cpu = get_cpu();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
        u64 time;
        u32 local_time_version;

        do {
                local_time_version = shadow->version;
                smp_rmb();
                time = shadow->system_timestamp + get_nsec_offset(shadow);
                if (!time_values_up_to_date(cpu))
                        get_time_values_from_xen();
                smp_rmb();
        } while (local_time_version != shadow->version);

        put_cpu();

        return time;
}
EXPORT_SYMBOL(monotonic_clock);

unsigned long long sched_clock(void)
{
        return monotonic_clock();
}

#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
        unsigned long pc = instruction_pointer(regs);

#ifdef __x86_64__
        /* Assume the lock function has either no stack frame or only a
           single word.  This checks if the address on the stack looks like
           a kernel text address.  There is a small window for false hits,
           but in that case the tick is just accounted to the spinlock
           function.  Better would be to write these functions in assembler
           again and check exactly. */
        if (in_lock_functions(pc)) {
                char *v = *(char **)regs->rsp;
                if ((v >= _stext && v <= _etext) ||
                    (v >= _sinittext && v <= _einittext) ||
                    (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
                        return (unsigned long)v;
                return ((unsigned long *)regs->rsp)[1];
        }
#else
        if (in_lock_functions(pc))
                return *(unsigned long *)(regs->REG_BP + 4);
#endif

        return pc;
}
EXPORT_SYMBOL(profile_pc);
#endif
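
/*
 * Note on the i386 branch above: it depends on CONFIG_FRAME_POINTER (which
 * this changeset enables for dom0 and domU kernels). With frame pointers,
 * every prologue does "push %ebp; mov %esp,%ebp", so inside a lock function
 * the frame looks like:
 *
 *      [REG_BP + 4]  return address = PC of the lock function's caller
 *      [REG_BP + 0]  caller's saved %ebp
 *
 * which is why *(unsigned long *)(regs->REG_BP + 4) attributes the tick to
 * the caller rather than to the spinlock code itself.
 */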
irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        s64 delta, delta_cpu;
        int i, cpu = smp_processor_id();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);

        write_seqlock(&xtime_lock);

        do {
                get_time_values_from_xen();

                delta = delta_cpu =
                        shadow->system_timestamp + get_nsec_offset(shadow);
                delta     -= processed_system_time;
                delta_cpu -= per_cpu(processed_system_time, cpu);
        } while (!time_values_up_to_date(cpu));

        if ((unlikely(delta < -1000000LL) || unlikely(delta_cpu < 0))
            && printk_ratelimit()) {
                printk("Timer ISR/%d: Time went backwards: "
                       "delta=%lld cpu_delta=%lld shadow=%lld "
                       "off=%lld processed=%lld cpu_processed=%lld\n",
                       cpu, delta, delta_cpu, shadow->system_timestamp,
                       (s64)get_nsec_offset(shadow),
                       processed_system_time,
                       per_cpu(processed_system_time, cpu));
                for (i = 0; i < num_online_cpus(); i++)
                        printk(" %d: %lld\n", i,
                               per_cpu(processed_system_time, i));
        }

        /* System-wide jiffy work. */
        while (delta >= NS_PER_TICK) {
                delta -= NS_PER_TICK;
                processed_system_time += NS_PER_TICK;
                do_timer(regs);
        }

        if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
                update_wallclock();
                clock_was_set();
        }

        write_sequnlock(&xtime_lock);

        /*
         * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
         * if there is risk of deadlock if we do (since update_process_times
         * may do scheduler rebalancing work and thus acquire runqueue locks).
         */
        while (delta_cpu >= NS_PER_TICK) {
                delta_cpu -= NS_PER_TICK;
                per_cpu(processed_system_time, cpu) += NS_PER_TICK;
                update_process_times(user_mode(regs));
                profile_tick(CPU_PROFILING, regs);
        }

        return IRQ_HANDLED;
}
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
        unsigned long retval;

        spin_lock(&rtc_lock);

        if (efi_enabled)
                retval = efi_get_time();
        else
                retval = mach_get_cmos_time();

        spin_unlock(&rtc_lock);

        return retval;
}
EXPORT_SYMBOL(get_cmos_time);

static void sync_cmos_clock(unsigned long dummy);

static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);

static void sync_cmos_clock(unsigned long dummy)
{
        struct timeval now, next;
        int fail = 1;

        /*
         * If we have an externally synchronized Linux clock, then update
         * the CMOS clock accordingly every ~11 minutes. set_rtc_mmss() has
         * to be called as close as possible to 500 ms before the new second
         * starts. This code is run on a timer.  If the clock is set, that
         * timer may not expire at the correct time.  Thus, we adjust...
         */
        if (!ntp_synced())
                /*
                 * Not synced, exit, do not restart a timer (if one is
                 * running, let it run out).
                 */
                return;

        do_gettimeofday(&now);
        if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
            now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
                fail = set_rtc_mmss(now.tv_sec);

        next.tv_usec = USEC_AFTER - now.tv_usec;
        if (next.tv_usec <= 0)
                next.tv_usec += USEC_PER_SEC;

        if (!fail)
                next.tv_sec = 659;
        else
                next.tv_sec = 0;

        if (next.tv_usec >= USEC_PER_SEC) {
                next.tv_sec++;
                next.tv_usec -= USEC_PER_SEC;
        }
        mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
}
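
/*
 * Worked numbers for the resync cadence: on a successful write the timer is
 * re-armed for 659 s plus a sub-second correction aimed at the ~500 ms
 * point, i.e. roughly 660 s = 11 minutes; on failure next.tv_sec is 0, so
 * the write is retried at the next second boundary.
 */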
void notify_arch_cmos_timer(void)
{
        mod_timer(&sync_cmos_timer, jiffies + 1);
}

static long clock_cmos_diff, sleep_start;

static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
        /*
         * Estimate time zone so that set_time can update the clock
         */
        clock_cmos_diff = -get_cmos_time();
        clock_cmos_diff += get_seconds();
        sleep_start = get_cmos_time();
        last_timer = cur_timer;
        cur_timer = &timer_none;
        if (last_timer->suspend)
                last_timer->suspend(state);
        return 0;
}

static int timer_resume(struct sys_device *dev)
{
        unsigned long flags;
        unsigned long sec;
        unsigned long sleep_length;

#ifdef CONFIG_HPET_TIMER
        if (is_hpet_enabled())
                hpet_reenable();
#endif
        sec = get_cmos_time() + clock_cmos_diff;
        sleep_length = (get_cmos_time() - sleep_start) * HZ;
        write_seqlock_irqsave(&xtime_lock, flags);
        xtime.tv_sec = sec;
        xtime.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
        jiffies += sleep_length;
        wall_jiffies += sleep_length;
        if (last_timer->resume)
                last_timer->resume();
        cur_timer = last_timer;
        last_timer = NULL;
        touch_softlockup_watchdog();
        return 0;
}

static struct sysdev_class timer_sysclass = {
        .resume = timer_resume,
        .suspend = timer_suspend,
        set_kset_name("timer"),
};

/* XXX this driverfs stuff should probably go elsewhere later -john */
static struct sys_device device_timer = {
        .id = 0,
        .cls = &timer_sysclass,
};

static int time_init_device(void)
{
        int error = sysdev_class_register(&timer_sysclass);
        if (!error)
                error = sysdev_register(&device_timer);
        return error;
}

device_initcall(time_init_device);
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
        xtime.tv_sec = get_cmos_time();
        xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
        set_normalized_timespec(&wall_to_monotonic,
                -xtime.tv_sec, -xtime.tv_nsec);

        if ((hpet_enable() >= 0) && hpet_use_timer) {
                printk("Using HPET for base-timer\n");
        }

        cur_timer = select_timer();
        printk(KERN_INFO "Using %s for high-res timesource\n", cur_timer->name);

        time_init_hook();
}
#endif
/* Dynamically-mapped IRQ. */
DEFINE_PER_CPU(int, timer_irq);

extern void (*late_time_init)(void);
static void setup_cpu0_timer_irq(void)
{
        per_cpu(timer_irq, 0) =
                bind_virq_to_irqhandler(
                        VIRQ_TIMER,
                        0,
                        timer_interrupt,
                        SA_INTERRUPT,
                        "timer0",
                        NULL);
        BUG_ON(per_cpu(timer_irq, 0) < 0);
}

void __init time_init(void)
{
#ifdef CONFIG_HPET_TIMER
        if (is_hpet_capable()) {
                /*
                 * HPET initialization needs to do memory-mapped io. So, let
                 * us do a late initialization after mem_init().
                 */
                late_time_init = hpet_time_init;
                return;
        }
#endif
        get_time_values_from_xen();

        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;

        update_wallclock();

        init_cpu_khz();
        printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
               cpu_khz / 1000, cpu_khz % 1000);

#if defined(__x86_64__)
        vxtime.mode = VXTIME_TSC;
        vxtime.quot = (1000000L << 32) / vxtime_hz;
        vxtime.tsc_quot = (1000L << 32) / cpu_khz;
        sync_core();
        rdtscll(vxtime.last_tsc);
#endif

        /* Cannot request_irq() until kmem is initialised. */
        late_time_init = setup_cpu0_timer_irq;
}
/* Convert jiffies to system time. */
static inline u64 jiffies_to_st(unsigned long j)
{
        unsigned long seq;
        long delta;
        u64 st;

        do {
                seq = read_seqbegin(&xtime_lock);
                delta = j - jiffies;
                /* NB. The next check can trigger in some wrap-around cases,
                 * but that's ok: we'll just end up with a shorter timeout. */
                if (delta < 1)
                        delta = 1;
                st = processed_system_time + (delta * (u64)NS_PER_TICK);
        } while (read_seqretry(&xtime_lock, seq));

        return st;
}
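
/*
 * Worked example (hypothetical values): with HZ = 100, NS_PER_TICK is
 * 10,000,000 ns, so a deadline 5 jiffies ahead yields
 * processed_system_time + 5 * 10^7 ns -- an absolute system time suitable
 * for HYPERVISOR_set_timer_op(), as used by stop_hz_timer() below.
 */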
/*
 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
 * These functions are based on implementations from arch/s390/kernel/time.c
 */
void stop_hz_timer(void)
{
        unsigned int cpu = smp_processor_id();
        unsigned long j;

        /* We must do this /before/ checking rcu_pending(). */
        cpu_set(cpu, nohz_cpu_mask);
        smp_mb();

        /* Leave ourselves in 'tick mode' if rcu or softirq pending. */
        if (rcu_pending(cpu) || local_softirq_pending()) {
                cpu_clear(cpu, nohz_cpu_mask);
                j = jiffies + 1;
        } else {
                j = next_timer_interrupt();
        }

        BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
}

void start_hz_timer(void)
{
        cpu_clear(smp_processor_id(), nohz_cpu_mask);
}
/* No locking required. We are the only CPU running, and interrupts are off. */
void time_resume(void)
{
        init_cpu_khz();

        get_time_values_from_xen();

        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;

        update_wallclock();
}

#ifdef CONFIG_SMP
static char timer_name[NR_CPUS][15];

void local_setup_timer(unsigned int cpu)
{
        int seq;

        BUG_ON(cpu == 0);

        do {
                seq = read_seqbegin(&xtime_lock);
                /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
                per_cpu(processed_system_time, cpu) =
                        per_cpu(shadow_time, 0).system_timestamp;
        } while (read_seqretry(&xtime_lock, seq));

        sprintf(timer_name[cpu], "timer%d", cpu);
        per_cpu(timer_irq, cpu) =
                bind_virq_to_irqhandler(
                        VIRQ_TIMER,
                        cpu,
                        timer_interrupt,
                        SA_INTERRUPT,
                        timer_name[cpu],
                        NULL);
        BUG_ON(per_cpu(timer_irq, cpu) < 0);
}

void local_teardown_timer(unsigned int cpu)
{
        BUG_ON(cpu == 0);
        unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
}
#endif
/*
 * /proc/sys/xen: This really belongs in another file. It can stay here for
 * now however.
 */
static ctl_table xen_subtable[] = {
        {1, "independent_wallclock", &independent_wallclock,
         sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
        {0}
};
static ctl_table xen_table[] = {
        {123, "xen", NULL, 0, 0555, xen_subtable},
        {0}
};
static int __init xen_sysctl_init(void)
{
        (void)register_sysctl_table(xen_table, 0);
        return 0;
}
__initcall(xen_sysctl_init);
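
/*
 * Once registered, the flag is a writable integer at
 * /proc/sys/xen/independent_wallclock; for example
 * "echo 1 > /proc/sys/xen/independent_wallclock" detaches this guest's
 * wall clock from Xen's, matching the "independent_wallclock" boot
 * parameter handled earlier in this file.
 */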
/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */