ia64/xen-unstable: xen/arch/x86/time.c @ 19545:34dca01addc9

x86: Disable cpuidle by default unless hpet broadcast is available.

Author:        Keir Fraser <keir.fraser@citrix.com>
Date:          Wed Apr 15 08:40:12 2009 +0100 (2009-04-15)
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Parents:       ce8f37efc992
Children:      b44db970f6b7
1 /******************************************************************************
2 * arch/x86/time.c
3 *
4 * Per-CPU time calibration and management.
5 *
6 * Copyright (c) 2002-2005, K A Fraser
7 *
8 * Portions from Linux are:
9 * Copyright (c) 1991, 1992, 1995 Linus Torvalds
10 */
12 #include <xen/config.h>
13 #include <xen/errno.h>
14 #include <xen/event.h>
15 #include <xen/sched.h>
16 #include <xen/lib.h>
17 #include <xen/config.h>
18 #include <xen/init.h>
19 #include <xen/time.h>
20 #include <xen/timer.h>
21 #include <xen/smp.h>
22 #include <xen/irq.h>
23 #include <xen/softirq.h>
24 #include <asm/io.h>
25 #include <asm/msr.h>
26 #include <asm/mpspec.h>
27 #include <asm/processor.h>
28 #include <asm/fixmap.h>
29 #include <asm/mc146818rtc.h>
30 #include <asm/div64.h>
31 #include <asm/hpet.h>
32 #include <io_ports.h>
34 /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
35 static char opt_clocksource[10];
36 string_param("clocksource", opt_clocksource);
38 /*
39 * opt_consistent_tscs: All TSCs tick at the exact same rate, allowing
40 * simplified system time handling.
41 */
42 static int opt_consistent_tscs;
43 boolean_param("consistent_tscs", opt_consistent_tscs);
45 unsigned long cpu_khz; /* CPU clock frequency in kHz. */
46 DEFINE_SPINLOCK(rtc_lock);
47 unsigned long pit0_ticks;
48 static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
49 static DEFINE_SPINLOCK(wc_lock);
51 struct time_scale {
52 int shift;
53 u32 mul_frac;
54 };
56 struct cpu_time {
57 u64 local_tsc_stamp;
58 s_time_t stime_local_stamp;
59 s_time_t stime_master_stamp;
60 struct time_scale tsc_scale;
61 };
63 struct platform_timesource {
64 char *id;
65 char *name;
66 u64 frequency;
67 u64 (*read_counter)(void);
68 int (*init)(struct platform_timesource *);
69 void (*resume)(struct platform_timesource *);
70 int counter_bits;
71 };
73 static DEFINE_PER_CPU(struct cpu_time, cpu_time);
75 /* Calibrate all CPUs to platform timer every EPOCH. */
76 #define EPOCH MILLISECS(1000)
77 static struct timer calibration_timer;
79 /*
80 * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
81 * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
82 * softirq handling will happen in time.
83 *
84 * The pit_lock protects the 16- and 32-bit stamp fields as well as the hardware reads of PIT channel 2.
85 */
86 static DEFINE_SPINLOCK(pit_lock);
87 static u16 pit_stamp16;
88 static u32 pit_stamp32;
89 static int using_pit;
91 /*
92 * 32-bit division of integer dividend and integer divisor yielding
93 * 32-bit fractional quotient.
94 */
95 static inline u32 div_frac(u32 dividend, u32 divisor)
96 {
97 u32 quotient, remainder;
98 ASSERT(dividend < divisor);
99 asm (
100 "divl %4"
101 : "=a" (quotient), "=d" (remainder)
102 : "0" (0), "1" (dividend), "r" (divisor) );
103 return quotient;
104 }
106 /*
107 * 32-bit multiplication of multiplicand and fractional multiplier
108 * yielding 32-bit product (radix point at same position as in multiplicand).
109 */
110 static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
111 {
112 u32 product_int, product_frac;
113 asm (
114 "mul %3"
115 : "=a" (product_frac), "=d" (product_int)
116 : "0" (multiplicand), "r" (multiplier) );
117 return product_int;
118 }
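/*
 * Worked example of the fixed-point convention used above: a u32 fraction f
 * represents the value f / 2^32. So div_frac(1, 4) yields 0x40000000 (0.25),
 * and mul_frac(1000, 0x40000000) yields 250. The pair is used throughout this
 * file to convert between TSC ticks and nanoseconds without 64-bit division.
 */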
120 /*
121 * Scale a 64-bit delta by shifting and then multiplying by a 32-bit fraction,
122 * yielding a 64-bit result.
123 */
124 static inline u64 scale_delta(u64 delta, struct time_scale *scale)
125 {
126 u64 product;
127 #ifdef CONFIG_X86_32
128 u32 tmp1, tmp2;
129 #endif
131 if ( scale->shift < 0 )
132 delta >>= -scale->shift;
133 else
134 delta <<= scale->shift;
136 #ifdef CONFIG_X86_32
137 asm (
138 "mul %5 ; "
139 "mov %4,%%eax ; "
140 "mov %%edx,%4 ; "
141 "mul %5 ; "
142 "xor %5,%5 ; "
143 "add %4,%%eax ; "
144 "adc %5,%%edx ; "
145 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
146 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
147 #else
148 asm (
149 "mul %%rdx ; shrd $32,%%rdx,%%rax"
150 : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
151 #endif
153 return product;
154 }
156 /* Compute the reciprocal of the given time_scale. */
157 static inline struct time_scale scale_reciprocal(struct time_scale scale)
158 {
159 struct time_scale reciprocal;
160 u32 dividend;
162 dividend = 0x80000000u;
163 reciprocal.shift = 1 - scale.shift;
164 while ( unlikely(dividend >= scale.mul_frac) )
165 {
166 dividend >>= 1;
167 reciprocal.shift++;
168 }
170 asm (
171 "divl %4"
172 : "=a" (reciprocal.mul_frac), "=d" (dividend)
173 : "0" (0), "1" (dividend), "r" (scale.mul_frac) );
175 return reciprocal;
176 }
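/*
 * Note: scale_reciprocal() inverts a time_scale, so if 'ts' converts TSC
 * ticks to nanoseconds then scale_reciprocal(ts) converts nanoseconds back
 * to ticks (up to rounding). This is how sys_to_tsc in cstate_restore_tsc()
 * and pmt_scale_r for ns_to_acpi_pm_tick() are derived below.
 */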
178 /*
179 * cpumask denoting the CPUs that need the timer interrupt delivered as an
180 * IPI in place of their local APIC timer.
181 */
182 extern int xen_cpuidle;
183 static cpumask_t pit_broadcast_mask;
185 static void smp_send_timer_broadcast_ipi(void)
186 {
187 int cpu = smp_processor_id();
188 cpumask_t mask;
190 cpus_and(mask, cpu_online_map, pit_broadcast_mask);
192 if ( cpu_isset(cpu, mask) )
193 {
194 cpu_clear(cpu, mask);
195 raise_softirq(TIMER_SOFTIRQ);
196 }
198 if ( !cpus_empty(mask) )
199 {
200 cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
201 }
202 }
204 static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
205 {
206 ASSERT(local_irq_is_enabled());
208 if ( hpet_legacy_irq_tick() )
209 return;
211 /* Only for start-of-day interrupt tests in io_apic.c. */
212 (*(volatile unsigned long *)&pit0_ticks)++;
214 /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
215 if ( !cpu_has_apic )
216 raise_softirq(TIMER_SOFTIRQ);
218 if ( xen_cpuidle )
219 smp_send_timer_broadcast_ipi();
221 /* Emulate a 32-bit PIT counter. */
222 if ( using_pit )
223 {
224 u16 count;
226 spin_lock_irq(&pit_lock);
228 outb(0x80, PIT_MODE);
229 count = inb(PIT_CH2);
230 count |= inb(PIT_CH2) << 8;
232 pit_stamp32 += (u16)(pit_stamp16 - count);
233 pit_stamp16 = count;
235 spin_unlock_irq(&pit_lock);
236 }
237 }
239 static struct irqaction irq0 = { timer_interrupt, "timer", NULL };
241 /* ------ Calibrate the TSC -------
242 * Return processor TSC ticks per second (measured over 1/CALIBRATE_FRAC of a second).
243 */
245 #define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
246 #define CALIBRATE_FRAC 20 /* calibrate over 50ms */
247 #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
249 static u64 init_pit_and_calibrate_tsc(void)
250 {
251 u64 start, end;
252 unsigned long count;
254 /* Set PIT channel 0 to HZ Hz. */
255 #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
256 outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
257 outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
258 outb(LATCH >> 8, PIT_CH0); /* MSB */
260 /* Set the Gate high, disable speaker */
261 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
263 /*
264 * Now let's take care of CTC channel 2
265 *
266 * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
267 * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
268 * to begin countdown.
269 */
270 outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
271 outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
272 outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
274 rdtscll(start);
275 for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
276 continue;
277 rdtscll(end);
279 /* Error if the CTC doesn't behave itself. */
280 if ( count == 0 )
281 return 0;
283 return ((end - start) * (u64)CALIBRATE_FRAC);
284 }
286 static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
287 {
288 u64 tps64 = ticks_per_sec;
289 u32 tps32;
290 int shift = 0;
292 ASSERT(tps64 != 0);
294 while ( tps64 > (MILLISECS(1000)*2) )
295 {
296 tps64 >>= 1;
297 shift--;
298 }
300 tps32 = (u32)tps64;
301 while ( tps32 <= (u32)MILLISECS(1000) )
302 {
303 tps32 <<= 1;
304 shift++;
305 }
307 ts->mul_frac = div_frac(MILLISECS(1000), tps32);
308 ts->shift = shift;
309 }
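/*
 * Illustrative example with a hypothetical 2.4GHz TSC: set_time_scale()
 * computes shift = -1 and mul_frac = 0xd5555555 (~5/6), so that
 * scale_delta(2400000, ts) ~= 1000000ns, i.e. 2400000 ticks map to 1ms.
 */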
311 static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
312 static unsigned int tsc_calibrate_status = 0;
314 void calibrate_tsc_bp(void)
315 {
316 while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
317 mb();
319 outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
320 outb(CALIBRATE_LATCH >> 8, PIT_CH2);
322 tsc_calibrate_status = 1;
323 wmb();
325 while ( (inb(0x61) & 0x20) == 0 )
326 continue;
328 tsc_calibrate_status = 2;
329 wmb();
331 while ( atomic_read(&tsc_calibrate_gang) != 0 )
332 mb();
333 }
335 void calibrate_tsc_ap(void)
336 {
337 u64 t1, t2, ticks_per_sec;
339 atomic_inc(&tsc_calibrate_gang);
341 while ( tsc_calibrate_status < 1 )
342 mb();
344 rdtscll(t1);
346 while ( tsc_calibrate_status < 2 )
347 mb();
349 rdtscll(t2);
351 ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
352 set_time_scale(&this_cpu(cpu_time).tsc_scale, ticks_per_sec);
354 atomic_dec(&tsc_calibrate_gang);
355 }
357 static char *freq_string(u64 freq)
358 {
359 static char s[20];
360 unsigned int x, y;
361 y = (unsigned int)do_div(freq, 1000000) / 1000;
362 x = (unsigned int)freq;
363 snprintf(s, sizeof(s), "%u.%03uMHz", x, y);
364 return s;
365 }
367 /************************************************************
368 * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
369 */
371 static u64 read_pit_count(void)
372 {
373 u16 count16;
374 u32 count32;
375 unsigned long flags;
377 spin_lock_irqsave(&pit_lock, flags);
379 outb(0x80, PIT_MODE);
380 count16 = inb(PIT_CH2);
381 count16 |= inb(PIT_CH2) << 8;
383 count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);
385 spin_unlock_irqrestore(&pit_lock, flags);
387 return count32;
388 }
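/*
 * The 16-bit subtraction (u16)(pit_stamp16 - count16) yields the number of
 * PIT down-count ticks elapsed since the last stamp, modulo 2^16, so it
 * remains correct across a counter wrap: e.g. pit_stamp16 = 0x0010 and
 * count16 = 0xfff0 give 0x0020 (32) elapsed ticks.
 */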
390 static int init_pit(struct platform_timesource *pts)
391 {
392 using_pit = 1;
393 return 1;
394 }
396 static struct platform_timesource plt_pit =
397 {
398 .id = "pit",
399 .name = "PIT",
400 .frequency = CLOCK_TICK_RATE,
401 .read_counter = read_pit_count,
402 .counter_bits = 32,
403 .init = init_pit
404 };
406 /************************************************************
407 * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
408 */
410 static u64 read_hpet_count(void)
411 {
412 return hpet_read32(HPET_COUNTER);
413 }
415 static int init_hpet(struct platform_timesource *pts)
416 {
417 u64 hpet_rate = hpet_setup();
419 if ( hpet_rate == 0 )
420 return 0;
422 pts->frequency = hpet_rate;
423 return 1;
424 }
426 static void resume_hpet(struct platform_timesource *pts)
427 {
428 u64 hpet_rate = hpet_setup();
430 BUG_ON(hpet_rate == 0);
431 pts->frequency = hpet_rate;
432 }
434 static struct platform_timesource plt_hpet =
435 {
436 .id = "hpet",
437 .name = "HPET",
438 .read_counter = read_hpet_count,
439 .counter_bits = 32,
440 .init = init_hpet,
441 .resume = resume_hpet
442 };
444 /************************************************************
445 * PLATFORM TIMER 3: IBM 'CYCLONE' TIMER
446 */
448 int use_cyclone;
450 /*
451 * Although the counter is read via a 64-bit register, I believe it is actually
452 * a 40-bit counter. Since this will wrap, I read only the low 32 bits and
453 * periodically fold into a 64-bit software counter, just as for PIT and HPET.
454 */
455 #define CYCLONE_CBAR_ADDR 0xFEB00CD0
456 #define CYCLONE_PMCC_OFFSET 0x51A0
457 #define CYCLONE_MPMC_OFFSET 0x51D0
458 #define CYCLONE_MPCS_OFFSET 0x51A8
459 #define CYCLONE_TIMER_FREQ 100000000
461 /* Cyclone MPMC0 register. */
462 static volatile u32 *cyclone_timer;
464 static u64 read_cyclone_count(void)
465 {
466 return *cyclone_timer;
467 }
469 static volatile u32 *map_cyclone_reg(unsigned long regaddr)
470 {
471 unsigned long pageaddr = regaddr & PAGE_MASK;
472 unsigned long offset = regaddr & ~PAGE_MASK;
473 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
474 return (volatile u32 *)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
475 }
477 static int init_cyclone(struct platform_timesource *pts)
478 {
479 u32 base;
481 if ( !use_cyclone )
482 return 0;
484 /* Find base address. */
485 base = *(map_cyclone_reg(CYCLONE_CBAR_ADDR));
486 if ( base == 0 )
487 {
488 printk(KERN_ERR "Cyclone: Could not find valid CBAR value.\n");
489 return 0;
490 }
492 /* Enable timer and map the counter register. */
493 *(map_cyclone_reg(base + CYCLONE_PMCC_OFFSET)) = 1;
494 *(map_cyclone_reg(base + CYCLONE_MPCS_OFFSET)) = 1;
495 cyclone_timer = map_cyclone_reg(base + CYCLONE_MPMC_OFFSET);
496 return 1;
497 }
499 static struct platform_timesource plt_cyclone =
500 {
501 .id = "cyclone",
502 .name = "IBM Cyclone",
503 .frequency = CYCLONE_TIMER_FREQ,
504 .read_counter = read_cyclone_count,
505 .counter_bits = 32,
506 .init = init_cyclone
507 };
509 /************************************************************
510 * PLATFORM TIMER 4: ACPI PM TIMER
511 */
513 u32 pmtmr_ioport;
515 /* ACPI PM timer ticks at 3.579545 MHz. */
516 #define ACPI_PM_FREQUENCY 3579545
518 static u64 read_pmtimer_count(void)
519 {
520 return inl(pmtmr_ioport);
521 }
523 static int init_pmtimer(struct platform_timesource *pts)
524 {
525 if ( pmtmr_ioport == 0 )
526 return 0;
528 return 1;
529 }
531 static struct platform_timesource plt_pmtimer =
532 {
533 .id = "acpi",
534 .name = "ACPI PM Timer",
535 .frequency = ACPI_PM_FREQUENCY,
536 .read_counter = read_pmtimer_count,
537 .counter_bits = 24,
538 .init = init_pmtimer
539 };
541 static struct time_scale pmt_scale;
542 static struct time_scale pmt_scale_r;
543 static __init int init_pmtmr_scale(void)
544 {
545 set_time_scale(&pmt_scale, ACPI_PM_FREQUENCY);
546 pmt_scale_r = scale_reciprocal(pmt_scale);
547 return 0;
548 }
549 __initcall(init_pmtmr_scale);
551 uint64_t acpi_pm_tick_to_ns(uint64_t ticks)
552 {
553 return scale_delta(ticks, &pmt_scale);
554 }
556 uint64_t ns_to_acpi_pm_tick(uint64_t ns)
557 {
558 return scale_delta(ns, &pmt_scale_r);
559 }
561 /************************************************************
562 * GENERIC PLATFORM TIMER INFRASTRUCTURE
563 */
565 static struct platform_timesource plt_src; /* details of chosen timesource */
566 static u64 plt_mask; /* hardware-width mask */
567 static u64 plt_overflow_period; /* ns between calls to plt_overflow() */
568 static struct time_scale plt_scale; /* scale: platform counter -> nanosecs */
570 /* Protected by platform_timer_lock. */
571 static DEFINE_SPINLOCK(platform_timer_lock);
572 static s_time_t stime_platform_stamp; /* System time at below platform time */
573 static u64 platform_timer_stamp; /* Platform time at above system time */
574 static u64 plt_stamp64; /* 64-bit platform counter stamp */
575 static u64 plt_stamp; /* hardware-width platform counter stamp */
576 static struct timer plt_overflow_timer;
578 static void plt_overflow(void *unused)
579 {
580 u64 count;
582 spin_lock_irq(&platform_timer_lock);
583 count = plt_src.read_counter();
584 plt_stamp64 += (count - plt_stamp) & plt_mask;
585 plt_stamp = count;
586 spin_unlock_irq(&platform_timer_lock);
588 set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
589 }
591 static s_time_t __read_platform_stime(u64 platform_time)
592 {
593 u64 diff = platform_time - platform_timer_stamp;
594 ASSERT(spin_is_locked(&platform_timer_lock));
595 return (stime_platform_stamp + scale_delta(diff, &plt_scale));
596 }
598 static s_time_t read_platform_stime(void)
599 {
600 u64 count;
601 s_time_t stime;
603 ASSERT(!local_irq_is_enabled());
605 spin_lock(&platform_timer_lock);
606 count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
607 stime = __read_platform_stime(count);
608 spin_unlock(&platform_timer_lock);
610 return stime;
611 }
613 static void platform_time_calibration(void)
614 {
615 u64 count;
616 s_time_t stamp;
617 unsigned long flags;
619 spin_lock_irqsave(&platform_timer_lock, flags);
620 count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
621 stamp = __read_platform_stime(count);
622 stime_platform_stamp = stamp;
623 platform_timer_stamp = count;
624 spin_unlock_irqrestore(&platform_timer_lock, flags);
625 }
627 static void resume_platform_timer(void)
628 {
629 /* The timer source can be reset when coming back from S3 to S0. */
630 if ( plt_src.resume )
631 plt_src.resume(&plt_src);
633 plt_stamp64 = platform_timer_stamp;
634 plt_stamp = plt_src.read_counter();
635 }
637 static void init_platform_timer(void)
638 {
639 static struct platform_timesource * const plt_timers[] = {
640 &plt_cyclone, &plt_hpet, &plt_pmtimer, &plt_pit
641 };
643 struct platform_timesource *pts = NULL;
644 int i, rc = -1;
646 if ( opt_clocksource[0] != '\0' )
647 {
648 for ( i = 0; i < ARRAY_SIZE(plt_timers); i++ )
649 {
650 pts = plt_timers[i];
651 if ( !strcmp(opt_clocksource, pts->id) )
652 {
653 rc = pts->init(pts);
654 break;
655 }
656 }
658 if ( rc <= 0 )
659 printk("WARNING: %s clocksource '%s'.\n",
660 (rc == 0) ? "Could not initialise" : "Unrecognised",
661 opt_clocksource);
662 }
664 if ( rc <= 0 )
665 {
666 for ( i = 0; i < ARRAY_SIZE(plt_timers); i++ )
667 {
668 pts = plt_timers[i];
669 if ( (rc = pts->init(pts)) > 0 )
670 break;
671 }
672 }
674 BUG_ON(rc <= 0);
676 plt_mask = (u64)~0ull >> (64 - pts->counter_bits);
678 set_time_scale(&plt_scale, pts->frequency);
680 plt_overflow_period = scale_delta(
681 1ull << (pts->counter_bits-1), &plt_scale);
682 init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
683 plt_src = *pts;
684 plt_overflow(NULL);
686 platform_timer_stamp = plt_stamp64;
687 stime_platform_stamp = NOW();
689 printk("Platform timer is %s %s\n",
690 freq_string(pts->frequency), pts->name);
691 }
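/*
 * For reference, plt_overflow_period is half the counter's wrap time: a
 * 32-bit HPET at ~14.318MHz is folded roughly every 2^31/14318180 ~= 150s,
 * while the 24-bit ACPI PM timer at 3.579545MHz needs folding about every
 * 2^23/3579545 ~= 2.3s.
 */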
693 void cstate_restore_tsc(void)
694 {
695 struct cpu_time *t = &this_cpu(cpu_time);
696 struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
697 s_time_t stime_delta;
698 u64 tsc_delta;
700 if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
701 return;
703 stime_delta = read_platform_stime() - t->stime_master_stamp;
704 if ( stime_delta < 0 )
705 stime_delta = 0;
707 tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
709 wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
710 }
712 /***************************************************************************
713 * CMOS Timer functions
714 ***************************************************************************/
716 /* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
717 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
718 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
719 *
720 * [For the Julian calendar (which was used in Russia before 1917,
721 * Britain & colonies before 1752, anywhere else before 1582,
722 * and is still in use by some communities) leave out the
723 * -year/100+year/400 terms, and add 10.]
724 *
725 * This algorithm was first published by Gauss (I think).
726 *
727 * WARNING: this function will overflow on 2106-02-07 06:28:16 on
728 * machines where long is 32-bit! (However, as time_t is signed, we
729 * will already get problems at other places on 2038-01-19 03:14:08)
730 */
731 unsigned long
732 mktime (unsigned int year, unsigned int mon,
733 unsigned int day, unsigned int hour,
734 unsigned int min, unsigned int sec)
735 {
736 /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */
737 if ( 0 >= (int) (mon -= 2) )
738 {
739 mon += 12;
740 year -= 1;
741 }
743 return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
744 year*365 - 719499
745 )*24 + hour /* now have hours */
746 )*60 + min /* now have minutes */
747 )*60 + sec; /* finally seconds */
748 }
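/*
 * Quick sanity check of the formula above: mktime(1970, 1, 1, 0, 0, 0)
 * evaluates to 0 and mktime(1970, 1, 2, 0, 0, 0) to 86400, as expected for
 * seconds since the Unix epoch.
 */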
750 static unsigned long __get_cmos_time(void)
751 {
752 unsigned int year, mon, day, hour, min, sec;
754 sec = CMOS_READ(RTC_SECONDS);
755 min = CMOS_READ(RTC_MINUTES);
756 hour = CMOS_READ(RTC_HOURS);
757 day = CMOS_READ(RTC_DAY_OF_MONTH);
758 mon = CMOS_READ(RTC_MONTH);
759 year = CMOS_READ(RTC_YEAR);
761 if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
762 {
763 BCD_TO_BIN(sec);
764 BCD_TO_BIN(min);
765 BCD_TO_BIN(hour);
766 BCD_TO_BIN(day);
767 BCD_TO_BIN(mon);
768 BCD_TO_BIN(year);
769 }
771 if ( (year += 1900) < 1970 )
772 year += 100;
774 return mktime(year, mon, day, hour, min, sec);
775 }
777 static unsigned long get_cmos_time(void)
778 {
779 unsigned long res, flags;
780 int i;
782 spin_lock_irqsave(&rtc_lock, flags);
784 /* read RTC exactly on falling edge of update flag */
785 for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... */
786 if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
787 break;
788 for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */
789 if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
790 break;
792 res = __get_cmos_time();
794 spin_unlock_irqrestore(&rtc_lock, flags);
795 return res;
796 }
798 /***************************************************************************
799 * System Time
800 ***************************************************************************/
802 s_time_t get_s_time(void)
803 {
804 struct cpu_time *t = &this_cpu(cpu_time);
805 u64 tsc, delta;
806 s_time_t now;
808 rdtscll(tsc);
809 delta = tsc - t->local_tsc_stamp;
810 now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
812 return now;
813 }
815 static inline void version_update_begin(u32 *version)
816 {
817 /* Explicitly OR with 1 just in case version number gets out of sync. */
818 *version = (*version + 1) | 1;
819 wmb();
820 }
822 static inline void version_update_end(u32 *version)
823 {
824 wmb();
825 (*version)++;
826 }
828 void update_vcpu_system_time(struct vcpu *v)
829 {
830 struct cpu_time *t;
831 struct vcpu_time_info *u;
833 if ( v->vcpu_info == NULL )
834 return;
836 t = &this_cpu(cpu_time);
837 u = &vcpu_info(v, time);
839 if ( u->tsc_timestamp == t->local_tsc_stamp )
840 return;
842 version_update_begin(&u->version);
844 u->tsc_timestamp = t->local_tsc_stamp;
845 u->system_time = t->stime_local_stamp;
846 u->tsc_to_system_mul = t->tsc_scale.mul_frac;
847 u->tsc_shift = (s8)t->tsc_scale.shift;
849 version_update_end(&u->version);
850 }
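/*
 * Guest-side sketch (for illustration): a guest reads this structure like a
 * seqlock -- loop until it sees the same even 'version' before and after
 * reading the fields, then compute
 *   system_time + scale_delta(rdtsc() - tsc_timestamp,
 *                             { tsc_shift, tsc_to_system_mul })
 * to extrapolate the current system time in nanoseconds.
 */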
852 void update_domain_wallclock_time(struct domain *d)
853 {
854 spin_lock(&wc_lock);
855 version_update_begin(&shared_info(d, wc_version));
856 shared_info(d, wc_sec) = wc_sec + d->time_offset_seconds;
857 shared_info(d, wc_nsec) = wc_nsec;
858 version_update_end(&shared_info(d, wc_version));
859 spin_unlock(&wc_lock);
860 }
862 void domain_set_time_offset(struct domain *d, int32_t time_offset_seconds)
863 {
864 d->time_offset_seconds = time_offset_seconds;
865 if ( is_hvm_domain(d) )
866 rtc_update_clock(d);
867 }
869 int cpu_frequency_change(u64 freq)
870 {
871 struct cpu_time *t = &this_cpu(cpu_time);
872 u64 curr_tsc;
874 /* Sanity check: CPU frequency allegedly dropping below 1MHz? */
875 if ( freq < 1000000u )
876 {
877 gdprintk(XENLOG_WARNING, "Rejecting CPU frequency change "
878 "to %"PRIu64" Hz.\n", freq);
879 return -EINVAL;
880 }
882 local_irq_disable();
883 /* Platform time /first/, as we may be delayed by platform_timer_lock. */
884 t->stime_master_stamp = read_platform_stime();
885 /* TSC-extrapolated time may be bogus after frequency change. */
886 /*t->stime_local_stamp = get_s_time();*/
887 t->stime_local_stamp = t->stime_master_stamp;
888 rdtscll(curr_tsc);
889 t->local_tsc_stamp = curr_tsc;
890 set_time_scale(&t->tsc_scale, freq);
891 local_irq_enable();
893 update_vcpu_system_time(current);
895 /* A full epoch should pass before we check for deviation. */
896 if ( smp_processor_id() == 0 )
897 {
898 set_timer(&calibration_timer, NOW() + EPOCH);
899 platform_time_calibration();
900 }
902 return 0;
903 }
905 /* Set clock to <secs,nsecs> after 00:00:00 UTC, 1 January, 1970. */
906 void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
907 {
908 u64 x;
909 u32 y, _wc_sec, _wc_nsec;
910 struct domain *d;
912 x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base;
913 y = do_div(x, 1000000000);
915 spin_lock(&wc_lock);
916 wc_sec = _wc_sec = (u32)x;
917 wc_nsec = _wc_nsec = (u32)y;
918 spin_unlock(&wc_lock);
920 rcu_read_lock(&domlist_read_lock);
921 for_each_domain ( d )
922 update_domain_wallclock_time(d);
923 rcu_read_unlock(&domlist_read_lock);
924 }
926 /* Per-CPU communication between rendezvous IRQ and softirq handler. */
927 struct cpu_calibration {
928 u64 local_tsc_stamp;
929 s_time_t stime_local_stamp;
930 s_time_t stime_master_stamp;
931 };
932 static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration);
934 /* Softirq handler for per-CPU time calibration. */
935 static void local_time_calibration(void)
936 {
937 struct cpu_time *t = &this_cpu(cpu_time);
938 struct cpu_calibration *c = &this_cpu(cpu_calibration);
940 /*
941 * System timestamps, extrapolated from local and master oscillators,
942 * taken during this calibration and the previous calibration.
943 */
944 s_time_t prev_local_stime, curr_local_stime;
945 s_time_t prev_master_stime, curr_master_stime;
947 /* TSC timestamps taken during this calibration and prev calibration. */
948 u64 prev_tsc, curr_tsc;
950 /*
951 * System time and TSC ticks elapsed during the previous calibration
952 * 'epoch'. These values are down-shifted to fit in 32 bits.
953 */
954 u64 stime_elapsed64, tsc_elapsed64;
955 u32 stime_elapsed32, tsc_elapsed32;
957 /* The accumulated error in the local estimate. */
958 u64 local_stime_err;
960 /* Error correction to slow down a fast local clock. */
961 u32 error_factor = 0;
963 /* Calculated TSC shift to ensure 32-bit scale multiplier. */
964 int tsc_shift = 0;
966 /* The overall calibration scale multiplier. */
967 u32 calibration_mul_frac;
969 if ( opt_consistent_tscs )
970 {
971 /* Atomically read cpu_calibration struct and write cpu_time struct. */
972 local_irq_disable();
973 t->local_tsc_stamp = c->local_tsc_stamp;
974 t->stime_local_stamp = c->stime_master_stamp;
975 t->stime_master_stamp = c->stime_master_stamp;
976 local_irq_enable();
977 update_vcpu_system_time(current);
978 goto out;
979 }
981 prev_tsc = t->local_tsc_stamp;
982 prev_local_stime = t->stime_local_stamp;
983 prev_master_stime = t->stime_master_stamp;
985 /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
986 local_irq_disable();
987 curr_tsc = c->local_tsc_stamp;
988 curr_local_stime = c->stime_local_stamp;
989 curr_master_stime = c->stime_master_stamp;
990 local_irq_enable();
992 #if 0
993 printk("PRE%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64"\n",
994 smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime);
995 printk("CUR%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64
996 " -> %"PRId64"\n",
997 smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime,
998 curr_master_stime - curr_local_stime);
999 #endif
1001 /* Local time warps forward if it lags behind master time. */
1002 if ( curr_local_stime < curr_master_stime )
1003 curr_local_stime = curr_master_stime;
1005 stime_elapsed64 = curr_master_stime - prev_master_stime;
1006 tsc_elapsed64 = curr_tsc - prev_tsc;
1008 /*
1009 * Weirdness can happen if we lose sync with the platform timer.
1010 * We could be smarter here: resync platform timer with local timer?
1011 */
1012 if ( ((s64)stime_elapsed64 < (EPOCH / 2)) )
1013 goto out;
1015 /*
1016 * Calculate error-correction factor. This only slows down a fast local
1017 * clock (slow clocks are warped forwards). The scale factor is clamped
1018 * to >= 0.5.
1019 */
1020 if ( curr_local_stime != curr_master_stime )
1021 {
1022 local_stime_err = curr_local_stime - curr_master_stime;
1023 if ( local_stime_err > EPOCH )
1024 local_stime_err = EPOCH;
1025 error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
1026 }
1028 /*
1029 * We require 0 < stime_elapsed < 2^31.
1030 * This allows us to binary shift a 32-bit tsc_elapsed such that:
1031 * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
1032 */
1033 while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
1034 ((s32)stime_elapsed64 < 0) )
1035 {
1036 stime_elapsed64 >>= 1;
1037 tsc_elapsed64 >>= 1;
1038 }
1040 /* stime_elapsed now fits in a 32-bit word. */
1041 stime_elapsed32 = (u32)stime_elapsed64;
1043 /* tsc_elapsed <= 2*stime_elapsed */
1044 while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
1045 {
1046 tsc_elapsed64 >>= 1;
1047 tsc_shift--;
1048 }
1050 /* Local difference must now fit in 32 bits. */
1051 ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
1052 tsc_elapsed32 = (u32)tsc_elapsed64;
1054 /* tsc_elapsed > stime_elapsed */
1055 ASSERT(tsc_elapsed32 != 0);
1056 while ( tsc_elapsed32 <= stime_elapsed32 )
1057 {
1058 tsc_elapsed32 <<= 1;
1059 tsc_shift++;
1060 }
1062 calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32);
1063 if ( error_factor != 0 )
1064 calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor);
1066 #if 0
1067 printk("---%d: %08x %08x %d\n", smp_processor_id(),
1068 error_factor, calibration_mul_frac, tsc_shift);
1069 #endif
1071 /* Record new timestamp information, atomically w.r.t. interrupts. */
1072 local_irq_disable();
1073 t->tsc_scale.mul_frac = calibration_mul_frac;
1074 t->tsc_scale.shift = tsc_shift;
1075 t->local_tsc_stamp = curr_tsc;
1076 t->stime_local_stamp = curr_local_stime;
1077 t->stime_master_stamp = curr_master_stime;
1078 local_irq_enable();
1080 update_vcpu_system_time(current);
1082 out:
1083 if ( smp_processor_id() == 0 )
1084 {
1085 set_timer(&calibration_timer, NOW() + EPOCH);
1086 platform_time_calibration();
1087 }
1089 }
1090 /*
1091 * Rendezvous for all CPUs in IRQ context.
1092 * Master CPU snapshots the platform timer.
1093 * All CPUS snapshot their local TSC and extrapolation of system time.
1094 */
1095 struct calibration_rendezvous {
1096 cpumask_t cpu_calibration_map;
1097 atomic_t semaphore;
1098 s_time_t master_stime;
1099 u64 master_tsc_stamp;
1100 };
1102 static void time_calibration_tsc_rendezvous(void *_r)
1103 {
1104 int i;
1105 struct cpu_calibration *c = &this_cpu(cpu_calibration);
1106 struct calibration_rendezvous *r = _r;
1107 unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
1109 /* Loop to get rid of cache effects on TSC skew. */
1110 for ( i = 4; i >= 0; i-- )
1111 {
1112 if ( smp_processor_id() == 0 )
1113 {
1114 while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
1115 mb();
1117 if ( r->master_stime == 0 )
1118 {
1119 r->master_stime = read_platform_stime();
1120 rdtscll(r->master_tsc_stamp);
1121 }
1122 atomic_inc(&r->semaphore);
1124 if ( i == 0 )
1125 write_tsc((u32)r->master_tsc_stamp,
1126 (u32)(r->master_tsc_stamp >> 32));
1128 while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
1129 mb();
1130 atomic_set(&r->semaphore, 0);
1131 }
1132 else
1133 {
1134 atomic_inc(&r->semaphore);
1135 while ( atomic_read(&r->semaphore) < total_cpus )
1136 mb();
1138 if ( i == 0 )
1139 write_tsc((u32)r->master_tsc_stamp,
1140 (u32)(r->master_tsc_stamp >> 32));
1142 atomic_inc(&r->semaphore);
1143 while ( atomic_read(&r->semaphore) > total_cpus )
1144 mb();
1145 }
1146 }
1148 rdtscll(c->local_tsc_stamp);
1149 c->stime_local_stamp = get_s_time();
1150 c->stime_master_stamp = r->master_stime;
1152 raise_softirq(TIME_CALIBRATE_SOFTIRQ);
1153 }
1155 static void time_calibration_std_rendezvous(void *_r)
1156 {
1157 struct cpu_calibration *c = &this_cpu(cpu_calibration);
1158 struct calibration_rendezvous *r = _r;
1159 unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
1161 if ( smp_processor_id() == 0 )
1162 {
1163 while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
1164 cpu_relax();
1165 r->master_stime = read_platform_stime();
1166 mb(); /* write r->master_stime /then/ signal */
1167 atomic_inc(&r->semaphore);
1168 }
1169 else
1170 {
1171 atomic_inc(&r->semaphore);
1172 while ( atomic_read(&r->semaphore) != total_cpus )
1173 cpu_relax();
1174 mb(); /* receive signal /then/ read r->master_stime */
1175 }
1177 rdtscll(c->local_tsc_stamp);
1178 c->stime_local_stamp = get_s_time();
1179 c->stime_master_stamp = r->master_stime;
1181 raise_softirq(TIME_CALIBRATE_SOFTIRQ);
1182 }
1184 static void time_calibration(void *unused)
1185 {
1186 struct calibration_rendezvous r = {
1187 .cpu_calibration_map = cpu_online_map,
1188 .semaphore = ATOMIC_INIT(0)
1189 };
1191 /* @wait=1 because we must wait for all cpus before freeing @r. */
1192 on_selected_cpus(r.cpu_calibration_map,
1193 opt_consistent_tscs
1194 ? time_calibration_tsc_rendezvous
1195 : time_calibration_std_rendezvous,
1196 &r, 0, 1);
1197 }
1199 void init_percpu_time(void)
1200 {
1201 struct cpu_time *t = &this_cpu(cpu_time);
1202 unsigned long flags;
1203 s_time_t now;
1205 local_irq_save(flags);
1206 rdtscll(t->local_tsc_stamp);
1207 now = read_platform_stime();
1208 local_irq_restore(flags);
1210 t->stime_master_stamp = now;
1211 t->stime_local_stamp = now;
1213 if ( smp_processor_id() == 0 )
1214 {
1215 init_timer(&calibration_timer, time_calibration, NULL, 0);
1216 set_timer(&calibration_timer, NOW() + EPOCH);
1217 }
1218 }
1220 /* Late init function (after all CPUs are booted). */
1221 int __init init_xen_time(void)
1222 {
1223 if ( !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
1224 opt_consistent_tscs = 0;
1226 /* If we have constant TSCs then scale factor can be shared. */
1227 if ( opt_consistent_tscs )
1228 {
1229 int cpu;
1230 for_each_cpu ( cpu )
1231 per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
1232 }
1234 open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
1236 /* System time (get_s_time()) starts ticking from now. */
1237 rdtscll(this_cpu(cpu_time).local_tsc_stamp);
1239 /* NB. get_cmos_time() can take over one second to execute. */
1240 do_settime(get_cmos_time(), 0, NOW());
1242 init_platform_timer();
1244 init_percpu_time();
1246 return 0;
1247 }
1250 /* Early init function. */
1251 void __init early_time_init(void)
1252 {
1253 u64 tmp = init_pit_and_calibrate_tsc();
1255 set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
1257 do_div(tmp, 1000);
1258 cpu_khz = (unsigned long)tmp;
1259 printk("Detected %lu.%03lu MHz processor.\n",
1260 cpu_khz / 1000, cpu_khz % 1000);
1262 setup_irq(0, &irq0);
1263 }
1265 /* Keep the PIT enabled so that PIT broadcast keeps working while cpuidle is enabled. */
1266 static int disable_pit_irq(void)
1267 {
1268 if ( using_pit || !cpu_has_apic )
1269 return 0;
1271 /*
1272 * If we do not rely on PIT CH0 then we can use HPET for one-shot timer
1273 * emulation when entering deep C states.
1274 * XXX dom0 may rely on RTC interrupt delivery, so only enable
1275 * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
1276 */
1277 if ( xen_cpuidle )
1278 {
1279 hpet_broadcast_init();
1280 if ( !hpet_broadcast_is_available() )
1281 {
1282 if ( xen_cpuidle == -1 )
1283 {
1284 xen_cpuidle = 0;
1285 printk("CPUIDLE: disabled due to no HPET. "
1286 "Force enable with 'cpuidle'.\n");
1287 }
1288 else
1289 {
1290 printk("HPET broadcast init failed, turn to PIT broadcast.\n");
1291 return 0;
1292 }
1293 }
1294 }
1296 /* Disable PIT CH0 timer interrupt. */
1297 outb_p(0x30, PIT_MODE);
1298 outb_p(0, PIT_CH0);
1299 outb_p(0, PIT_CH0);
1301 return 0;
1302 }
1303 __initcall(disable_pit_irq);
1305 void pit_broadcast_enter(void)
1306 {
1307 cpu_set(smp_processor_id(), pit_broadcast_mask);
1308 }
1310 void pit_broadcast_exit(void)
1311 {
1312 int cpu = smp_processor_id();
1314 if ( cpu_test_and_clear(cpu, pit_broadcast_mask) )
1315 reprogram_timer(per_cpu(timer_deadline, cpu));
1316 }
1318 int pit_broadcast_is_available(void)
1319 {
1320 return xen_cpuidle;
1321 }
1323 void send_timer_event(struct vcpu *v)
1324 {
1325 send_guest_vcpu_virq(v, VIRQ_TIMER);
1326 }
1328 /* Return secs after 00:00:00 localtime, 1 January, 1970. */
1329 unsigned long get_localtime(struct domain *d)
1330 {
1331 return wc_sec + (wc_nsec + NOW()) / 1000000000ULL
1332 + d->time_offset_seconds;
1333 }
1335 /* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
1336 static long cmos_utc_offset; /* in seconds */
1338 int time_suspend(void)
1339 {
1340 if ( smp_processor_id() == 0 )
1341 {
1342 cmos_utc_offset = -get_cmos_time();
1343 cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
1344 kill_timer(&calibration_timer);
1346 /* Sync platform timer stamps. */
1347 platform_time_calibration();
1348 }
1350 /* Better to cancel calibration timer for accuracy. */
1351 clear_bit(TIME_CALIBRATE_SOFTIRQ, &softirq_pending(smp_processor_id()));
1353 return 0;
1354 }
1356 int time_resume(void)
1357 {
1358 /*u64 tmp = */init_pit_and_calibrate_tsc();
1360 /* Disable this while calibrate_tsc_ap() also is skipped. */
1361 /*set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);*/
1363 resume_platform_timer();
1365 disable_pit_irq();
1367 init_percpu_time();
1369 do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
1371 update_vcpu_system_time(current);
1373 return 0;
1374 }
1376 int dom0_pit_access(struct ioreq *ioreq)
1377 {
1378 /* Is Xen using Channel 2? Then disallow direct dom0 access. */
1379 if ( using_pit )
1380 return 0;
1382 switch ( ioreq->addr )
1383 {
1384 case PIT_CH2:
1385 if ( ioreq->dir == IOREQ_READ )
1386 ioreq->data = inb(PIT_CH2);
1387 else
1388 outb(ioreq->data, PIT_CH2);
1389 return 1;
1391 case PIT_MODE:
1392 if ( ioreq->dir == IOREQ_READ )
1393 return 0; /* urk! */
1394 switch ( ioreq->data & 0xc0 )
1395 {
1396 case 0xc0: /* Read Back */
1397 if ( ioreq->data & 0x08 ) /* Select Channel 2? */
1398 outb(ioreq->data & 0xf8, PIT_MODE);
1399 if ( !(ioreq->data & 0x06) ) /* Select Channel 0/1? */
1400 return 1; /* no - we're done */
1401 /* Filter Channel 2 and reserved bit 0. */
1402 ioreq->data &= ~0x09;
1403 return 0; /* emulate ch0/1 readback */
1404 case 0x80: /* Select Counter 2 */
1405 outb(ioreq->data, PIT_MODE);
1406 return 1;
1407 }
1408 break;
1409 case 0x61:
1410 if ( ioreq->dir == IOREQ_READ )
1411 ioreq->data = inb(0x61);
1412 else
1413 outb((inb(0x61) & ~3) | (ioreq->data & 3), 0x61);
1414 return 1;
1415 }
1417 return 0;
1418 }
1420 struct tm wallclock_time(void)
1421 {
1422 uint64_t seconds;
1424 if ( !wc_sec )
1425 return (struct tm) { 0 };
1427 seconds = NOW() + (wc_sec * 1000000000ull) + wc_nsec;
1428 do_div(seconds, 1000000000);
1429 return gmtime(seconds);
1430 }
1432 /*
1433 * Local variables:
1434 * mode: C
1435 * c-set-style: "BSD"
1436 * c-basic-offset: 4
1437 * tab-width: 4
1438 * indent-tabs-mode: nil
1439 * End:
1440 */