ia64/xen-unstable

view xen/arch/x86/time.c @ 18051:79517ed2a108

x86: PIT broadcast to fix local APIC timer stoppage in deep C states

The local APIC timer may stop on entry to a deep C state (C3/C4...). The
initial HPET broadcast implementation, which worked in legacy-replacement
mode, broke RTC interrupts and so was bypassed. This patch adds logic that
uses the platform timer (PIT) to deliver timer ticks, as broadcast IPIs, to
CPUs whose local APIC timer is stopped across C-state entry/exit.

For now the PIT is simply kept enabled at a 100Hz frequency. The next step
is to enable/disable the PIT dynamically, only while it is needed, and to
run it at a lower frequency.
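
Illustrative usage (a sketch, not part of this patch): the cpuidle entry
path is expected to wrap deep C-state entry with the hooks added below,
roughly:

    if ( pit_broadcast_is_available() )
        pit_broadcast_enter();   /* PIT IPIs stand in for the APIC timer */
    cstate_save_tsc();           /* stamp TSC and platform counter */
    acpi_idle_do_entry(cx);      /* enter C3/C4; APIC timer may stop */
    cstate_restore_tsc();        /* recover the TSC from platform time */
    pit_broadcast_exit();

Here acpi_idle_do_entry() and cx are hypothetical names for the idle
driver's entry routine and C-state descriptor.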

Signed-off-by: Yu Ke <ke.yu@intel.com>
Signed-off-by: Tian Kevin <kevin.tian@intel.com>
Signed-off-by: Wei Gang <gang.wei@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 14 10:43:32 2008 +0100 (2008-07-14)
parents f2148e532c81
children 1c22d42043bb

/******************************************************************************
 * arch/x86/time.c
 *
 * Per-CPU time calibration and management.
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * Portions from Linux are:
 * Copyright (c) 1991, 1992, 1995 Linus Torvalds
 */

#include <xen/config.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/smp.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/mpspec.h>
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/mc146818rtc.h>
#include <asm/div64.h>
#include <asm/hpet.h>
#include <io_ports.h>

/* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
static char opt_clocksource[10];
string_param("clocksource", opt_clocksource);
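
/*
 * For example (hypothetical GRUB entry), booting with "clocksource=hpet" on
 * the Xen command line forces the HPET:
 *     kernel /boot/xen.gz clocksource=hpet
 * If the forced source fails to initialise or the name is unrecognised,
 * init_platform_timer() below warns and falls back to probing cyclone,
 * HPET, ACPI PM timer and finally the PIT.
 */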

#define EPOCH MILLISECS(1000)

unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
DEFINE_SPINLOCK(rtc_lock);
unsigned long pit0_ticks;
static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
static DEFINE_SPINLOCK(wc_lock);
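
/*
 * A time_scale converts a tick count t into nanoseconds by scaling it by
 * 2^shift and then multiplying by mul_frac, a 0.32 fixed-point fraction
 * that set_time_scale() below keeps in [0.5, 1).
 */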
struct time_scale {
    int shift;
    u32 mul_frac;
};

struct cpu_time {
    u64 local_tsc_stamp;
    u64 cstate_tsc_stamp;
    s_time_t stime_local_stamp;
    s_time_t stime_master_stamp;
    struct time_scale tsc_scale;
    u32 cstate_plt_count_stamp;
    struct timer calibration_timer;
};

struct platform_timesource {
    char *name;
    u64 frequency;
    u32 (*read_counter)(void);
    int counter_bits;
};

static DEFINE_PER_CPU(struct cpu_time, cpu_time);

/* Is the TSC invariant across deep C-state entry? */
static bool_t tsc_invariant;

/*
 * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
 * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
 * softirq handling will happen in time.
 *
 * The pit_lock protects the 16- and 32-bit stamp fields as well as the
 * hardware counter access sequence.
 */
static DEFINE_SPINLOCK(pit_lock);
static u16 pit_stamp16;
static u32 pit_stamp32;
static int using_pit;

/*
 * 32-bit division of integer dividend and integer divisor yielding
 * 32-bit fractional quotient.
 */
static inline u32 div_frac(u32 dividend, u32 divisor)
{
    u32 quotient, remainder;
    ASSERT(dividend < divisor);
    asm (
        "divl %4"
        : "=a" (quotient), "=d" (remainder)
        : "0" (0), "1" (dividend), "r" (divisor) );
    return quotient;
}

/*
 * 32-bit multiplication of multiplicand and fractional multiplier
 * yielding 32-bit product (radix point at same position as in multiplicand).
 */
static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
{
    u32 product_int, product_frac;
    asm (
        "mul %3"
        : "=a" (product_frac), "=d" (product_int)
        : "0" (multiplicand), "r" (multiplier) );
    return product_int;
}

/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, struct time_scale *scale)
{
    u64 product;
#ifdef CONFIG_X86_32
    u32 tmp1, tmp2;
#endif

    if ( scale->shift < 0 )
        delta >>= -scale->shift;
    else
        delta <<= scale->shift;

#ifdef CONFIG_X86_32
    asm (
        "mul %5       ; "
        "mov %4,%%eax ; "
        "mov %%edx,%4 ; "
        "mul %5       ; "
        "xor %5,%5    ; "
        "add %4,%%eax ; "
        "adc %5,%%edx ; "
        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
#else
    asm (
        "mul %%rdx ; shrd $32,%%rdx,%%rax"
        : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
#endif

    return product;
}

/*
 * cpumask denoting the CPUs that need timer interrupts delivered as IPIs,
 * in place of their local APIC timers.
 */
extern int xen_cpuidle;
static cpumask_t pit_broadcast_mask;

static void smp_send_timer_broadcast_ipi(void)
{
    int cpu = smp_processor_id();
    cpumask_t mask;

    cpus_and(mask, cpu_online_map, pit_broadcast_mask);

    if ( cpu_isset(cpu, mask) )
    {
        cpu_clear(cpu, mask);
        raise_softirq(TIMER_SOFTIRQ);
    }

    if ( !cpus_empty(mask) )
    {
        cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
    }
}

static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
    ASSERT(local_irq_is_enabled());

    if ( hpet_legacy_irq_tick() )
        return;

    /* Only for start-of-day interrupt tests in io_apic.c. */
    (*(volatile unsigned long *)&pit0_ticks)++;

    /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
    if ( !cpu_has_apic )
        raise_softirq(TIMER_SOFTIRQ);

    if ( xen_cpuidle )
        smp_send_timer_broadcast_ipi();

    /* Emulate a 32-bit PIT counter. */
    if ( using_pit )
    {
        u16 count;

        spin_lock_irq(&pit_lock);

        outb(0x80, PIT_MODE);
        count  = inb(PIT_CH2);
        count |= inb(PIT_CH2) << 8;
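
        /*
         * The ch2 counter counts down and wraps at 16 bits, so
         * (u16)(pit_stamp16 - count) below is the number of ticks elapsed
         * since the previous interrupt, valid as long as interrupts arrive
         * at least once per ~55ms wrap period.
         */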
        pit_stamp32 += (u16)(pit_stamp16 - count);
        pit_stamp16 = count;

        spin_unlock_irq(&pit_lock);
    }
}

static struct irqaction irq0 = { timer_interrupt, "timer", NULL };

/* ------ Calibrate the TSC -------
 * Return processor ticks per second.
 */

#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
#define CALIBRATE_FRAC  20      /* calibrate over 50ms */
#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)

static u64 init_pit_and_calibrate_tsc(void)
{
    u64 start, end;
    unsigned long count;

    /* Set PIT channel 0 to HZ Hz. */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
    outb(LATCH >> 8, PIT_CH0);     /* MSB */

    /* Set the Gate high, disable speaker. */
    outb((inb(0x61) & ~0x02) | 0x01, 0x61);

    /*
     * Now let's take care of CTC channel 2:
     *
     * Set the Gate high, program CTC channel 2 for mode 0 (interrupt on
     * terminal count mode), binary count, load 5 * LATCH count (LSB and MSB)
     * to begin the countdown.
     */
    outb(0xb0, PIT_MODE);                  /* binary, mode 0, LSB/MSB, ch 2 */
    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */

    rdtscll(start);
    for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
        continue;
    rdtscll(end);
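
    /*
     * The busy-wait above lasts CALIBRATE_LATCH PIT ticks, i.e.
     * 1/CALIBRATE_FRAC seconds (50ms), so multiplying the elapsed TSC count
     * by CALIBRATE_FRAC below yields TSC ticks per second.
     */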

    /* Error if the CTC doesn't behave itself. */
    if ( count == 0 )
        return 0;

    return ((end - start) * (u64)CALIBRATE_FRAC);
}

static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
{
    u64 tps64 = ticks_per_sec;
    u32 tps32;
    int shift = 0;

    ASSERT(tps64 != 0);

    while ( tps64 > (MILLISECS(1000)*2) )
    {
        tps64 >>= 1;
        shift--;
    }

    tps32 = (u32)tps64;
    while ( tps32 <= (u32)MILLISECS(1000) )
    {
        tps32 <<= 1;
        shift++;
    }

    ts->mul_frac = div_frac(MILLISECS(1000), tps32);
    ts->shift    = shift;
}
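
/*
 * Worked example (illustrative): for a hypothetical 2.4GHz tick source,
 * set_time_scale() halves 2,400,000,000 once to land in (1e9, 2e9], giving
 * tps32 = 1,200,000,000 and shift = -1, then
 *     mul_frac = 1e9 * 2^32 / 1.2e9 = 0xD5555555 (~0.8333 as 0.32 fraction)
 * so scale_delta() computes (delta >> 1) * 0.8333... = delta / 2.4, i.e.
 * 2.4GHz ticks are converted to elapsed nanoseconds.
 */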

static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
static unsigned int tsc_calibrate_status = 0;
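
/*
 * Boot-time TSC calibration rendezvous: each AP increments
 * tsc_calibrate_gang and spins; once all booting APs have arrived, the BP
 * starts the PIT ch2 countdown (status 1), the APs read their TSCs, the BP
 * busy-waits for terminal count (status 2), and the APs read their TSCs
 * again, deriving their own tsc_scale from the ticks elapsed over the 50ms
 * window.
 */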
void calibrate_tsc_bp(void)
{
    while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
        mb();

    outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
    outb(CALIBRATE_LATCH >> 8, PIT_CH2);

    tsc_calibrate_status = 1;
    wmb();

    while ( (inb(0x61) & 0x20) == 0 )
        continue;

    tsc_calibrate_status = 2;
    wmb();

    while ( atomic_read(&tsc_calibrate_gang) != 0 )
        mb();
}

void calibrate_tsc_ap(void)
{
    u64 t1, t2, ticks_per_sec;

    atomic_inc(&tsc_calibrate_gang);

    while ( tsc_calibrate_status < 1 )
        mb();

    rdtscll(t1);

    while ( tsc_calibrate_status < 2 )
        mb();

    rdtscll(t2);

    ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
    set_time_scale(&this_cpu(cpu_time).tsc_scale, ticks_per_sec);

    atomic_dec(&tsc_calibrate_gang);
}

static char *freq_string(u64 freq)
{
    static char s[20];
    unsigned int x, y;
    y = (unsigned int)do_div(freq, 1000000) / 1000;
    x = (unsigned int)freq;
    snprintf(s, sizeof(s), "%u.%03uMHz", x, y);
    return s;
}

/************************************************************
 * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
 */

static u32 read_pit_count(void)
{
    u16 count16;
    u32 count32;
    unsigned long flags;

    spin_lock_irqsave(&pit_lock, flags);

    outb(0x80, PIT_MODE);
    count16  = inb(PIT_CH2);
    count16 |= inb(PIT_CH2) << 8;

    count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);

    spin_unlock_irqrestore(&pit_lock, flags);

    return count32;
}

static void init_pit(struct platform_timesource *pts)
{
    pts->name = "PIT";
    pts->frequency = CLOCK_TICK_RATE;
    pts->read_counter = read_pit_count;
    pts->counter_bits = 32;
    using_pit = 1;
}

/************************************************************
 * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
 */

static u32 read_hpet_count(void)
{
    return hpet_read32(HPET_COUNTER);
}

static int init_hpet(struct platform_timesource *pts)
{
    u64 hpet_rate = hpet_setup();

    if ( hpet_rate == 0 )
        return 0;

    pts->name = "HPET";
    pts->frequency = hpet_rate;
    pts->read_counter = read_hpet_count;
    pts->counter_bits = 32;

    return 1;
}

/************************************************************
 * PLATFORM TIMER 3: IBM 'CYCLONE' TIMER
 */

int use_cyclone;

/*
 * Although the counter is read via a 64-bit register, I believe it is
 * actually a 40-bit counter. Since this will wrap, I read only the low
 * 32 bits and periodically fold them into a 64-bit software counter,
 * just as for the PIT and HPET.
 */
#define CYCLONE_CBAR_ADDR   0xFEB00CD0
#define CYCLONE_PMCC_OFFSET 0x51A0
#define CYCLONE_MPMC_OFFSET 0x51D0
#define CYCLONE_MPCS_OFFSET 0x51A8
#define CYCLONE_TIMER_FREQ  100000000

/* Cyclone MPMC0 register. */
static volatile u32 *cyclone_timer;

static u32 read_cyclone_count(void)
{
    return *cyclone_timer;
}

static volatile u32 *map_cyclone_reg(unsigned long regaddr)
{
    unsigned long pageaddr = regaddr & PAGE_MASK;
    unsigned long offset = regaddr & ~PAGE_MASK;
    set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
    return (volatile u32 *)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
}

static int init_cyclone(struct platform_timesource *pts)
{
    u32 base;

    if ( !use_cyclone )
        return 0;

    /* Find base address. */
    base = *(map_cyclone_reg(CYCLONE_CBAR_ADDR));
    if ( base == 0 )
    {
        printk(KERN_ERR "Cyclone: Could not find valid CBAR value.\n");
        return 0;
    }

    /* Enable timer and map the counter register. */
    *(map_cyclone_reg(base + CYCLONE_PMCC_OFFSET)) = 1;
    *(map_cyclone_reg(base + CYCLONE_MPCS_OFFSET)) = 1;
    cyclone_timer = map_cyclone_reg(base + CYCLONE_MPMC_OFFSET);

    pts->name = "IBM Cyclone";
    pts->frequency = CYCLONE_TIMER_FREQ;
    pts->read_counter = read_cyclone_count;
    pts->counter_bits = 32;

    return 1;
}

/************************************************************
 * PLATFORM TIMER 4: ACPI PM TIMER
 */

u32 pmtmr_ioport;

/* ACPI PM timer ticks at 3.579545 MHz. */
#define ACPI_PM_FREQUENCY 3579545

static u32 read_pmtimer_count(void)
{
    return inl(pmtmr_ioport);
}

static int init_pmtimer(struct platform_timesource *pts)
{
    if ( pmtmr_ioport == 0 )
        return 0;

    pts->name = "ACPI PM Timer";
    pts->frequency = ACPI_PM_FREQUENCY;
    pts->read_counter = read_pmtimer_count;
    pts->counter_bits = 24;

    return 1;
}

/************************************************************
 * GENERIC PLATFORM TIMER INFRASTRUCTURE
 */

static struct platform_timesource plt_src; /* details of chosen timesource  */
static u32 plt_mask;                /* hardware-width mask                  */
static u64 plt_overflow_period;     /* ns between calls to plt_overflow()   */
static struct time_scale plt_scale; /* scale: platform counter -> nanosecs  */

/* Protected by platform_timer_lock. */
static DEFINE_SPINLOCK(platform_timer_lock);
static s_time_t stime_platform_stamp; /* System time at below platform time */
static u64 platform_timer_stamp;      /* Platform time at above system time */
static u64 plt_stamp64;          /* 64-bit platform counter stamp           */
static u32 plt_stamp;            /* hardware-width platform counter stamp   */
static struct timer plt_overflow_timer;

static void plt_overflow(void *unused)
{
    u32 count;

    spin_lock(&platform_timer_lock);
    count = plt_src.read_counter();
    plt_stamp64 += (count - plt_stamp) & plt_mask;
    plt_stamp = count;
    spin_unlock(&platform_timer_lock);

    set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
}

static s_time_t __read_platform_stime(u64 platform_time)
{
    u64 diff = platform_time - platform_timer_stamp;
    ASSERT(spin_is_locked(&platform_timer_lock));
    return (stime_platform_stamp + scale_delta(diff, &plt_scale));
}

static s_time_t read_platform_stime(void)
{
    u64 count;
    s_time_t stime;

    spin_lock(&platform_timer_lock);
    count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
    stime = __read_platform_stime(count);
    spin_unlock(&platform_timer_lock);

    return stime;
}

static void platform_time_calibration(void)
{
    u64 count;
    s_time_t stamp;

    spin_lock(&platform_timer_lock);
    count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
    stamp = __read_platform_stime(count);
    stime_platform_stamp = stamp;
    platform_timer_stamp = count;
    spin_unlock(&platform_timer_lock);
}

static void resume_platform_timer(void)
{
    /* No change in platform_stime across suspend/resume. */
    platform_timer_stamp = plt_stamp64;
    plt_stamp = plt_src.read_counter();
}

static void init_platform_timer(void)
{
    struct platform_timesource *pts = &plt_src;
    int rc = -1;

    if ( opt_clocksource[0] != '\0' )
    {
        if ( !strcmp(opt_clocksource, "pit") )
            rc = (init_pit(pts), 1);
        else if ( !strcmp(opt_clocksource, "hpet") )
            rc = init_hpet(pts);
        else if ( !strcmp(opt_clocksource, "cyclone") )
            rc = init_cyclone(pts);
        else if ( !strcmp(opt_clocksource, "acpi") )
            rc = init_pmtimer(pts);

        if ( rc <= 0 )
            printk("WARNING: %s clocksource '%s'.\n",
                   (rc == 0) ? "Could not initialise" : "Unrecognised",
                   opt_clocksource);
    }

    if ( (rc <= 0) &&
         !init_cyclone(pts) &&
         !init_hpet(pts) &&
         !init_pmtimer(pts) )
        init_pit(pts);

    plt_mask = (u32)~0u >> (32 - pts->counter_bits);

    set_time_scale(&plt_scale, pts->frequency);

    plt_overflow_period = scale_delta(
        1ull << (pts->counter_bits-1), &plt_scale);
    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
    plt_overflow(NULL);

    platform_timer_stamp = plt_stamp64;

    printk("Platform timer is %s %s\n",
           freq_string(pts->frequency), pts->name);
}

void cstate_save_tsc(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);

    if ( !tsc_invariant )
    {
        t->cstate_plt_count_stamp = plt_src.read_counter();
        rdtscll(t->cstate_tsc_stamp);
    }
}

void cstate_restore_tsc(void)
{
    struct cpu_time *t;
    u32 plt_count_delta;
    u64 tsc_delta;

    if ( !tsc_invariant )
    {
        t = &this_cpu(cpu_time);

        /*
         * If the platform counter overflows, the interrupt will bring the
         * CPU out of C state back to the working state, so the counter
         * cannot wrap past cstate_plt_count_stamp: a 32-bit unsigned
         * delta is sufficient for the calculation.
         */
        plt_count_delta =
            (plt_src.read_counter() - t->cstate_plt_count_stamp) & plt_mask;
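
        /*
         * scale_delta() yields elapsed nanoseconds; multiplying by cpu_khz
         * (TSC ticks per millisecond) and dividing by 10^6 (nanoseconds per
         * millisecond) converts that into elapsed TSC ticks.
         */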
        tsc_delta = scale_delta(plt_count_delta, &plt_scale)*cpu_khz/1000000UL;
        wrmsrl(MSR_IA32_TSC, t->cstate_tsc_stamp + tsc_delta);
    }
}

/***************************************************************************
 * CMOS Timer functions
 ***************************************************************************/

/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
 *
 * [For the Julian calendar (which was used in Russia before 1917,
 * Britain & colonies before 1752, anywhere else before 1582,
 * and is still in use by some communities) leave out the
 * -year/100+year/400 terms, and add 10.]
 *
 * This algorithm was first published by Gauss (I think).
 *
 * WARNING: this function will overflow on 2106-02-07 06:28:16 on
 * machines where long is 32-bit! (However, as time_t is signed, we
 * will already get problems at other places on 2038-01-19 03:14:08.)
 */
unsigned long
mktime (unsigned int year, unsigned int mon,
        unsigned int day, unsigned int hour,
        unsigned int min, unsigned int sec)
{
    /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */
    if ( 0 >= (int) (mon -= 2) )
    {
        mon += 12;
        year -= 1;
    }

    return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
              year*365 - 719499
              )*24 + hour /* now have hours */
             )*60 + min  /* now have minutes */
            )*60 + sec;  /* finally seconds */
}
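
/*
 * Worked example: mktime(1970, 1, 1, 0, 0, 0). The shift above gives
 * mon=11, year=1969; then 1969/4 - 1969/100 + 1969/400 + 367*11/12 + 1
 * = 492 - 19 + 4 + 336 + 1 = 814 and 814 + 1969*365 - 719499 = 0 days,
 * hence 0 seconds: the Unix epoch.
 */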

static unsigned long __get_cmos_time(void)
{
    unsigned int year, mon, day, hour, min, sec;

    sec  = CMOS_READ(RTC_SECONDS);
    min  = CMOS_READ(RTC_MINUTES);
    hour = CMOS_READ(RTC_HOURS);
    day  = CMOS_READ(RTC_DAY_OF_MONTH);
    mon  = CMOS_READ(RTC_MONTH);
    year = CMOS_READ(RTC_YEAR);

    if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
    {
        BCD_TO_BIN(sec);
        BCD_TO_BIN(min);
        BCD_TO_BIN(hour);
        BCD_TO_BIN(day);
        BCD_TO_BIN(mon);
        BCD_TO_BIN(year);
    }

    if ( (year += 1900) < 1970 )
        year += 100;

    return mktime(year, mon, day, hour, min, sec);
}

static unsigned long get_cmos_time(void)
{
    unsigned long res, flags;
    int i;

    spin_lock_irqsave(&rtc_lock, flags);

    /* Read the RTC exactly on the falling edge of the update flag. */
    for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... */
        if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
            break;
    for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */
        if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
            break;

    res = __get_cmos_time();

    spin_unlock_irqrestore(&rtc_lock, flags);
    return res;
}

/***************************************************************************
 * System Time
 ***************************************************************************/

s_time_t get_s_time(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    u64 tsc, delta;
    s_time_t now;

    rdtscll(tsc);
    delta = tsc - t->local_tsc_stamp;
    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);

    return now;
}

static inline void version_update_begin(u32 *version)
{
    /* Explicitly OR with 1 just in case version number gets out of sync. */
    *version = (*version + 1) | 1;
    wmb();
}

static inline void version_update_end(u32 *version)
{
    wmb();
    (*version)++;
}
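
/*
 * The version field acts as a seqlock: it is odd while an update is in
 * progress and even otherwise. A guest reading its vcpu_time_info retries
 * until it observes a stable, even version; a minimal reader sketch
 * (illustrative, not part of this file):
 *
 *     do {
 *         version = u->version;
 *         rmb();
 *         tsc_timestamp = u->tsc_timestamp;
 *         system_time   = u->system_time;
 *         rmb();
 *     } while ( (version & 1) || (version != u->version) );
 */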

void update_vcpu_system_time(struct vcpu *v)
{
    struct cpu_time *t;
    struct vcpu_time_info *u;

    if ( v->vcpu_info == NULL )
        return;

    t = &this_cpu(cpu_time);
    u = &vcpu_info(v, time);

    if ( u->tsc_timestamp == t->local_tsc_stamp )
        return;

    version_update_begin(&u->version);

    u->tsc_timestamp     = t->local_tsc_stamp;
    u->system_time       = t->stime_local_stamp;
    u->tsc_to_system_mul = t->tsc_scale.mul_frac;
    u->tsc_shift         = (s8)t->tsc_scale.shift;

    version_update_end(&u->version);
}

void update_domain_wallclock_time(struct domain *d)
{
    spin_lock(&wc_lock);
    version_update_begin(&shared_info(d, wc_version));
    shared_info(d, wc_sec)  = wc_sec + d->time_offset_seconds;
    shared_info(d, wc_nsec) = wc_nsec;
    version_update_end(&shared_info(d, wc_version));
    spin_unlock(&wc_lock);
}

void domain_set_time_offset(struct domain *d, int32_t time_offset_seconds)
{
    d->time_offset_seconds = time_offset_seconds;
    if ( is_hvm_domain(d) )
        rtc_update_clock(d);
}

int cpu_frequency_change(u64 freq)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    u64 curr_tsc;

    /* Sanity check: CPU frequency allegedly dropping below 1MHz? */
    if ( freq < 1000000u )
    {
        gdprintk(XENLOG_WARNING, "Rejecting CPU frequency change "
                 "to %"PRIu64" Hz.\n", freq);
        return -EINVAL;
    }

    local_irq_disable();
    /* Platform time /first/, as we may be delayed by platform_timer_lock. */
    t->stime_master_stamp = read_platform_stime();
    /* TSC-extrapolated time may be bogus after frequency change. */
    /*t->stime_local_stamp = get_s_time();*/
    t->stime_local_stamp = t->stime_master_stamp;
    rdtscll(curr_tsc);
    t->local_tsc_stamp = curr_tsc;
    set_time_scale(&t->tsc_scale, freq);
    local_irq_enable();

    /* A full epoch should pass before we check for deviation. */
    set_timer(&t->calibration_timer, NOW() + EPOCH);
    if ( smp_processor_id() == 0 )
        platform_time_calibration();

    return 0;
}

/* Set clock to <secs,nsecs> after 00:00:00 UTC, 1 January, 1970. */
void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
{
    u64 x;
    u32 y, _wc_sec, _wc_nsec;
    struct domain *d;

    x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base;
    y = do_div(x, 1000000000);

    spin_lock(&wc_lock);
    wc_sec  = _wc_sec  = (u32)x;
    wc_nsec = _wc_nsec = (u32)y;
    spin_unlock(&wc_lock);

    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        update_domain_wallclock_time(d);
    rcu_read_unlock(&domlist_read_lock);
}

static void local_time_calibration(void *unused)
{
    struct cpu_time *t = &this_cpu(cpu_time);

    /*
     * System timestamps, extrapolated from local and master oscillators,
     * taken during this calibration and the previous calibration.
     */
    s_time_t prev_local_stime, curr_local_stime;
    s_time_t prev_master_stime, curr_master_stime;

    /* TSC timestamps taken during this calibration and prev calibration. */
    u64 prev_tsc, curr_tsc;

    /*
     * System time and TSC ticks elapsed during the previous calibration
     * 'epoch'. These values are down-shifted to fit in 32 bits.
     */
    u64 stime_elapsed64, tsc_elapsed64;
    u32 stime_elapsed32, tsc_elapsed32;

    /* The accumulated error in the local estimate. */
    u64 local_stime_err;

    /* Error correction to slow down a fast local clock. */
    u32 error_factor = 0;

    /* Calculated TSC shift to ensure 32-bit scale multiplier. */
    int tsc_shift = 0;

    /* The overall calibration scale multiplier. */
    u32 calibration_mul_frac;

    prev_tsc = t->local_tsc_stamp;
    prev_local_stime = t->stime_local_stamp;
    prev_master_stime = t->stime_master_stamp;

    /*
     * Disable IRQs to get 'instantaneous' current timestamps. We read
     * platform time first, as we may be delayed when acquiring
     * platform_timer_lock.
     */
    local_irq_disable();
    curr_master_stime = read_platform_stime();
    curr_local_stime = get_s_time();
    rdtscll(curr_tsc);
    local_irq_enable();

#if 0
    printk("PRE%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64"\n",
           smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime);
    printk("CUR%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64
           " -> %"PRId64"\n",
           smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime,
           curr_master_stime - curr_local_stime);
#endif

    /* Local time warps forward if it lags behind master time. */
    if ( curr_local_stime < curr_master_stime )
        curr_local_stime = curr_master_stime;

    stime_elapsed64 = curr_master_stime - prev_master_stime;
    tsc_elapsed64 = curr_tsc - prev_tsc;

    /*
     * Weirdness can happen if we lose sync with the platform timer.
     * We could be smarter here: resync platform timer with local timer?
     */
    if ( ((s64)stime_elapsed64 < (EPOCH / 2)) )
        goto out;

    /*
     * Calculate error-correction factor. This only slows down a fast local
     * clock (slow clocks are warped forwards). The scale factor is clamped
     * to >= 0.5.
     */
    if ( curr_local_stime != curr_master_stime )
    {
        local_stime_err = curr_local_stime - curr_master_stime;
        if ( local_stime_err > EPOCH )
            local_stime_err = EPOCH;
        error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
    }

    /*
     * We require 0 < stime_elapsed < 2^31.
     * This allows us to binary shift a 32-bit tsc_elapsed such that:
     * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
     */
    while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
            ((s32)stime_elapsed64 < 0) )
    {
        stime_elapsed64 >>= 1;
        tsc_elapsed64 >>= 1;
    }

    /* stime_elapsed now fits in a 32-bit word. */
    stime_elapsed32 = (u32)stime_elapsed64;

    /* tsc_elapsed <= 2*stime_elapsed */
    while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
    {
        tsc_elapsed64 >>= 1;
        tsc_shift--;
    }

    /* Local difference must now fit in 32 bits. */
    ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
    tsc_elapsed32 = (u32)tsc_elapsed64;

    /* tsc_elapsed > stime_elapsed */
    ASSERT(tsc_elapsed32 != 0);
    while ( tsc_elapsed32 <= stime_elapsed32 )
    {
        tsc_elapsed32 <<= 1;
        tsc_shift++;
    }

    calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32);
    if ( error_factor != 0 )
        calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor);

#if 0
    printk("---%d: %08x %08x %d\n", smp_processor_id(),
           error_factor, calibration_mul_frac, tsc_shift);
#endif

    /* Record new timestamp information, atomically w.r.t. interrupts. */
    local_irq_disable();
    t->tsc_scale.mul_frac = calibration_mul_frac;
    t->tsc_scale.shift    = tsc_shift;
    t->local_tsc_stamp    = curr_tsc;
    t->stime_local_stamp  = curr_local_stime;
    t->stime_master_stamp = curr_master_stime;
    local_irq_enable();

    update_vcpu_system_time(current);

 out:
    set_timer(&t->calibration_timer, NOW() + EPOCH);

    if ( smp_processor_id() == 0 )
        platform_time_calibration();
}

void init_percpu_time(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    unsigned long flags;
    s_time_t now;

    local_irq_save(flags);
    rdtscll(t->local_tsc_stamp);
    now = !plt_src.read_counter ? 0 : read_platform_stime();
    local_irq_restore(flags);

    t->stime_master_stamp = now;
    t->stime_local_stamp  = now;

    init_timer(&t->calibration_timer, local_time_calibration,
               NULL, smp_processor_id());
    set_timer(&t->calibration_timer, NOW() + EPOCH);
}

/* Late init function (after all CPUs are booted). */
int __init init_xen_time(void)
{
    wc_sec = get_cmos_time();

    local_irq_disable();

    init_percpu_time();

    stime_platform_stamp = 0;
    init_platform_timer();

    /*
     * Check whether the TSC is invariant during deep C states; this is a
     * new feature introduced with Nehalem.
     */
    if ( cpuid_edx(0x80000007) & (1U<<8) )
        tsc_invariant = 1;

    local_irq_enable();

    return 0;
}

/* Early init function. */
void __init early_time_init(void)
{
    u64 tmp = init_pit_and_calibrate_tsc();

    set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);

    do_div(tmp, 1000);
    cpu_khz = (unsigned long)tmp;
    printk("Detected %lu.%03lu MHz processor.\n",
           cpu_khz / 1000, cpu_khz % 1000);

    setup_irq(0, &irq0);
}

/* Keep the PIT enabled so that PIT broadcast works while cpuidle is enabled. */
static int disable_pit_irq(void)
{
    if ( !using_pit && cpu_has_apic && !xen_cpuidle )
    {
        /* Disable PIT CH0 timer interrupt. */
        outb_p(0x30, PIT_MODE);
        outb_p(0, PIT_CH0);
        outb_p(0, PIT_CH0);

        /*
         * If we do not rely on PIT CH0 then we can use HPET for one-shot
         * timer emulation when entering deep C states.
         */
        /*hpet_broadcast_init(); XXX dom0 may rely on RTC interrupt delivery */
    }

    return 0;
}
__initcall(disable_pit_irq);

void pit_broadcast_enter(void)
{
    cpu_set(smp_processor_id(), pit_broadcast_mask);
}

void pit_broadcast_exit(void)
{
    cpu_clear(smp_processor_id(), pit_broadcast_mask);
}

int pit_broadcast_is_available(void)
{
    return xen_cpuidle;
}

void send_timer_event(struct vcpu *v)
{
    send_guest_vcpu_virq(v, VIRQ_TIMER);
}

/* Return secs after 00:00:00 localtime, 1 January, 1970. */
unsigned long get_localtime(struct domain *d)
{
    return wc_sec + (wc_nsec + NOW()) / 1000000000ULL
        + d->time_offset_seconds;
}
1086 /* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
1087 static long cmos_utc_offset; /* in seconds */
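
/*
 * time_suspend() computes cmos_utc_offset as Xen's wallclock time minus the
 * CMOS clock reading; time_resume() then feeds get_cmos_time() +
 * cmos_utc_offset back into do_settime(), restoring the pre-suspend
 * wallclock even if the CMOS clock does not hold UTC.
 */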

int time_suspend(void)
{
    if ( smp_processor_id() == 0 )
    {
        cmos_utc_offset = -get_cmos_time();
        cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
    }

    /* Better to cancel calibration timer for accuracy. */
    kill_timer(&this_cpu(cpu_time).calibration_timer);

    return 0;
}

int time_resume(void)
{
    u64 tmp = init_pit_and_calibrate_tsc();

    disable_pit_irq();

    set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);

    resume_platform_timer();

    do_settime(get_cmos_time() + cmos_utc_offset, 0, read_platform_stime());

    init_percpu_time();

    if ( !is_idle_vcpu(current) )
        update_vcpu_system_time(current);

    return 0;
}

int dom0_pit_access(struct ioreq *ioreq)
{
    /* Is Xen using Channel 2? Then disallow direct dom0 access. */
    if ( using_pit )
        return 0;

    switch ( ioreq->addr )
    {
    case PIT_CH2:
        if ( ioreq->dir == IOREQ_READ )
            ioreq->data = inb(PIT_CH2);
        else
            outb(ioreq->data, PIT_CH2);
        return 1;

    case PIT_MODE:
        if ( ioreq->dir == IOREQ_READ )
            return 0; /* urk! */
        switch ( ioreq->data & 0xc0 )
        {
        case 0xc0: /* Read Back */
            if ( ioreq->data & 0x08 )    /* Select Channel 2? */
                outb(ioreq->data & 0xf8, PIT_MODE);
            if ( !(ioreq->data & 0x06) ) /* Select Channel 0/1? */
                return 1; /* no - we're done */
            /* Filter Channel 2 and reserved bit 0. */
            ioreq->data &= ~0x09;
            return 0; /* emulate ch0/1 readback */
        case 0x80: /* Select Counter 2 */
            outb(ioreq->data, PIT_MODE);
            return 1;
        }

    case 0x61:
        if ( ioreq->dir == IOREQ_READ )
            ioreq->data = inb(0x61);
        else
            outb((inb(0x61) & ~3) | (ioreq->data & 3), 0x61);
        return 1;
    }

    return 0;
}

struct tm wallclock_time(void)
{
    uint64_t seconds;

    if ( !wc_sec )
        return (struct tm) { 0 };

    seconds = NOW() + (wc_sec * 1000000000ull) + wc_nsec;
    do_div(seconds, 1000000000);
    return gmtime(seconds);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */