
view xen/arch/x86/time.c @ 18806:ed8524f4a044

x86: Re-initialise HPET on resume from S3

Signed-off-by: Guanqun Lu <guanqun.lu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Nov 18 15:55:14 2008 +0000 (2008-11-18)
parents 85fda344dba7
children 9958ef441af0
line source
1 /******************************************************************************
2 * arch/x86/time.c
3 *
4 * Per-CPU time calibration and management.
5 *
6 * Copyright (c) 2002-2005, K A Fraser
7 *
8 * Portions from Linux are:
9 * Copyright (c) 1991, 1992, 1995 Linus Torvalds
10 */
12 #include <xen/config.h>
13 #include <xen/errno.h>
14 #include <xen/event.h>
15 #include <xen/sched.h>
16 #include <xen/lib.h>
18 #include <xen/init.h>
19 #include <xen/time.h>
20 #include <xen/timer.h>
21 #include <xen/smp.h>
22 #include <xen/irq.h>
23 #include <xen/softirq.h>
24 #include <asm/io.h>
25 #include <asm/msr.h>
26 #include <asm/mpspec.h>
27 #include <asm/processor.h>
28 #include <asm/fixmap.h>
29 #include <asm/mc146818rtc.h>
30 #include <asm/div64.h>
31 #include <asm/hpet.h>
32 #include <io_ports.h>
34 /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
35 static char opt_clocksource[10];
36 string_param("clocksource", opt_clocksource);
38 unsigned long cpu_khz; /* CPU clock frequency in kHz. */
39 DEFINE_SPINLOCK(rtc_lock);
40 unsigned long pit0_ticks;
41 static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
42 static DEFINE_SPINLOCK(wc_lock);
44 struct time_scale {
45 int shift;
46 u32 mul_frac;
47 };
49 struct cpu_time {
50 u64 local_tsc_stamp;
51 u64 cstate_tsc_stamp;
52 s_time_t stime_local_stamp;
53 s_time_t stime_master_stamp;
54 struct time_scale tsc_scale;
55 u64 cstate_plt_count_stamp;
56 };
58 struct platform_timesource {
59 char *name;
60 u64 frequency;
61 u64 (*read_counter)(void);
62 int (*init)(struct platform_timesource *);
63 void (*resume)(struct platform_timesource *);
64 int counter_bits;
65 };
67 static DEFINE_PER_CPU(struct cpu_time, cpu_time);
69 /* Calibrate all CPUs to platform timer every EPOCH. */
70 #define EPOCH MILLISECS(1000)
71 static struct timer calibration_timer;
73 /* TSC is invariant on C state entry? */
74 static bool_t tsc_invariant;
76 /*
77 * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
78 * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
79 * softirq handling will happen in time.
80 *
81 * The pit_lock protects the 16- and 32-bit stamp fields as well as the hardware PIT itself.
82 */
83 static DEFINE_SPINLOCK(pit_lock);
84 static u16 pit_stamp16;
85 static u32 pit_stamp32;
86 static int using_pit;
88 /*
89 * 32-bit division of integer dividend and integer divisor yielding
90 * 32-bit fractional quotient.
91 */
92 static inline u32 div_frac(u32 dividend, u32 divisor)
93 {
94 u32 quotient, remainder;
95 ASSERT(dividend < divisor);
96 asm (
97 "divl %4"
98 : "=a" (quotient), "=d" (remainder)
99 : "0" (0), "1" (dividend), "r" (divisor) );
100 return quotient;
101 }
103 /*
104 * 32-bit multiplication of multiplicand and fractional multiplier
105 * yielding 32-bit product (radix point at same position as in multiplicand).
106 */
107 static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
108 {
109 u32 product_int, product_frac;
110 asm (
111 "mul %3"
112 : "=a" (product_frac), "=d" (product_int)
113 : "0" (multiplicand), "r" (multiplier) );
114 return product_int;
115 }
117 /*
118 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
119 * yielding a 64-bit result.
120 */
121 static inline u64 scale_delta(u64 delta, struct time_scale *scale)
122 {
123 u64 product;
124 #ifdef CONFIG_X86_32
125 u32 tmp1, tmp2;
126 #endif
128 if ( scale->shift < 0 )
129 delta >>= -scale->shift;
130 else
131 delta <<= scale->shift;
133 #ifdef CONFIG_X86_32
134 asm (
135 "mul %5 ; "
136 "mov %4,%%eax ; "
137 "mov %%edx,%4 ; "
138 "mul %5 ; "
139 "xor %5,%5 ; "
140 "add %4,%%eax ; "
141 "adc %5,%%edx ; "
142 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
143 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
144 #else
145 asm (
146 "mul %%rdx ; shrd $32,%%rdx,%%rax"
147 : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
148 #endif
150 return product;
151 }
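For readers who prefer plain C over inline assembly, the three helpers above implement 0.32 fixed-point arithmetic: div_frac() computes (dividend << 32) / divisor, mul_frac() computes (multiplicand * multiplier) >> 32, and scale_delta() applies the binary shift and then a 64x32-bit fractional multiply. A portable, assembly-free sketch of the same arithmetic (illustrative only, not part of this file):

static inline u32 div_frac_portable(u32 dividend, u32 divisor)
{
    /* Requires dividend < divisor, as asserted above. */
    return (u32)(((u64)dividend << 32) / divisor);
}

static inline u32 mul_frac_portable(u32 multiplicand, u32 frac)
{
    return (u32)(((u64)multiplicand * frac) >> 32);
}

static inline u64 scale_delta_portable(u64 delta, struct time_scale *scale)
{
    if ( scale->shift < 0 )
        delta >>= -scale->shift;
    else
        delta <<= scale->shift;
    /* 64x32-bit multiply, keeping bits 32..95 of the 96-bit product. */
    return ((delta >> 32) * scale->mul_frac)
           + (((delta & 0xffffffffULL) * scale->mul_frac) >> 32);
}

For example, div_frac_portable(1, 4) is 0x40000000, i.e. 0.25 in 0.32 fixed point.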
153 /*
154 * cpu_mask that denotes the CPUs that need the timer interrupt delivered
155 * as IPIs in place of their local APIC timers.
156 */
157 extern int xen_cpuidle;
158 static cpumask_t pit_broadcast_mask;
160 static void smp_send_timer_broadcast_ipi(void)
161 {
162 int cpu = smp_processor_id();
163 cpumask_t mask;
165 cpus_and(mask, cpu_online_map, pit_broadcast_mask);
167 if ( cpu_isset(cpu, mask) )
168 {
169 cpu_clear(cpu, mask);
170 raise_softirq(TIMER_SOFTIRQ);
171 }
173 if ( !cpus_empty(mask) )
174 {
175 cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
176 }
177 }
179 static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
180 {
181 ASSERT(local_irq_is_enabled());
183 if ( hpet_legacy_irq_tick() )
184 return;
186 /* Only for start-of-day interrupt tests in io_apic.c. */
187 (*(volatile unsigned long *)&pit0_ticks)++;
189 /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
190 if ( !cpu_has_apic )
191 raise_softirq(TIMER_SOFTIRQ);
193 if ( xen_cpuidle )
194 smp_send_timer_broadcast_ipi();
196 /* Emulate a 32-bit PIT counter. */
197 if ( using_pit )
198 {
199 u16 count;
201 spin_lock_irq(&pit_lock);
203 outb(0x80, PIT_MODE);
204 count = inb(PIT_CH2);
205 count |= inb(PIT_CH2) << 8;
207 pit_stamp32 += (u16)(pit_stamp16 - count);
208 pit_stamp16 = count;
210 spin_unlock_irq(&pit_lock);
211 }
212 }
214 static struct irqaction irq0 = { timer_interrupt, "timer", NULL };
216 /* ------ Calibrate the TSC -------
217 * Return processor ticks per second.
218 */
220 #define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
221 #define CALIBRATE_FRAC 20 /* calibrate over 50ms */
222 #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
224 static u64 init_pit_and_calibrate_tsc(void)
225 {
226 u64 start, end;
227 unsigned long count;
229 /* Set PIT channel 0 to HZ Hz. */
230 #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
231 outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
232 outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
233 outb(LATCH >> 8, PIT_CH0); /* MSB */
235 /* Set the Gate high, disable speaker */
236 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
238 /*
239 * Now let's take care of CTC channel 2
240 *
241 * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
242 * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
243 * to begin countdown.
244 */
245 outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
246 outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
247 outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
249 rdtscll(start);
250 for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
251 continue;
252 rdtscll(end);
254 /* Error if the CTC doesn't behave itself. */
255 if ( count == 0 )
256 return 0;
258 return ((end - start) * (u64)CALIBRATE_FRAC);
259 }
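To make the constants concrete (hypothetical numbers, for illustration only): CALIBRATE_LATCH works out to (1193182 + 10) / 20 = 59659, so PIT channel 2 counts down for roughly 50 ms before its output (bit 5 of port 0x61) goes high. If, say, 120,000,000 TSC ticks are observed while that happens, the function returns 120000000 * CALIBRATE_FRAC = 2,400,000,000 ticks per second, i.e. a 2.4 GHz TSC.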
261 static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
262 {
263 u64 tps64 = ticks_per_sec;
264 u32 tps32;
265 int shift = 0;
267 ASSERT(tps64 != 0);
269 while ( tps64 > (MILLISECS(1000)*2) )
270 {
271 tps64 >>= 1;
272 shift--;
273 }
275 tps32 = (u32)tps64;
276 while ( tps32 <= (u32)MILLISECS(1000) )
277 {
278 tps32 <<= 1;
279 shift++;
280 }
282 ts->mul_frac = div_frac(MILLISECS(1000), tps32);
283 ts->shift = shift;
284 }
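A worked example of set_time_scale() and scale_delta() together, using a hypothetical 2.4 GHz tick rate (illustrative only):

struct time_scale ts;
set_time_scale(&ts, 2400000000ULL);
/* 2.4e9 exceeds 2e9, so the first loop halves it once: ts.shift == -1.      */
/* ts.mul_frac == div_frac(1000000000, 1200000000) == 0xD5555555 (about 5/6). */
/* scale_delta(2400000000ULL, &ts) then yields roughly 1000000000, i.e. one   */
/* second's worth of ticks maps to one second of nanoseconds.                 */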
286 static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
287 static unsigned int tsc_calibrate_status = 0;
289 void calibrate_tsc_bp(void)
290 {
291 while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
292 mb();
294 outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
295 outb(CALIBRATE_LATCH >> 8, PIT_CH2);
297 tsc_calibrate_status = 1;
298 wmb();
300 while ( (inb(0x61) & 0x20) == 0 )
301 continue;
303 tsc_calibrate_status = 2;
304 wmb();
306 while ( atomic_read(&tsc_calibrate_gang) != 0 )
307 mb();
308 }
310 void calibrate_tsc_ap(void)
311 {
312 u64 t1, t2, ticks_per_sec;
314 atomic_inc(&tsc_calibrate_gang);
316 while ( tsc_calibrate_status < 1 )
317 mb();
319 rdtscll(t1);
321 while ( tsc_calibrate_status < 2 )
322 mb();
324 rdtscll(t2);
326 ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
327 set_time_scale(&this_cpu(cpu_time).tsc_scale, ticks_per_sec);
329 atomic_dec(&tsc_calibrate_gang);
330 }
332 static char *freq_string(u64 freq)
333 {
334 static char s[20];
335 unsigned int x, y;
336 y = (unsigned int)do_div(freq, 1000000) / 1000;
337 x = (unsigned int)freq;
338 snprintf(s, sizeof(s), "%u.%03uMHz", x, y);
339 return s;
340 }
342 /************************************************************
343 * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
344 */
346 static u64 read_pit_count(void)
347 {
348 u16 count16;
349 u32 count32;
350 unsigned long flags;
352 spin_lock_irqsave(&pit_lock, flags);
354 outb(0x80, PIT_MODE);
355 count16 = inb(PIT_CH2);
356 count16 |= inb(PIT_CH2) << 8;
358 count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);
360 spin_unlock_irqrestore(&pit_lock, flags);
362 return count32;
363 }
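read_pit_count(), like the matching code in timer_interrupt(), widens the wrapping 16-bit channel 2 down-counter into a monotonically increasing 32-bit value: the (u16)(pit_stamp16 - count16) subtraction is performed modulo 2^16, so it yields the ticks elapsed since the previous snapshot even if the hardware counter wrapped in between. A stand-alone sketch of the idiom, with made-up names (not part of this file):

static u32 soft_count32;   /* accumulated, monotonically increasing value */
static u16 last_raw16;     /* previous raw snapshot of the down-counter   */

static u32 widen_pit_style(u16 raw16)
{
    /* Down-counter: elapsed ticks = old - new, computed modulo 2^16. */
    soft_count32 += (u16)(last_raw16 - raw16);
    last_raw16 = raw16;
    return soft_count32;
}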
365 static int init_pit(struct platform_timesource *pts)
366 {
367 using_pit = 1;
368 return 1;
369 }
371 static struct platform_timesource plt_pit =
372 {
373 .name = "PIT",
374 .frequency = CLOCK_TICK_RATE,
375 .read_counter = read_pit_count,
376 .counter_bits = 32,
377 .init = init_pit
378 };
380 /************************************************************
381 * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
382 */
384 static u64 read_hpet_count(void)
385 {
386 return hpet_read32(HPET_COUNTER);
387 }
389 static int init_hpet(struct platform_timesource *pts)
390 {
391 u64 hpet_rate = hpet_setup();
393 if ( hpet_rate == 0 )
394 return 0;
396 pts->frequency = hpet_rate;
397 return 1;
398 }
400 static void resume_hpet(struct platform_timesource *pts)
401 {
402 u64 hpet_rate = hpet_setup();
404 BUG_ON(hpet_rate == 0);
405 pts->frequency = hpet_rate;
406 }
408 static struct platform_timesource plt_hpet =
409 {
410 .name = "HPET",
411 .read_counter = read_hpet_count,
412 .counter_bits = 32,
413 .init = init_hpet,
414 .resume = resume_hpet
415 };
417 /************************************************************
418 * PLATFORM TIMER 3: IBM 'CYCLONE' TIMER
419 */
421 int use_cyclone;
423 /*
424 * Although the counter is read via a 64-bit register, I believe it is actually
425 * a 40-bit counter. Since this will wrap, I read only the low 32 bits and
426 * periodically fold into a 64-bit software counter, just as for PIT and HPET.
427 */
428 #define CYCLONE_CBAR_ADDR 0xFEB00CD0
429 #define CYCLONE_PMCC_OFFSET 0x51A0
430 #define CYCLONE_MPMC_OFFSET 0x51D0
431 #define CYCLONE_MPCS_OFFSET 0x51A8
432 #define CYCLONE_TIMER_FREQ 100000000
434 /* Cyclone MPMC0 register. */
435 static volatile u32 *cyclone_timer;
437 static u64 read_cyclone_count(void)
438 {
439 return *cyclone_timer;
440 }
442 static volatile u32 *map_cyclone_reg(unsigned long regaddr)
443 {
444 unsigned long pageaddr = regaddr & PAGE_MASK;
445 unsigned long offset = regaddr & ~PAGE_MASK;
446 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
447 return (volatile u32 *)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
448 }
450 static int init_cyclone(struct platform_timesource *pts)
451 {
452 u32 base;
454 if ( !use_cyclone )
455 return 0;
457 /* Find base address. */
458 base = *(map_cyclone_reg(CYCLONE_CBAR_ADDR));
459 if ( base == 0 )
460 {
461 printk(KERN_ERR "Cyclone: Could not find valid CBAR value.\n");
462 return 0;
463 }
465 /* Enable timer and map the counter register. */
466 *(map_cyclone_reg(base + CYCLONE_PMCC_OFFSET)) = 1;
467 *(map_cyclone_reg(base + CYCLONE_MPCS_OFFSET)) = 1;
468 cyclone_timer = map_cyclone_reg(base + CYCLONE_MPMC_OFFSET);
469 return 1;
470 }
472 static struct platform_timesource plt_cyclone =
473 {
474 .name = "IBM Cyclone",
475 .frequency = CYCLONE_TIMER_FREQ,
476 .read_counter = read_cyclone_count,
477 .counter_bits = 32,
478 .init = init_cyclone
479 };
481 /************************************************************
482 * PLATFORM TIMER 4: ACPI PM TIMER
483 */
485 u32 pmtmr_ioport;
487 /* ACPI PM timer ticks at 3.579545 MHz. */
488 #define ACPI_PM_FREQUENCY 3579545
490 static u64 read_pmtimer_count(void)
491 {
492 return inl(pmtmr_ioport);
493 }
495 static int init_pmtimer(struct platform_timesource *pts)
496 {
497 if ( pmtmr_ioport == 0 )
498 return 0;
500 return 1;
501 }
503 static struct platform_timesource plt_pmtimer =
504 {
505 .name = "ACPI PM Timer",
506 .frequency = ACPI_PM_FREQUENCY,
507 .read_counter = read_pmtimer_count,
508 .counter_bits = 24,
509 .init = init_pmtimer
510 };
512 /************************************************************
513 * GENERIC PLATFORM TIMER INFRASTRUCTURE
514 */
516 static struct platform_timesource plt_src; /* details of chosen timesource */
517 static u64 plt_mask; /* hardware-width mask */
518 static u64 plt_overflow_period; /* ns between calls to plt_overflow() */
519 static struct time_scale plt_scale; /* scale: platform counter -> nanosecs */
521 /* Protected by platform_timer_lock. */
522 static DEFINE_SPINLOCK(platform_timer_lock);
523 static s_time_t stime_platform_stamp; /* System time at below platform time */
524 static u64 platform_timer_stamp; /* Platform time at above system time */
525 static u64 plt_stamp64; /* 64-bit platform counter stamp */
526 static u64 plt_stamp; /* hardware-width platform counter stamp */
527 static struct timer plt_overflow_timer;
529 static void plt_overflow(void *unused)
530 {
531 u64 count;
533 spin_lock_irq(&platform_timer_lock);
534 count = plt_src.read_counter();
535 plt_stamp64 += (count - plt_stamp) & plt_mask;
536 plt_stamp = count;
537 spin_unlock_irq(&platform_timer_lock);
539 set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
540 }
542 static s_time_t __read_platform_stime(u64 platform_time)
543 {
544 u64 diff = platform_time - platform_timer_stamp;
545 ASSERT(spin_is_locked(&platform_timer_lock));
546 return (stime_platform_stamp + scale_delta(diff, &plt_scale));
547 }
549 static s_time_t read_platform_stime(void)
550 {
551 u64 count;
552 s_time_t stime;
554 ASSERT(!local_irq_is_enabled());
556 spin_lock(&platform_timer_lock);
557 count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
558 stime = __read_platform_stime(count);
559 spin_unlock(&platform_timer_lock);
561 return stime;
562 }
564 static void platform_time_calibration(void)
565 {
566 u64 count;
567 s_time_t stamp;
569 spin_lock_irq(&platform_timer_lock);
570 count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
571 stamp = __read_platform_stime(count);
572 stime_platform_stamp = stamp;
573 platform_timer_stamp = count;
574 spin_unlock_irq(&platform_timer_lock);
575 }
577 static void resume_platform_timer(void)
578 {
579 /* The timer source can be reset when coming back from S3 to S0. */
580 if ( plt_src.resume )
581 plt_src.resume(&plt_src);
583 /* No change in platform_stime across suspend/resume. */
584 platform_timer_stamp = plt_stamp64;
585 plt_stamp = plt_src.read_counter();
586 }
588 static void init_platform_timer(void)
589 {
590 static struct platform_timesource * const plt_timers[] = {
591 &plt_cyclone, &plt_hpet, &plt_pmtimer, &plt_pit
592 };
594 struct platform_timesource *pts;
595 int i, rc = -1;
597 if ( opt_clocksource[0] != '\0' )
598 {
599 for ( i = 0; i < ARRAY_SIZE(plt_timers); i++ )
600 {
601 pts = plt_timers[i];
602 if ( !strcmp(opt_clocksource, pts->name) )
603 {
604 rc = pts->init(pts);
605 break;
606 }
607 }
609 if ( rc <= 0 )
610 printk("WARNING: %s clocksource '%s'.\n",
611 (rc == 0) ? "Could not initialise" : "Unrecognised",
612 opt_clocksource);
613 }
615 if ( rc <= 0 )
616 {
617 for ( i = 0; i < ARRAY_SIZE(plt_timers); i++ )
618 {
619 pts = plt_timers[i];
620 if ( (rc = pts->init(pts)) > 0 )
621 break;
622 }
623 }
625 BUG_ON(rc <= 0);
627 plt_mask = (u64)~0ull >> (64 - pts->counter_bits);
629 set_time_scale(&plt_scale, pts->frequency);
631 plt_overflow_period = scale_delta(
632 1ull << (pts->counter_bits-1), &plt_scale);
633 init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
634 plt_src = *pts;
635 plt_overflow(NULL);
637 platform_timer_stamp = plt_stamp64;
639 printk("Platform timer is %s %s\n",
640 freq_string(pts->frequency), pts->name);
641 }
643 void cstate_save_tsc(void)
644 {
645 struct cpu_time *t = &this_cpu(cpu_time);
647 if ( tsc_invariant )
648 return;
650 t->cstate_plt_count_stamp = plt_src.read_counter();
651 rdtscll(t->cstate_tsc_stamp);
652 }
654 void cstate_restore_tsc(void)
655 {
656 struct cpu_time *t = &this_cpu(cpu_time);
657 u64 plt_count_delta, tsc_delta;
659 if ( tsc_invariant )
660 return;
662 plt_count_delta = (plt_src.read_counter() -
663 t->cstate_plt_count_stamp) & plt_mask;
664 tsc_delta = scale_delta(plt_count_delta, &plt_scale) * cpu_khz/1000000UL;
665 wrmsrl(MSR_IA32_TSC, t->cstate_tsc_stamp + tsc_delta);
666 }
668 /***************************************************************************
669 * CMOS Timer functions
670 ***************************************************************************/
672 /* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
673 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
674 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
675 *
676 * [For the Julian calendar (which was used in Russia before 1917,
677 * Britain & colonies before 1752, anywhere else before 1582,
678 * and is still in use by some communities) leave out the
679 * -year/100+year/400 terms, and add 10.]
680 *
681 * This algorithm was first published by Gauss (I think).
682 *
683 * WARNING: this function will overflow on 2106-02-07 06:28:16 on
684 * machines where long is 32-bit! (However, as time_t is signed, we
685 * will already get problems at other places on 2038-01-19 03:14:08)
686 */
687 unsigned long
688 mktime (unsigned int year, unsigned int mon,
689 unsigned int day, unsigned int hour,
690 unsigned int min, unsigned int sec)
691 {
692 /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */
693 if ( 0 >= (int) (mon -= 2) )
694 {
695 mon += 12;
696 year -= 1;
697 }
699 return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
700 year*365 - 719499
701 )*24 + hour /* now have hours */
702 )*60 + min /* now have minutes */
703 )*60 + sec; /* finally seconds */
704 }
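A quick check of the formula: for 1970-01-02 00:00:00 the month folds to 11 and the year to 1969, so the day count is 492 - 19 + 4 + 336 + 2 + 1969*365 - 719499 = 1, and mktime(1970, 1, 2, 0, 0, 0) evaluates to 86400; the epoch itself, mktime(1970, 1, 1, 0, 0, 0), evaluates to 0.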
706 static unsigned long __get_cmos_time(void)
707 {
708 unsigned int year, mon, day, hour, min, sec;
710 sec = CMOS_READ(RTC_SECONDS);
711 min = CMOS_READ(RTC_MINUTES);
712 hour = CMOS_READ(RTC_HOURS);
713 day = CMOS_READ(RTC_DAY_OF_MONTH);
714 mon = CMOS_READ(RTC_MONTH);
715 year = CMOS_READ(RTC_YEAR);
717 if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
718 {
719 BCD_TO_BIN(sec);
720 BCD_TO_BIN(min);
721 BCD_TO_BIN(hour);
722 BCD_TO_BIN(day);
723 BCD_TO_BIN(mon);
724 BCD_TO_BIN(year);
725 }
727 if ( (year += 1900) < 1970 )
728 year += 100;
730 return mktime(year, mon, day, hour, min, sec);
731 }
733 static unsigned long get_cmos_time(void)
734 {
735 unsigned long res, flags;
736 int i;
738 spin_lock_irqsave(&rtc_lock, flags);
740 /* read RTC exactly on falling edge of update flag */
741 for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... */
742 if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
743 break;
744 for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */
745 if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
746 break;
748 res = __get_cmos_time();
750 spin_unlock_irqrestore(&rtc_lock, flags);
751 return res;
752 }
754 /***************************************************************************
755 * System Time
756 ***************************************************************************/
758 s_time_t get_s_time(void)
759 {
760 struct cpu_time *t = &this_cpu(cpu_time);
761 u64 tsc, delta;
762 s_time_t now;
764 rdtscll(tsc);
765 delta = tsc - t->local_tsc_stamp;
766 now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
768 return now;
769 }
771 static inline void version_update_begin(u32 *version)
772 {
773 /* Explicitly OR with 1 just in case version number gets out of sync. */
774 *version = (*version + 1) | 1;
775 wmb();
776 }
778 static inline void version_update_end(u32 *version)
779 {
780 wmb();
781 (*version)++;
782 }
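version_update_begin()/version_update_end() implement a seqlock-style protocol: the version field is odd while an update is in flight and advances to a new even value once it completes. A guest consuming vcpu_time_info is therefore expected to read it along these lines (illustrative reader-side sketch, not code from this file):

static void snapshot_time_info(const struct vcpu_time_info *info,
                               struct vcpu_time_info *snap)
{
    u32 ver;

    do {
        ver = info->version;
        rmb();          /* read the version before the payload          */
        *snap = *info;
        rmb();          /* read the payload before re-checking version  */
    } while ( (ver & 1) || (ver != info->version) );
}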
784 void update_vcpu_system_time(struct vcpu *v)
785 {
786 struct cpu_time *t;
787 struct vcpu_time_info *u;
789 if ( v->vcpu_info == NULL )
790 return;
792 t = &this_cpu(cpu_time);
793 u = &vcpu_info(v, time);
795 if ( u->tsc_timestamp == t->local_tsc_stamp )
796 return;
798 version_update_begin(&u->version);
800 u->tsc_timestamp = t->local_tsc_stamp;
801 u->system_time = t->stime_local_stamp;
802 u->tsc_to_system_mul = t->tsc_scale.mul_frac;
803 u->tsc_shift = (s8)t->tsc_scale.shift;
805 version_update_end(&u->version);
806 }
808 void update_domain_wallclock_time(struct domain *d)
809 {
810 spin_lock(&wc_lock);
811 version_update_begin(&shared_info(d, wc_version));
812 shared_info(d, wc_sec) = wc_sec + d->time_offset_seconds;
813 shared_info(d, wc_nsec) = wc_nsec;
814 version_update_end(&shared_info(d, wc_version));
815 spin_unlock(&wc_lock);
816 }
818 void domain_set_time_offset(struct domain *d, int32_t time_offset_seconds)
819 {
820 d->time_offset_seconds = time_offset_seconds;
821 if ( is_hvm_domain(d) )
822 rtc_update_clock(d);
823 }
825 int cpu_frequency_change(u64 freq)
826 {
827 struct cpu_time *t = &this_cpu(cpu_time);
828 u64 curr_tsc;
830 /* Sanity check: CPU frequency allegedly dropping below 1MHz? */
831 if ( freq < 1000000u )
832 {
833 gdprintk(XENLOG_WARNING, "Rejecting CPU frequency change "
834 "to %"PRIu64" Hz.\n", freq);
835 return -EINVAL;
836 }
838 local_irq_disable();
839 /* Platform time /first/, as we may be delayed by platform_timer_lock. */
840 t->stime_master_stamp = read_platform_stime();
841 /* TSC-extrapolated time may be bogus after frequency change. */
842 /*t->stime_local_stamp = get_s_time();*/
843 t->stime_local_stamp = t->stime_master_stamp;
844 rdtscll(curr_tsc);
845 t->local_tsc_stamp = curr_tsc;
846 set_time_scale(&t->tsc_scale, freq);
847 local_irq_enable();
849 update_vcpu_system_time(current);
851 /* A full epoch should pass before we check for deviation. */
852 if ( smp_processor_id() == 0 )
853 {
854 set_timer(&calibration_timer, NOW() + EPOCH);
855 platform_time_calibration();
856 }
858 return 0;
859 }
861 /* Set clock to <secs,nsecs> after 00:00:00 UTC, 1 January, 1970. */
862 void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
863 {
864 u64 x;
865 u32 y, _wc_sec, _wc_nsec;
866 struct domain *d;
868 x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base;
869 y = do_div(x, 1000000000);
871 spin_lock(&wc_lock);
872 wc_sec = _wc_sec = (u32)x;
873 wc_nsec = _wc_nsec = (u32)y;
874 spin_unlock(&wc_lock);
876 rcu_read_lock(&domlist_read_lock);
877 for_each_domain ( d )
878 update_domain_wallclock_time(d);
879 rcu_read_unlock(&domlist_read_lock);
880 }
882 /* Per-CPU communication between rendezvous IRQ and softirq handler. */
883 struct cpu_calibration {
884 u64 local_tsc_stamp;
885 s_time_t stime_local_stamp;
886 s_time_t stime_master_stamp;
887 };
888 static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration);
890 /* Softirq handler for per-CPU time calibration. */
891 static void local_time_calibration(void)
892 {
893 struct cpu_time *t = &this_cpu(cpu_time);
894 struct cpu_calibration *c = &this_cpu(cpu_calibration);
896 /*
897 * System timestamps, extrapolated from local and master oscillators,
898 * taken during this calibration and the previous calibration.
899 */
900 s_time_t prev_local_stime, curr_local_stime;
901 s_time_t prev_master_stime, curr_master_stime;
903 /* TSC timestamps taken during this calibration and prev calibration. */
904 u64 prev_tsc, curr_tsc;
906 /*
907 * System time and TSC ticks elapsed during the previous calibration
908 * 'epoch'. These values are down-shifted to fit in 32 bits.
909 */
910 u64 stime_elapsed64, tsc_elapsed64;
911 u32 stime_elapsed32, tsc_elapsed32;
913 /* The accumulated error in the local estimate. */
914 u64 local_stime_err;
916 /* Error correction to slow down a fast local clock. */
917 u32 error_factor = 0;
919 /* Calculated TSC shift to ensure 32-bit scale multiplier. */
920 int tsc_shift = 0;
922 /* The overall calibration scale multiplier. */
923 u32 calibration_mul_frac;
925 prev_tsc = t->local_tsc_stamp;
926 prev_local_stime = t->stime_local_stamp;
927 prev_master_stime = t->stime_master_stamp;
929 /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
930 local_irq_disable();
931 curr_tsc = c->local_tsc_stamp;
932 curr_local_stime = c->stime_local_stamp;
933 curr_master_stime = c->stime_master_stamp;
934 local_irq_enable();
936 #if 0
937 printk("PRE%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64"\n",
938 smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime);
939 printk("CUR%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64
940 " -> %"PRId64"\n",
941 smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime,
942 curr_master_stime - curr_local_stime);
943 #endif
945 /* Local time warps forward if it lags behind master time. */
946 if ( curr_local_stime < curr_master_stime )
947 curr_local_stime = curr_master_stime;
949 stime_elapsed64 = curr_master_stime - prev_master_stime;
950 tsc_elapsed64 = curr_tsc - prev_tsc;
952 /*
953 * Weirdness can happen if we lose sync with the platform timer.
954 * We could be smarter here: resync platform timer with local timer?
955 */
956 if ( ((s64)stime_elapsed64 < (EPOCH / 2)) )
957 goto out;
959 /*
960 * Calculate error-correction factor. This only slows down a fast local
961 * clock (slow clocks are warped forwards). The scale factor is clamped
962 * to >= 0.5.
963 */
964 if ( curr_local_stime != curr_master_stime )
965 {
966 local_stime_err = curr_local_stime - curr_master_stime;
967 if ( local_stime_err > EPOCH )
968 local_stime_err = EPOCH;
969 error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
970 }
972 /*
973 * We require 0 < stime_elapsed < 2^31.
974 * This allows us to binary shift a 32-bit tsc_elapsed such that:
975 * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
976 */
977 while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
978 ((s32)stime_elapsed64 < 0) )
979 {
980 stime_elapsed64 >>= 1;
981 tsc_elapsed64 >>= 1;
982 }
984 /* stime_elapsed now fits in a 32-bit word. */
985 stime_elapsed32 = (u32)stime_elapsed64;
987 /* tsc_elapsed <= 2*stime_elapsed */
988 while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
989 {
990 tsc_elapsed64 >>= 1;
991 tsc_shift--;
992 }
994 /* Local difference must now fit in 32 bits. */
995 ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
996 tsc_elapsed32 = (u32)tsc_elapsed64;
998 /* tsc_elapsed > stime_elapsed */
999 ASSERT(tsc_elapsed32 != 0);
1000 while ( tsc_elapsed32 <= stime_elapsed32 )
1001 {
1002 tsc_elapsed32 <<= 1;
1003 tsc_shift++;
1004 }
1006 calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32);
1007 if ( error_factor != 0 )
1008 calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor);
1010 #if 0
1011 printk("---%d: %08x %08x %d\n", smp_processor_id(),
1012 error_factor, calibration_mul_frac, tsc_shift);
1013 #endif
1015 /* Record new timestamp information, atomically w.r.t. interrupts. */
1016 local_irq_disable();
1017 t->tsc_scale.mul_frac = calibration_mul_frac;
1018 t->tsc_scale.shift = tsc_shift;
1019 t->local_tsc_stamp = curr_tsc;
1020 t->stime_local_stamp = curr_local_stime;
1021 t->stime_master_stamp = curr_master_stime;
1022 local_irq_enable();
1024 update_vcpu_system_time(current);
1026 out:
1027 if ( smp_processor_id() == 0 )
1028 {
1029 set_timer(&calibration_timer, NOW() + EPOCH);
1030 platform_time_calibration();
1031 }
1032 }
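As a worked example of the error correction above (hypothetical numbers): with EPOCH equal to one second, a local extrapolation that came out 10 ms ahead of master time gives local_stime_err = 10,000,000 ns and error_factor = div_frac(1000000000, 1010000000), roughly 0.9901 in 0.32 fixed point, so the new tsc_scale multiplier is trimmed by about 1% for the next epoch; a local clock that lags is instead simply warped forward to master time.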
1034 /*
1035 * Rendezvous for all CPUs in IRQ context.
1036 * Master CPU snapshots the platform timer.
1037 * All CPUS snapshot their local TSC and extrapolation of system time.
1038 */
1039 struct calibration_rendezvous {
1040 cpumask_t cpu_calibration_map;
1041 atomic_t nr_cpus;
1042 s_time_t master_stime;
1043 };
1045 static void time_calibration_rendezvous(void *_r)
1046 {
1047 struct cpu_calibration *c = &this_cpu(cpu_calibration);
1048 struct calibration_rendezvous *r = _r;
1049 unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
1051 if ( smp_processor_id() == 0 )
1052 {
1053 while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
1054 cpu_relax();
1055 r->master_stime = read_platform_stime();
1056 mb(); /* write r->master_stime /then/ signal */
1057 atomic_inc(&r->nr_cpus);
1058 }
1059 else
1060 {
1061 atomic_inc(&r->nr_cpus);
1062 while ( atomic_read(&r->nr_cpus) != total_cpus )
1063 cpu_relax();
1064 mb(); /* receive signal /then/ read r->master_stime */
1065 }
1067 rdtscll(c->local_tsc_stamp);
1068 c->stime_local_stamp = get_s_time();
1069 c->stime_master_stamp = r->master_stime;
1071 raise_softirq(TIME_CALIBRATE_SOFTIRQ);
1072 }
1074 static void time_calibration(void *unused)
1075 {
1076 struct calibration_rendezvous r = {
1077 .cpu_calibration_map = cpu_online_map,
1078 .nr_cpus = ATOMIC_INIT(0)
1079 };
1081 /* @wait=1 because we must wait for all cpus before freeing @r. */
1082 on_selected_cpus(r.cpu_calibration_map,
1083 time_calibration_rendezvous, &r, 0, 1);
1084 }
1086 void init_percpu_time(void)
1087 {
1088 struct cpu_time *t = &this_cpu(cpu_time);
1089 unsigned long flags;
1090 s_time_t now;
1092 local_irq_save(flags);
1093 rdtscll(t->local_tsc_stamp);
1094 now = !plt_src.read_counter ? 0 : read_platform_stime();
1095 local_irq_restore(flags);
1097 t->stime_master_stamp = now;
1098 t->stime_local_stamp = now;
1100 if ( smp_processor_id() == 0 )
1101 {
1102 init_timer(&calibration_timer, time_calibration, NULL, 0);
1103 set_timer(&calibration_timer, NOW() + EPOCH);
1104 }
1105 }
1107 /* Late init function (after all CPUs are booted). */
1108 int __init init_xen_time(void)
1109 {
1110 /* Check whether the TSC is invariant in deep C states;
1111 this is a new feature introduced by Nehalem. */
1112 if ( cpuid_edx(0x80000007) & (1u<<8) )
1113 tsc_invariant = 1;
1115 open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
1117 init_percpu_time();
1119 stime_platform_stamp = 0;
1120 init_platform_timer();
1122 do_settime(get_cmos_time(), 0, NOW());
1124 return 0;
1125 }
1128 /* Early init function. */
1129 void __init early_time_init(void)
1130 {
1131 u64 tmp = init_pit_and_calibrate_tsc();
1133 set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
1135 do_div(tmp, 1000);
1136 cpu_khz = (unsigned long)tmp;
1137 printk("Detected %lu.%03lu MHz processor.\n",
1138 cpu_khz / 1000, cpu_khz % 1000);
1140 setup_irq(0, &irq0);
1141 }
1143 /* force_hpet_broadcast: if true, force use of HPET broadcast to work around
1144 the local APIC timer stopping in deep C states when the PIT is disabled. */
1145 static int force_hpet_broadcast;
1146 boolean_param("hpetbroadcast", force_hpet_broadcast);
1148 /* Keep the PIT enabled so that PIT broadcast keeps working while cpuidle is enabled. */
1149 static int disable_pit_irq(void)
1150 {
1151 if ( using_pit || !cpu_has_apic || (xen_cpuidle && !force_hpet_broadcast) )
1152 return 0;
1154 /*
1155 * If we do not rely on PIT CH0 then we can use HPET for one-shot timer
1156 * emulation when entering deep C states.
1157 * XXX dom0 may rely on RTC interrupt delivery, so only enable
1158 * hpet_broadcast if force_hpet_broadcast.
1159 */
1160 if ( xen_cpuidle && force_hpet_broadcast )
1161 {
1162 hpet_broadcast_init();
1163 if ( !hpet_broadcast_is_available() )
1164 {
1165 printk("HPET broadcast init failed; falling back to PIT broadcast.\n");
1166 return 0;
1167 }
1168 }
1170 /* Disable PIT CH0 timer interrupt. */
1171 outb_p(0x30, PIT_MODE);
1172 outb_p(0, PIT_CH0);
1173 outb_p(0, PIT_CH0);
1175 return 0;
1176 }
1177 __initcall(disable_pit_irq);
1179 void pit_broadcast_enter(void)
1180 {
1181 cpu_set(smp_processor_id(), pit_broadcast_mask);
1182 }
1184 void pit_broadcast_exit(void)
1185 {
1186 int cpu = smp_processor_id();
1188 if ( cpu_test_and_clear(cpu, pit_broadcast_mask) )
1189 reprogram_timer(per_cpu(timer_deadline, cpu));
1190 }
1192 int pit_broadcast_is_available(void)
1193 {
1194 return xen_cpuidle;
1195 }
1197 void send_timer_event(struct vcpu *v)
1198 {
1199 send_guest_vcpu_virq(v, VIRQ_TIMER);
1200 }
1202 /* Return secs after 00:00:00 localtime, 1 January, 1970. */
1203 unsigned long get_localtime(struct domain *d)
1204 {
1205 return wc_sec + (wc_nsec + NOW()) / 1000000000ULL
1206 + d->time_offset_seconds;
1207 }
1209 /* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
1210 static long cmos_utc_offset; /* in seconds */
1212 int time_suspend(void)
1213 {
1214 if ( smp_processor_id() == 0 )
1215 {
1216 cmos_utc_offset = -get_cmos_time();
1217 cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
1218 kill_timer(&calibration_timer);
1219 }
1221 /* Better to cancel calibration timer for accuracy. */
1222 clear_bit(TIME_CALIBRATE_SOFTIRQ, &softirq_pending(smp_processor_id()));
1224 return 0;
1225 }
1227 int time_resume(void)
1228 {
1229 /*u64 tmp = */init_pit_and_calibrate_tsc();
1231 /* Disable this while calibrate_tsc_ap() also is skipped. */
1232 /*set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);*/
1234 resume_platform_timer();
1236 disable_pit_irq();
1238 init_percpu_time();
1240 do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
1242 if ( !is_idle_vcpu(current) )
1243 update_vcpu_system_time(current);
1245 return 0;
1246 }
1248 int dom0_pit_access(struct ioreq *ioreq)
1249 {
1250 /* Is Xen using Channel 2? Then disallow direct dom0 access. */
1251 if ( using_pit )
1252 return 0;
1254 switch ( ioreq->addr )
1255 {
1256 case PIT_CH2:
1257 if ( ioreq->dir == IOREQ_READ )
1258 ioreq->data = inb(PIT_CH2);
1259 else
1260 outb(ioreq->data, PIT_CH2);
1261 return 1;
1263 case PIT_MODE:
1264 if ( ioreq->dir == IOREQ_READ )
1265 return 0; /* urk! */
1266 switch ( ioreq->data & 0xc0 )
1267 {
1268 case 0xc0: /* Read Back */
1269 if ( ioreq->data & 0x08 ) /* Select Channel 2? */
1270 outb(ioreq->data & 0xf8, PIT_MODE);
1271 if ( !(ioreq->data & 0x06) ) /* Select Channel 0/1? */
1272 return 1; /* no - we're done */
1273 /* Filter Channel 2 and reserved bit 0. */
1274 ioreq->data &= ~0x09;
1275 return 0; /* emulate ch0/1 readback */
1276 case 0x80: /* Select Counter 2 */
1277 outb(ioreq->data, PIT_MODE);
1278 return 1;
1279 }
1281 case 0x61:
1282 if ( ioreq->dir == IOREQ_READ )
1283 ioreq->data = inb(0x61);
1284 else
1285 outb((inb(0x61) & ~3) | (ioreq->data & 3), 0x61);
1286 return 1;
1287 }
1289 return 0;
1290 }
1292 struct tm wallclock_time(void)
1293 {
1294 uint64_t seconds;
1296 if ( !wc_sec )
1297 return (struct tm) { 0 };
1299 seconds = NOW() + (wc_sec * 1000000000ull) + wc_nsec;
1300 do_div(seconds, 1000000000);
1301 return gmtime(seconds);
1302 }
1304 /*
1305 * Local variables:
1306 * mode: C
1307 * c-set-style: "BSD"
1308 * c-basic-offset: 4
1309 * tab-width: 4
1310 * indent-tabs-mode: nil
1311 * End:
1312 */