ia64/xen-unstable

view xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c @ 854:0cd58ce5a503

bitkeeper revision 1.531 (3f9da0175ZA8nMoVtg9tCQeXL4osyw)

time.c, xen_log.c, xen_cpuperf.c:
Add 'independent_wallclock' cmdline and sysctl options to xenolinux.
author kaf24@scramble.cl.cam.ac.uk
date Mon Oct 27 22:45:43 2003 +0000 (2003-10-27)
parents 8305b95a7772
children cb5916bb9685
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
2 ****************************************************************************
3 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
4 * (C) 2002-2003 - Keir Fraser - University of Cambridge
5 ****************************************************************************
6 *
7 * File: arch/xeno/kernel/time.c
8 * Author: Rolf Neugebauer and Keir Fraser
9 *
10 * Description: Interface with Xen to get correct notion of time
11 */
13 /*
14 * linux/arch/i386/kernel/time.c
15 *
16 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
17 *
18 * This file contains the PC-specific time handling details:
19 * reading the RTC at bootup, etc..
20 * 1994-07-02 Alan Modra
21 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
22 * 1995-03-26 Markus Kuhn
23 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
24 * precision CMOS clock update
25 * 1996-05-03 Ingo Molnar
26 * fixed time warps in do_[slow|fast]_gettimeoffset()
27 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
28 * "A Kernel Model for Precision Timekeeping" by Dave Mills
29 * 1998-09-05 (Various)
30 * More robust do_fast_gettimeoffset() algorithm implemented
31 * (works with APM, Cyrix 6x86MX and Centaur C6),
32 * monotonic gettimeofday() with fast_get_timeoffset(),
33 * drift-proof precision TSC calibration on boot
34 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
35 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
36 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
37 * 1998-12-16 Andrea Arcangeli
38 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
39 * because was not accounting lost_ticks.
40 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
41 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
42 * serialize accesses to xtime/lost_ticks).
43 */
45 #include <asm/smp.h>
46 #include <asm/irq.h>
47 #include <asm/msr.h>
48 #include <asm/delay.h>
49 #include <asm/mpspec.h>
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
53 #include <asm/div64.h>
54 #include <asm/hypervisor.h>
55 #include <asm/hypervisor-ifs/dom0_ops.h>
57 #include <linux/mc146818rtc.h>
58 #include <linux/kernel.h>
59 #include <linux/interrupt.h>
60 #include <linux/time.h>
61 #include <linux/init.h>
62 #include <linux/smp.h>
63 #include <linux/irq.h>
64 #include <linux/sysctl.h>
66 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
67 extern rwlock_t xtime_lock;
68 extern unsigned long wall_jiffies;
70 unsigned long cpu_khz; /* get this from Xen, used elsewhere */
72 static unsigned int rdtsc_bitshift;
73 static u32 st_scale_f; /* convert ticks -> usecs */
74 static u32 st_scale_i; /* convert ticks -> usecs */
76 /* These are periodically updated in shared_info, and then copied here. */
77 static u32 shadow_tsc_stamp;
78 static s64 shadow_system_time;
79 static u32 shadow_time_version;
80 static struct timeval shadow_tv;
82 #ifdef CONFIG_XENO_PRIV
83 /* Periodically propagate synchronised time to the RTC and to Xen. */
84 static long last_rtc_update, last_xen_update;
85 #endif
87 static u64 processed_system_time;
/*
 * Normalise a timeval-like object whose tv_usec has gone negative: borrow
 * one second at a time until tv_usec is zero or above. The argument is
 * expanded several times, so it must be a side-effect-free lvalue.
 */
#define HANDLE_USEC_UNDERFLOW(_tv)                      \
    do {                                                \
        for ( ; (_tv).tv_usec < 0; (_tv).tv_sec-- )     \
            (_tv).tv_usec += 1000000;                   \
    } while ( 0 )
/*
 * Normalise a timeval-like object whose tv_usec has reached a full second or
 * more: carry one second at a time until tv_usec is below 1000000. The
 * argument is expanded several times, so it must be a side-effect-free lvalue.
 */
#define HANDLE_USEC_OVERFLOW(_tv)                           \
    do {                                                    \
        for ( ; (_tv).tv_usec >= 1000000; (_tv).tv_sec++ )  \
            (_tv).tv_usec -= 1000000;                       \
    } while ( 0 )
107 /* Does this guest OS track Xen time, or set its wall clock independently? */
108 static int independent_wallclock = 0;
109 static int __init __independent_wallclock(char *str)
110 {
111 independent_wallclock = 1;
112 return 1;
113 }
114 __setup("independent_wallclock", __independent_wallclock);
117 #ifdef CONFIG_XENO_PRIV
118 /*
119 * In order to set the CMOS clock precisely, set_rtc_mmss has to be
120 * called 500 ms after the second nowtime has started, because when
121 * nowtime is written into the registers of the CMOS clock, it will
122 * jump to the next second precisely 500 ms later. Check the Motorola
123 * MC146818A or Dallas DS12887 data sheet for details.
124 *
125 * BUG: This routine does not handle hour overflow properly; it just
126 * sets the minutes. Usually you'll only notice that after reboot!
127 */
128 static int set_rtc_mmss(unsigned long nowtime)
129 {
130 int retval = 0;
131 int real_seconds, real_minutes, cmos_minutes;
132 unsigned char save_control, save_freq_select;
134 /* gets recalled with irq locally disabled */
135 spin_lock(&rtc_lock);
136 save_control = CMOS_READ(RTC_CONTROL);
137 CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
139 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
140 CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
142 cmos_minutes = CMOS_READ(RTC_MINUTES);
143 if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
144 BCD_TO_BIN(cmos_minutes);
146 /*
147 * since we're only adjusting minutes and seconds, don't interfere with
148 * hour overflow. This avoids messing with unknown time zones but requires
149 * your RTC not to be off by more than 15 minutes
150 */
151 real_seconds = nowtime % 60;
152 real_minutes = nowtime / 60;
153 if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 )
154 real_minutes += 30; /* correct for half hour time zone */
155 real_minutes %= 60;
157 if ( abs(real_minutes - cmos_minutes) < 30 )
158 {
159 if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
160 {
161 BIN_TO_BCD(real_seconds);
162 BIN_TO_BCD(real_minutes);
163 }
164 CMOS_WRITE(real_seconds,RTC_SECONDS);
165 CMOS_WRITE(real_minutes,RTC_MINUTES);
166 }
167 else
168 {
169 printk(KERN_WARNING
170 "set_rtc_mmss: can't update from %d to %d\n",
171 cmos_minutes, real_minutes);
172 retval = -1;
173 }
175 /* The following flags have to be released exactly in this order,
176 * otherwise the DS12887 (popular MC146818A clone with integrated
177 * battery and quartz) will not reset the oscillator and will not
178 * update precisely 500 ms later. You won't find this mentioned in
179 * the Dallas Semiconductor data sheets, but who believes data
180 * sheets anyway ... -- Markus Kuhn
181 */
182 CMOS_WRITE(save_control, RTC_CONTROL);
183 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
184 spin_unlock(&rtc_lock);
186 return retval;
187 }
188 #endif
191 /* Must be called with the xtime_lock held for writing. */
192 static void get_time_values_from_xen(void)
193 {
194 do {
195 shadow_time_version = HYPERVISOR_shared_info->time_version2;
196 rmb();
197 shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec;
198 shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec;
199 shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp;
200 shadow_system_time = HYPERVISOR_shared_info->system_time;
201 rmb();
202 }
203 while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
204 }
206 #define TIME_VALUES_UP_TO_DATE \
207 (shadow_time_version == HYPERVISOR_shared_info->time_version2)
210 static inline unsigned long get_time_delta_usecs(void)
211 {
212 s32 delta_tsc;
213 u32 low;
214 u64 delta, tsc;
216 rdtscll(tsc);
217 low = (u32)(tsc >> rdtsc_bitshift);
218 delta_tsc = (s32)(low - shadow_tsc_stamp);
219 if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
220 delta = ((u64)delta_tsc * st_scale_f);
221 delta >>= 32;
222 delta += ((u64)delta_tsc * st_scale_i);
224 return (unsigned long)delta;
225 }
228 void do_gettimeofday(struct timeval *tv)
229 {
230 unsigned long flags, lost;
231 struct timeval _tv;
233 again:
234 read_lock_irqsave(&xtime_lock, flags);
235 _tv.tv_usec = get_time_delta_usecs();
236 if ( (lost = (jiffies - wall_jiffies)) != 0 )
237 _tv.tv_usec += lost * (1000000 / HZ);
238 _tv.tv_sec = xtime.tv_sec;
239 _tv.tv_usec += xtime.tv_usec;
240 if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
241 {
242 /*
243 * We may have blocked for a long time, rendering our calculations
244 * invalid (e.g. the time delta may have overflowed). Detect that
245 * and recalculate with fresh values.
246 */
247 read_unlock_irqrestore(&xtime_lock, flags);
248 write_lock_irqsave(&xtime_lock, flags);
249 get_time_values_from_xen();
250 write_unlock_irqrestore(&xtime_lock, flags);
251 goto again;
252 }
253 read_unlock_irqrestore(&xtime_lock, flags);
255 HANDLE_USEC_OVERFLOW(_tv);
257 *tv = _tv;
258 }
260 void do_settimeofday(struct timeval *tv)
261 {
262 struct timeval newtv;
264 if ( !independent_wallclock && (start_info.dom_id != 0) )
265 return;
267 write_lock_irq(&xtime_lock);
269 /*
270 * Ensure we don't get blocked for a long time so that our time delta
271 * overflows. If that were to happen then our shadow time values would
272 * be stale, so we can retry with fresh ones.
273 */
274 again:
275 tv->tv_usec -= get_time_delta_usecs();
276 if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
277 {
278 get_time_values_from_xen();
279 goto again;
280 }
282 HANDLE_USEC_UNDERFLOW(*tv);
284 newtv = *tv;
286 tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
287 HANDLE_USEC_UNDERFLOW(*tv);
289 xtime = *tv;
290 time_adjust = 0; /* stop active adjtime() */
291 time_status |= STA_UNSYNC;
292 time_maxerror = NTP_PHASE_LIMIT;
293 time_esterror = NTP_PHASE_LIMIT;
295 #ifdef CONFIG_XENO_PRIV
296 if ( start_info.dom_id == 0 )
297 {
298 dom0_op_t op;
299 last_rtc_update = last_xen_update = 0;
300 op.cmd = DOM0_SETTIME;
301 op.u.settime.secs = newtv.tv_sec;
302 op.u.settime.usecs = newtv.tv_usec;
303 op.u.settime.system_time = shadow_system_time;
304 write_unlock_irq(&xtime_lock);
305 HYPERVISOR_dom0_op(&op);
306 }
307 else
308 #endif
309 {
310 write_unlock_irq(&xtime_lock);
311 }
312 }
314 asmlinkage long sys_stime(int *tptr)
315 {
316 int value;
317 struct timeval tv;
319 if ( !capable(CAP_SYS_TIME) )
320 return -EPERM;
322 if ( get_user(value, tptr) )
323 return -EFAULT;
325 tv.tv_sec = value;
326 tv.tv_usec = 0;
328 do_settimeofday(&tv);
330 return 0;
331 }
333 #define NS_PER_TICK (1000000000ULL/HZ)
334 static inline void do_timer_interrupt(int irq, void *dev_id,
335 struct pt_regs *regs)
336 {
337 s64 delta;
339 get_time_values_from_xen();
341 if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 )
342 {
343 printk("Timer ISR: Time went backwards: %lld\n", delta);
344 return;
345 }
347 while ( delta >= NS_PER_TICK )
348 {
349 do_timer(regs);
350 delta -= NS_PER_TICK;
351 processed_system_time += NS_PER_TICK;
352 }
354 if ( !independent_wallclock && ((time_status & STA_UNSYNC) != 0) )
355 {
356 /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */
357 shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ);
358 HANDLE_USEC_UNDERFLOW(shadow_tv);
360 /* Update our unsynchronised xtime appropriately. */
361 xtime = shadow_tv;
362 }
364 #ifdef CONFIG_XENO_PRIV
365 if ( (start_info.dom_id == 0) && ((time_status & STA_UNSYNC) == 0) )
366 {
367 /* Send synchronised time to Xen approximately every minute. */
368 if ( xtime.tv_sec > (last_xen_update + 60) )
369 {
370 dom0_op_t op;
371 struct timeval tv = xtime;
373 tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ);
374 HANDLE_USEC_OVERFLOW(tv);
376 op.cmd = DOM0_SETTIME;
377 op.u.settime.secs = tv.tv_sec;
378 op.u.settime.usecs = tv.tv_usec;
379 op.u.settime.system_time = shadow_system_time;
380 HYPERVISOR_dom0_op(&op);
382 last_xen_update = xtime.tv_sec;
383 }
385 /*
386 * If we have an externally synchronized Linux clock, then update CMOS
387 * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called
388 * as close as possible to 500 ms before the new second starts.
389 */
390 if ( (xtime.tv_sec > (last_rtc_update + 660)) &&
391 (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) &&
392 (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) )
393 {
394 if ( set_rtc_mmss(xtime.tv_sec) == 0 )
395 last_rtc_update = xtime.tv_sec;
396 else
397 last_rtc_update = xtime.tv_sec - 600;
398 }
399 }
400 #endif
401 }
403 static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
404 {
405 write_lock(&xtime_lock);
406 while ( !TIME_VALUES_UP_TO_DATE )
407 do_timer_interrupt(irq, NULL, regs);
408 write_unlock(&xtime_lock);
409 }
411 static struct irqaction irq_timer = {
412 timer_interrupt,
413 SA_INTERRUPT,
414 0,
415 "timer",
416 NULL,
417 NULL
418 };
420 void __init time_init(void)
421 {
422 unsigned long long alarm;
423 u64 __cpu_khz, cpu_freq, scale, scale2;
425 __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
426 do_div(__cpu_khz, 1000);
427 cpu_khz = (u32)__cpu_khz;
428 printk("Xen reported: %lu.%03lu MHz processor.\n",
429 cpu_khz / 1000, cpu_khz % 1000);
431 xtime.tv_sec = HYPERVISOR_shared_info->wc_sec;
432 xtime.tv_usec = HYPERVISOR_shared_info->wc_usec;
433 processed_system_time = shadow_system_time;
435 rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift;
436 cpu_freq = HYPERVISOR_shared_info->cpu_freq;
438 scale = 1000000LL << (32 + rdtsc_bitshift);
439 do_div(scale, (u32)cpu_freq);
441 if ( (cpu_freq >> 32) != 0 )
442 {
443 scale2 = 1000000LL << rdtsc_bitshift;
444 do_div(scale2, (u32)(cpu_freq>>32));
445 scale += scale2;
446 }
448 st_scale_f = scale & 0xffffffff;
449 st_scale_i = scale >> 32;
451 setup_irq(TIMER_IRQ, &irq_timer);
453 rdtscll(alarm);
455 clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
456 }
459 /*
460 * /proc/sys/xeno: This really belongs in another file. It can stay here for
461 * now however.
462 */
463 static ctl_table xeno_subtable[] = {
464 {1, "independent_wallclock", &independent_wallclock,
465 sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
466 {0}
467 };
468 static ctl_table xeno_table[] = {
469 {123, "xeno", NULL, 0, 0555, xeno_subtable},
470 {0}
471 };
472 static int __init xeno_sysctl_init(void)
473 {
474 (void)register_sysctl_table(xeno_table, 0);
475 return 0;
476 }
477 __initcall(xeno_sysctl_init);