ia64/xen-unstable

changeset 852:8305b95a7772

bitkeeper revision 1.530 (3f9d4b50eBCuXVk_ImRaxIb9MV1j3g)

time.c:
new file
Many files:
Major fixes to time handling in Xen and Xenolinux. DOM0 can now set the wallclock time for all other domains, for example via ntpd.
author kaf24@scramble.cl.cam.ac.uk
date Mon Oct 27 16:44:00 2003 +0000 (2003-10-27)
parents afd42c11240f
children a8685755bf95
files .rootkeys tools/internal/xi_build.c tools/internal/xi_create.c tools/internal/xi_destroy.c tools/internal/xi_list.c tools/internal/xi_sched_domain.c tools/internal/xi_sched_global.c tools/internal/xi_start.c tools/internal/xi_stop.c tools/internal/xi_usage.c tools/internal/xi_vif_params.c xen/arch/i386/apic.c xen/arch/i386/i8259.c xen/arch/i386/setup.c xen/arch/i386/time.c xen/common/dom0_ops.c xen/common/kernel.c xen/common/schedule.c xen/common/timer.c xen/include/hypervisor-ifs/dom0_ops.h xen/include/hypervisor-ifs/hypervisor-if.h xen/include/xeno/sched.h xen/include/xeno/time.h xen/include/xeno/tqueue.h xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c xenolinux-2.4.22-sparse/kernel/time.c
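
The description above notes that DOM0 can now push the wallclock time into Xen (and from there to every domain) via the new DOM0_SETTIME operation added in this changeset. The following is a minimal, hypothetical sketch of the caller side: the dom0_settime_st layout and the DOM0_SETTIME command number are copied from the dom0_ops.h hunk below, while submit_dom0_op() is a placeholder for whatever path delivers the op to Xen (cf. dom0_defs.h in the xi_* tools) and is not part of the patch.

    #include <stddef.h>
    #include <sys/time.h>

    typedef unsigned long long u64;

    #define DOM0_SETTIME 17                 /* from dom0_ops.h in this changeset */

    typedef struct dom0_settime_st {
        /* IN variables. */
        unsigned long secs, usecs;
        u64 system_time;
    } dom0_settime_t;

    /* Placeholder: assumed helper that wraps the op in a dom0_op_t and issues
     * the dom0_op hypercall; the real tools do this via dom0_defs.h. */
    extern int submit_dom0_op(unsigned long cmd, void *op);

    /*
     * Tell Xen "the wallclock read <secs,usecs> at the instant Xen's system
     * time (ns since boot) was xen_system_time". Xen's do_settime() then
     * compensates for any delay before the op is processed.
     */
    static int settime_from_host_clock(u64 xen_system_time)
    {
        dom0_settime_t st;
        struct timeval tv;

        gettimeofday(&tv, NULL);            /* e.g. an ntpd-disciplined clock */
        st.secs        = (unsigned long)tv.tv_sec;
        st.usecs       = (unsigned long)tv.tv_usec;
        st.system_time = xen_system_time;

        return submit_dom0_op(DOM0_SETTIME, &st);
    }
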
line diff
     1.1 --- a/.rootkeys	Fri Oct 17 10:22:54 2003 +0000
     1.2 +++ b/.rootkeys	Mon Oct 27 16:44:00 2003 +0000
     1.3 @@ -639,6 +639,7 @@ 3e5a4e686V0nioX2ZpFf056sgvdiQw xenolinux
     1.4  3e5a4e68W_hpMlM3u_-QOKMp3gzcwQ xenolinux-2.4.22-sparse/init/do_mounts.c
     1.5  3e5a4e68TJJavrunYwTAnLRSBxSYqQ xenolinux-2.4.22-sparse/kernel/panic.c
     1.6  3f1056a9LXNTgSzITNh1mb-MIKV1Ng xenolinux-2.4.22-sparse/kernel/printk.c
     1.7 +3f9d4b44247udoqWEgFkaHiWv6Uvyg xenolinux-2.4.22-sparse/kernel/time.c
     1.8  3eba8f878XjouY21EkQBXwYBsPsipQ xenolinux-2.4.22-sparse/lndir-rel
     1.9  3e6e7c1efbQe93xCvOpOVCnXTMmQ5w xenolinux-2.4.22-sparse/mkbuildtree
    1.10  3e5a4e68GxCIaFH4sy01v1wjapetaA xenolinux-2.4.22-sparse/mm/memory.c
     2.1 --- a/tools/internal/xi_build.c	Fri Oct 17 10:22:54 2003 +0000
     2.2 +++ b/tools/internal/xi_build.c	Mon Oct 27 16:44:00 2003 +0000
     2.3 @@ -1,5 +1,4 @@
     2.4  
     2.5 -#include "hypervisor-ifs/dom0_ops.h"
     2.6  #include "dom0_defs.h"
     2.7  #include "mem_defs.h"
     2.8  
     3.1 --- a/tools/internal/xi_create.c	Fri Oct 17 10:22:54 2003 +0000
     3.2 +++ b/tools/internal/xi_create.c	Mon Oct 27 16:44:00 2003 +0000
     3.3 @@ -4,7 +4,6 @@
     3.4   * Usage: <executable> <mem_kb> <os image> <num_vifs> 
     3.5   */
     3.6  
     3.7 -#include <hypervisor-ifs/dom0_ops.h>
     3.8  #include "dom0_defs.h"
     3.9  #include "mem_defs.h"
    3.10  
     4.1 --- a/tools/internal/xi_destroy.c	Fri Oct 17 10:22:54 2003 +0000
     4.2 +++ b/tools/internal/xi_destroy.c	Mon Oct 27 16:44:00 2003 +0000
     4.3 @@ -1,5 +1,4 @@
     4.4  
     4.5 -#include "hypervisor-ifs/dom0_ops.h"
     4.6  #include "dom0_defs.h"
     4.7  #include "mem_defs.h"
     4.8  
     5.1 --- a/tools/internal/xi_list.c	Fri Oct 17 10:22:54 2003 +0000
     5.2 +++ b/tools/internal/xi_list.c	Mon Oct 27 16:44:00 2003 +0000
     5.3 @@ -21,7 +21,6 @@
     5.4   */
     5.5  #define SILENT_ERRORS_FROM_XEN
     5.6  
     5.7 -#include "hypervisor-ifs/dom0_ops.h"
     5.8  #include "dom0_defs.h"
     5.9  #include "mem_defs.h"
    5.10  
     6.1 --- a/tools/internal/xi_sched_domain.c	Fri Oct 17 10:22:54 2003 +0000
     6.2 +++ b/tools/internal/xi_sched_domain.c	Mon Oct 27 16:44:00 2003 +0000
     6.3 @@ -1,5 +1,4 @@
     6.4  
     6.5 -#include "hypervisor-ifs/dom0_ops.h"
     6.6  #include "dom0_defs.h"
     6.7  #include "mem_defs.h"
     6.8  
     7.1 --- a/tools/internal/xi_sched_global.c	Fri Oct 17 10:22:54 2003 +0000
     7.2 +++ b/tools/internal/xi_sched_global.c	Mon Oct 27 16:44:00 2003 +0000
     7.3 @@ -1,5 +1,4 @@
     7.4  
     7.5 -#include "hypervisor-ifs/dom0_ops.h"
     7.6  #include "dom0_defs.h"
     7.7  #include "mem_defs.h"
     7.8  
     8.1 --- a/tools/internal/xi_start.c	Fri Oct 17 10:22:54 2003 +0000
     8.2 +++ b/tools/internal/xi_start.c	Mon Oct 27 16:44:00 2003 +0000
     8.3 @@ -1,5 +1,4 @@
     8.4  
     8.5 -#include "hypervisor-ifs/dom0_ops.h"
     8.6  #include "dom0_defs.h"
     8.7  #include "mem_defs.h"
     8.8  
     9.1 --- a/tools/internal/xi_stop.c	Fri Oct 17 10:22:54 2003 +0000
     9.2 +++ b/tools/internal/xi_stop.c	Mon Oct 27 16:44:00 2003 +0000
     9.3 @@ -1,4 +1,4 @@
     9.4 -#include "hypervisor-ifs/dom0_ops.h"
     9.5 +
     9.6  #include "dom0_defs.h"
     9.7  #include "mem_defs.h"
     9.8  
    10.1 --- a/tools/internal/xi_usage.c	Fri Oct 17 10:22:54 2003 +0000
    10.2 +++ b/tools/internal/xi_usage.c	Mon Oct 27 16:44:00 2003 +0000
    10.3 @@ -1,5 +1,4 @@
    10.4  
    10.5 -#include "hypervisor-ifs/dom0_ops.h"
    10.6  #include "dom0_defs.h"
    10.7  #include "mem_defs.h"
    10.8  
    11.1 --- a/tools/internal/xi_vif_params.c	Fri Oct 17 10:22:54 2003 +0000
    11.2 +++ b/tools/internal/xi_vif_params.c	Mon Oct 27 16:44:00 2003 +0000
    11.3 @@ -1,5 +1,4 @@
    11.4  
    11.5 -#include "hypervisor-ifs/dom0_ops.h"
    11.6  #include "dom0_defs.h"
    11.7  #include "mem_defs.h"
    11.8  
    12.1 --- a/xen/arch/i386/apic.c	Fri Oct 17 10:22:54 2003 +0000
    12.2 +++ b/xen/arch/i386/apic.c	Mon Oct 27 16:44:00 2003 +0000
    12.3 @@ -713,36 +713,10 @@ int reprogram_ac_timer(s_time_t timeout)
    12.4  
    12.5  /*
    12.6   * Local timer interrupt handler.
    12.7 - * here the programmable, accurate timers are executed.
    12.8 - * If we are on CPU0 and we should have updated jiffies, we do this 
    12.9 - * as well and and deal with traditional linux timers. Note, that of 
   12.10 - * the timer APIC on CPU does not go off every 10ms or so the linux 
   12.11 - * timers loose accuracy, but that shouldn't be a problem.
   12.12 + * Here the programmable, accurate timers are executed.
   12.13   */
   12.14 -static s_time_t last_cpu0_tirq = 0;
   12.15 -inline void smp_local_timer_interrupt(struct pt_regs * regs)
   12.16 +inline void smp_local_timer_interrupt(struct pt_regs *regs)
   12.17  {
   12.18 -    int cpu = smp_processor_id();
   12.19 -    s_time_t diff, now;
   12.20 -
   12.21 -    /* if CPU 0 do old timer stuff  */
   12.22 -    if (cpu == 0)
   12.23 -    {
   12.24 -        now = NOW();
   12.25 -        diff = now - last_cpu0_tirq;
   12.26 -
   12.27 -        if (diff <= 0) {
   12.28 -            printk ("System Time went backwards: %lld\n", diff);
   12.29 -            return;
   12.30 -        }
   12.31 -
   12.32 -        while (diff >= MILLISECS(10)) {
   12.33 -            do_timer(regs);
   12.34 -            diff           -= MILLISECS(10);
   12.35 -            last_cpu0_tirq += MILLISECS(10);
   12.36 -        }
   12.37 -    }
   12.38 -    /* call accurate timer function */
   12.39      do_ac_timer();
   12.40  }
   12.41  
    13.1 --- a/xen/arch/i386/i8259.c	Fri Oct 17 10:22:54 2003 +0000
    13.2 +++ b/xen/arch/i386/i8259.c	Mon Oct 27 16:44:00 2003 +0000
    13.3 @@ -468,14 +468,3 @@ void __init init_IRQ(void)
    13.4      setup_irq(2, &irq2);
    13.5  }
    13.6  
    13.7 -/*
    13.8 - * we only need the timer interrupt for callibrating the tsc<->time<->bus cycle
    13.9 - * mappings. After this all timeing related functions should be run of the 
   13.10 - * APIC timers. This function allows us to disable the 
   13.11 - */
   13.12 -void __init disable_pit(void)
   13.13 -{
   13.14 -        printk("Disable PIT. Not needed anymore\n");
   13.15 -        /* This is not the most elegant way, but hey. */
   13.16 -        disable_irq(0);
   13.17 -}
    14.1 --- a/xen/arch/i386/setup.c	Fri Oct 17 10:22:54 2003 +0000
    14.2 +++ b/xen/arch/i386/setup.c	Mon Oct 27 16:44:00 2003 +0000
    14.3 @@ -310,10 +310,7 @@ void __init start_of_day(void)
    14.4      extern void time_init(void);
    14.5      extern void softirq_init(void);
    14.6      extern void timer_bh(void);
    14.7 -    extern void tqueue_bh(void);
    14.8 -    extern void immediate_bh(void);
    14.9      extern void init_timervecs(void);
   14.10 -    extern void disable_pit(void);
   14.11      extern void ac_timer_init(void);
   14.12      extern int  setup_network_devices(void);
   14.13      extern void net_init(void);
   14.14 @@ -366,8 +363,6 @@ void __init start_of_day(void)
   14.15      softirq_init();
   14.16      init_timervecs();
   14.17      init_bh(TIMER_BH, timer_bh);
   14.18 -    init_bh(TQUEUE_BH, tqueue_bh);
   14.19 -    init_bh(IMMEDIATE_BH, immediate_bh);
   14.20      init_apic_mappings(); /* make APICs addressable in our pagetables. */
   14.21  
   14.22  #ifndef CONFIG_SMP    
   14.23 @@ -384,14 +379,15 @@ void __init start_of_day(void)
   14.24                        * fall thru to 8259A if we have to (but slower).
   14.25                        */
   14.26  #endif
   14.27 +
   14.28      initialize_keytable(); /* call back handling for key codes      */
   14.29  
   14.30 -    if ( cpu_has_apic )
   14.31 -        disable_pit();
   14.32 -    else if ( smp_num_cpus != 1 )
   14.33 -        panic("We really need local APICs on SMP machines!");
   14.34 -    else
   14.35 +    if ( !cpu_has_apic )
   14.36 +    {
   14.37          do_timer_lists_from_pit = 1;
   14.38 +        if ( smp_num_cpus != 1 )
   14.39 +            panic("We need local APICs on SMP machines!");
   14.40 +    }
   14.41  
   14.42      ac_timer_init();    /* init accurate timers */
   14.43      init_xeno_time();	/* initialise the time */
    15.1 --- a/xen/arch/i386/time.c	Fri Oct 17 10:22:54 2003 +0000
    15.2 +++ b/xen/arch/i386/time.c	Mon Oct 27 16:44:00 2003 +0000
    15.3 @@ -43,8 +43,10 @@
    15.4  #define TRC(_x)
    15.5  #endif
    15.6  
    15.7 -/* GLOBALS */
    15.8 +extern rwlock_t xtime_lock;
    15.9 +extern unsigned long wall_jiffies;
   15.10  
   15.11 +/* GLOBAL */
   15.12  unsigned long cpu_khz;  /* Detected as we calibrate the TSC */
   15.13  unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
   15.14  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
   15.15 @@ -52,39 +54,22 @@ int timer_ack = 0;
   15.16  int do_timer_lists_from_pit = 0;
   15.17  
   15.18  /* PRIVATE */
   15.19 -
   15.20  static unsigned int    rdtsc_bitshift;  /* Which 32 bits of TSC do we use?   */
   15.21 -static unsigned long   init_cmos_time;  /* RTC time when system time == 0    */
   15.22 -static u64             cpu_freqs[3];    /* Slow/correct/fast CPU frequencies */
   15.23 -static u64             cpu_freq;        /* Currently-selected CPU frequency  */
   15.24 +static u64             cpu_freq;        /* CPU frequency (Hz)                */
   15.25  static u32             st_scale_f;      /* Cycles -> ns, fractional part     */
   15.26  static u32             st_scale_i;      /* Cycles -> ns, integer part        */
   15.27 -static struct ac_timer update_timer;    /* Periodic 'time update' function   */
   15.28 -static spinlock_t      stime_lock;      /* Lock for accessing sys & wc time  */
   15.29 -struct timeval         wall_clock_time; /* WC time at last 'time update'     */
   15.30  static u32             tsc_irq;         /* CPU0's TSC at last 'time update'  */
   15.31  static s_time_t        stime_irq;       /* System time at last 'time update' */
   15.32  
   15.33 -/*
   15.34 - * The scale update period is not a whole number of seconds since we want to
   15.35 - * avoid being in sync with the CMOS update-in-progress flag.
   15.36 - */
   15.37 -#define SCALE_UPDATE_PERIOD MILLISECS(50200)
   15.38 -#define TIME_UPDATE_PERIOD  MILLISECS(200)
   15.39 +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
   15.40 +{
   15.41 +    u64 full_tsc;
   15.42  
   15.43 +    write_lock(&xtime_lock);
   15.44  
   15.45 -static inline void do_timer_interrupt(
   15.46 -    int irq, void *dev_id, struct pt_regs *regs)
   15.47 -{
   15.48  #ifdef CONFIG_X86_IO_APIC
   15.49      if ( timer_ack ) 
   15.50      {
   15.51 -        /*
   15.52 -         * Subtle, when I/O APICs are used we have to ack timer IRQ manually 
   15.53 -         * to reset the IRR bit for do_slow_gettimeoffset(). This will also 
   15.54 -         * deassert NMI lines for the watchdog if run on an 82489DX-based 
   15.55 -         * system.
   15.56 -         */
   15.57          extern spinlock_t i8259A_lock;
   15.58          spin_lock(&i8259A_lock);
   15.59          outb(0x0c, 0x20);
   15.60 @@ -93,22 +78,27 @@ static inline void do_timer_interrupt(
   15.61          spin_unlock(&i8259A_lock);
   15.62      }
   15.63  #endif
   15.64 +    
   15.65 +    /*
   15.66 +     * Updates TSC timestamp (used to interpolate passage of time between
   15.67 +     * interrupts).
   15.68 +     */
   15.69 +    rdtscll(full_tsc);
   15.70 +    tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
   15.71  
   15.72 +    /* Updates xtime (wallclock time). */
   15.73      do_timer(regs);
   15.74  
   15.75 +    /* Updates system time (nanoseconds since boot). */
   15.76 +    stime_irq += MILLISECS(1000/HZ);
   15.77 +
   15.78 +    write_unlock(&xtime_lock);
   15.79 +
   15.80 +    /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
   15.81      if ( do_timer_lists_from_pit )
   15.82          do_ac_timer();
   15.83  }
   15.84  
   15.85 -/*
   15.86 - * This is only temporarily. Once the APIC s up and running this 
   15.87 - * timer interrupt is turned off.
   15.88 - */
   15.89 -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
   15.90 -{
   15.91 -    do_timer_interrupt(irq, NULL, regs);
   15.92 -}
   15.93 -
   15.94  static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0,
   15.95                                    "timer", NULL, NULL};
   15.96  
   15.97 @@ -222,27 +212,6 @@ static unsigned long __get_cmos_time(voi
   15.98      return mktime(year, mon, day, hour, min, sec);
   15.99  }
  15.100  
  15.101 -/* This version is fast: it bails if there's an update in progress. */
  15.102 -static unsigned long maybe_get_cmos_time(void)
  15.103 -{
  15.104 -    unsigned long ct, retval = 0, flags;
  15.105 -
  15.106 -    spin_lock_irqsave(&rtc_lock, flags);
  15.107 -
  15.108 -    if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
  15.109 -        goto out;
  15.110 -
  15.111 -    ct = __get_cmos_time();
  15.112 -
  15.113 -    if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
  15.114 -        retval = ct;
  15.115 -    
  15.116 - out:
  15.117 -    spin_unlock_irqrestore(&rtc_lock, flags);
  15.118 -    return retval;
  15.119 -}
  15.120 -
  15.121 -/* This version spins until it definitely reads a valid time from CMOS RAM. */
  15.122  static unsigned long get_cmos_time(void)
  15.123  {
  15.124      unsigned long res, flags;
  15.125 @@ -264,17 +233,16 @@ static unsigned long get_cmos_time(void)
  15.126      return res;
  15.127  }
  15.128  
  15.129 -
  15.130  /***************************************************************************
  15.131   * System Time
  15.132   ***************************************************************************/
  15.133  
  15.134 -static inline s_time_t __get_s_time(void)
  15.135 +static inline u64 get_time_delta(void)
  15.136  {
  15.137      s32      delta_tsc;
  15.138      u32      low;
  15.139      u64      delta, tsc;
  15.140 -    
  15.141 +
  15.142      rdtscll(tsc);
  15.143      low = (u32)(tsc >> rdtsc_bitshift);
  15.144      delta_tsc = (s32)(low - tsc_irq);
  15.145 @@ -283,162 +251,76 @@ static inline s_time_t __get_s_time(void
  15.146      delta >>= 32;
  15.147      delta += ((u64)delta_tsc * st_scale_i);
  15.148  
  15.149 -    return stime_irq + delta;
  15.150 +    return delta;
  15.151  }
  15.152  
  15.153  s_time_t get_s_time(void)
  15.154  {
  15.155      s_time_t now;
  15.156      unsigned long flags;
  15.157 -    spin_lock_irqsave(&stime_lock, flags);
  15.158 -    now = __get_s_time();
  15.159 -    spin_unlock_irqrestore(&stime_lock, flags);
  15.160 +    read_lock_irqsave(&xtime_lock, flags);
  15.161 +    now = stime_irq + get_time_delta();
  15.162 +    read_unlock_irqrestore(&xtime_lock, flags);
  15.163      return now; 
  15.164  }
  15.165  
  15.166  
  15.167 -void do_gettimeofday(struct timeval *tv)
  15.168 -{
  15.169 -    unsigned long flags;
  15.170 -    unsigned long usec, sec;
  15.171 -
  15.172 -    spin_lock_irqsave(&stime_lock, flags);
  15.173 -    usec = ((unsigned long)(__get_s_time() - stime_irq))/1000;
  15.174 -    sec = wall_clock_time.tv_sec;
  15.175 -    usec += wall_clock_time.tv_usec;
  15.176 -    spin_unlock_irqrestore(&stime_lock, flags);
  15.177 -
  15.178 -    while ( usec >= 1000000 ) 
  15.179 -    {
  15.180 -        usec -= 1000000;
  15.181 -        sec++;
  15.182 -    }
  15.183 -
  15.184 -    tv->tv_sec = sec;
  15.185 -    tv->tv_usec = usec;
  15.186 -}
  15.187 -
  15.188 -void do_settimeofday(struct timeval *tv)
  15.189 -{
  15.190 -    printk("XXX: do_settimeofday not implemented\n");
  15.191 -}
  15.192 -
  15.193 -
  15.194 -/***************************************************************************
  15.195 - * Update times
  15.196 - ***************************************************************************/
  15.197 -
  15.198  void update_dom_time(shared_info_t *si)
  15.199  {
  15.200      unsigned long flags;
  15.201  
  15.202 -    spin_lock_irqsave(&stime_lock, flags);
  15.203 +    read_lock_irqsave(&xtime_lock, flags);
  15.204 +
  15.205 +    si->time_version1++;
  15.206 +    wmb();
  15.207 +
  15.208 +    /* NB. These two values don't actually ever change. */
  15.209      si->cpu_freq       = cpu_freq;
  15.210      si->rdtsc_bitshift = rdtsc_bitshift;
  15.211 -    si->system_time    = stime_irq;
  15.212 -    si->st_timestamp   = tsc_irq;
  15.213 -    si->tv_sec         = wall_clock_time.tv_sec;
  15.214 -    si->tv_usec        = wall_clock_time.tv_usec;
  15.215 -    si->wc_timestamp   = stime_irq;
  15.216 -    si->wc_version++;
  15.217 -    spin_unlock_irqrestore(&stime_lock, flags);
  15.218 -}
  15.219  
  15.220 -/*
  15.221 - * VERY crude way to keep system time from drfiting.
  15.222 - * Update the scaling factor using the RTC
  15.223 - * This is done periodically of it's own timer
  15.224 - * We maintain an array of cpu frequencies.
  15.225 - * - index 0 -> go slower
  15.226 - * - index 1 -> frequency as determined during calibration
  15.227 - * - index 2 -> go faster
  15.228 - * 
  15.229 - * NB2. Note that update_scale is called from update_time with the stime_lock
  15.230 - * still held. This is because we must only slow down cpu_freq at a timebase
  15.231 - * change. If we did it in the middle of an update period then time would
  15.232 - * seem to jump backwards since BASE+OLD_FREQ*DIFF > BASE+NEW_FREQ*DIFF.
  15.233 - */
  15.234 -static void update_scale(void)
  15.235 -{
  15.236 -    unsigned long  cmos_time;
  15.237 -    u32            st, ct;
  15.238 -    s32            dt;
  15.239 -    u64            scale;
  15.240 -    int            freq_index;
  15.241 +    si->system_time    = stime_irq;
  15.242 +    si->tsc_timestamp  = tsc_irq;
  15.243 +    si->wc_sec         = xtime.tv_sec;
  15.244 +    si->wc_usec        = xtime.tv_usec;
  15.245 +    si->wc_usec       += (jiffies - wall_jiffies) * (1000000 / HZ);
  15.246 +    while ( si->wc_usec >= 1000000 )
  15.247 +    {
  15.248 +        si->wc_usec -= 1000000;
  15.249 +        si->wc_sec++;
  15.250 +    }
  15.251  
  15.252 -    if ( (cmos_time = maybe_get_cmos_time()) == 0 )
  15.253 -        return;
  15.254 -
  15.255 -    ct = (u32)(cmos_time - init_cmos_time);
  15.256 -    st = (u32)(stime_irq/SECONDS(1));
  15.257 -    dt = (s32)(ct - st);
  15.258 +    wmb();
  15.259 +    si->time_version2++;
  15.260  
  15.261 -    /* Work out adjustment to scaling factor. Allow +/- 1s drift. */
  15.262 -    if ( dt < -1 ) 
  15.263 -        freq_index = 0;   /* go slower */
  15.264 -    else if ( dt > 1 ) 
  15.265 -        freq_index = 2;   /* go faster */
  15.266 -    else 
  15.267 -        freq_index = 1;   /* correct speed */
  15.268 -
  15.269 -    if ( (dt <= -10) || (dt >= 10) )
  15.270 -        printk("Large time drift (cmos time - system time = %ds)\n", dt);
  15.271 -
  15.272 -    /* set new frequency  */
  15.273 -    cpu_freq = cpu_freqs[freq_index];
  15.274 -
  15.275 -    /* adjust scaling factor */
  15.276 -    scale = 1000000000LL << (32 + rdtsc_bitshift);
  15.277 -    scale /= cpu_freq;
  15.278 -    st_scale_f = scale & 0xffffffff;
  15.279 -    st_scale_i = scale >> 32;
  15.280 +    read_unlock_irqrestore(&xtime_lock, flags);
  15.281  }
  15.282  
  15.283  
  15.284 -static void update_time(unsigned long unused)
  15.285 +/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
  15.286 +void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
  15.287  {
  15.288 -    unsigned long  flags;
  15.289 -    s_time_t       new_st;
  15.290 -    unsigned long  usec;
  15.291 -    u64            full_tsc;
  15.292 -    static int     calls_since_scale_update = 0;
  15.293 +    s64 delta;
  15.294 +    long _usecs = (long)usecs;
  15.295  
  15.296 -    spin_lock_irqsave(&stime_lock, flags);
  15.297 +    write_lock_irq(&xtime_lock);
  15.298  
  15.299 -    rdtscll(full_tsc);
  15.300 -    new_st = __get_s_time();
  15.301 +    delta = (s64)(stime_irq - system_time_base);
  15.302  
  15.303 -    /* Update wall clock time. */
  15.304 -    usec = ((unsigned long)(new_st - stime_irq))/1000;
  15.305 -    usec += wall_clock_time.tv_usec;
  15.306 -    while ( usec >= 1000000 ) 
  15.307 +	_usecs += (long)(delta/1000);
  15.308 +	_usecs -= (jiffies - wall_jiffies) * (1000000 / HZ);
  15.309 +
  15.310 +	while ( _usecs < 0 ) 
  15.311      {
  15.312 -        usec -= 1000000;
  15.313 -        wall_clock_time.tv_sec++;
  15.314 -    }
  15.315 -    wall_clock_time.tv_usec = usec;
  15.316 -
  15.317 -    /* Update system time. */
  15.318 -    stime_irq = new_st;
  15.319 -    tsc_irq   = (u32)(full_tsc >> rdtsc_bitshift);
  15.320 +		_usecs += 1000000;
  15.321 +		secs--;
  15.322 +	}
  15.323  
  15.324 -    /* Maybe update our rate to be in sync with the RTC. */
  15.325 -    if ( ++calls_since_scale_update >= 
  15.326 -         (SCALE_UPDATE_PERIOD/TIME_UPDATE_PERIOD) )
  15.327 -    {
  15.328 -        update_scale();
  15.329 -        calls_since_scale_update = 0;
  15.330 -    }
  15.331 +    xtime.tv_sec  = secs;
  15.332 +    xtime.tv_usec = _usecs;
  15.333  
  15.334 -    spin_unlock_irqrestore(&stime_lock, flags);
  15.335 +    write_unlock_irq(&xtime_lock);
  15.336  
  15.337 -    TRC(printk("TIME[%02d] update time: stime_irq=%lld now=%lld,wct=%ld:%ld\n",
  15.338 -               smp_processor_id(), stime_irq, new_st, wall_clock_time.tv_sec,
  15.339 -               wall_clock_time.tv_usec));
  15.340 -
  15.341 -    /* Reload the timer. */
  15.342 -    update_timer.expires = new_st + TIME_UPDATE_PERIOD;
  15.343 -    add_ac_timer(&update_timer);
  15.344 +    update_dom_time(current->shared_info);
  15.345  }
  15.346  
  15.347  
  15.348 @@ -446,21 +328,22 @@ static void update_time(unsigned long un
  15.349  int __init init_xeno_time()
  15.350  {
  15.351      u64      scale;
  15.352 -    s64      freq_off;
  15.353      u64      full_tsc;
  15.354      unsigned int cpu_ghz;
  15.355  
  15.356 -    spin_lock_init(&stime_lock);
  15.357 -
  15.358      cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
  15.359      for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
  15.360          continue;
  15.361  
  15.362 -    /* Calculate adjusted frequencies: +/- 0.1% */
  15.363 -    freq_off = cpu_freq/1000;
  15.364 -    cpu_freqs[0] = cpu_freq + freq_off;
  15.365 -    cpu_freqs[1] = cpu_freq;
  15.366 -    cpu_freqs[2] = cpu_freq - freq_off;
  15.367 +    /*
  15.368 +     * We actually adjust cpu_freq to be 0.001% slower than the real
   15.369 +     * frequency. This makes time run a little bit slower when interpolating
  15.370 +     * the passage of time between periodic interrupts, so we expect a little
  15.371 +     * jump in time whenever an interrupt comes in (roughly 100ns every 10ms).
   15.372 +     * However, this should avoid us consistently running too fast and jumping
  15.373 +     * _backwards_ on each interrupt, which would be much worse!
  15.374 +     */
  15.375 +    cpu_freq = cpu_freq - (cpu_freq / 100000ULL);
  15.376  
  15.377      scale  = 1000000000LL << (32 + rdtsc_bitshift);
  15.378      scale /= cpu_freq;
  15.379 @@ -473,15 +356,8 @@ int __init init_xeno_time()
  15.380      tsc_irq   = (u32)(full_tsc >> rdtsc_bitshift);
  15.381  
  15.382      /* Wallclock time starts as the initial RTC time. */
  15.383 -    wall_clock_time.tv_sec  = init_cmos_time = get_cmos_time();
  15.384 -    wall_clock_time.tv_usec = 0;
  15.385 +    xtime.tv_sec  = get_cmos_time();
  15.386  
  15.387 -    /* Start timer to periodically update time and frequency scale. */
  15.388 -    init_ac_timer(&update_timer, 0);
  15.389 -    update_timer.data = 1;
  15.390 -    update_timer.function = &update_time;
  15.391 -    update_time(0);
  15.392 - 
  15.393      printk("Time init:\n");
  15.394      printk(".... System Time: %lldns\n", 
  15.395             NOW());
  15.396 @@ -490,7 +366,7 @@ int __init init_xeno_time()
  15.397      printk(".... scale:       %08X:%08X\n", 
  15.398             (u32)(scale>>32), (u32)scale);
  15.399      printk(".... Wall Clock:  %lds %ldus\n", 
  15.400 -           wall_clock_time.tv_sec, wall_clock_time.tv_usec);
  15.401 +           xtime.tv_sec, xtime.tv_usec);
  15.402  
  15.403      return 0;
  15.404  }
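
Editorial note (not part of the patch): the time.c hunks above replace the periodic rate-adjustment machinery with a single 32.32 fixed-point scale. init_xeno_time() computes scale = (10^9 << (32 + rdtsc_bitshift)) / cpu_freq and splits it into st_scale_i (integer part) and st_scale_f (fractional part); get_time_delta() then interpolates nanoseconds since the last tick from the shifted TSC delta. A standalone copy of that arithmetic, with names following the patch, is sketched here for illustration:

    typedef unsigned long long u64;
    typedef unsigned int u32;
    typedef int s32;

    /*
     * Shifted-TSC cycles -> nanoseconds, as in get_time_delta() above:
     *   ns = delta_tsc * (st_scale_i + st_scale_f / 2^32)
     *      = delta_tsc * (10^9 * 2^rdtsc_bitshift) / cpu_freq
     */
    static u64 cycles_to_ns(s32 delta_tsc, u32 st_scale_i, u32 st_scale_f)
    {
        u64 delta;
        delta   = (u64)delta_tsc * st_scale_f;  /* fractional contribution */
        delta >>= 32;                           /* drop the sub-nanosecond bits */
        delta  += (u64)delta_tsc * st_scale_i;  /* whole-nanosecond contribution */
        return delta;                           /* ns since the last timer tick */
    }
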
    16.1 --- a/xen/common/dom0_ops.c	Fri Oct 17 10:22:54 2003 +0000
    16.2 +++ b/xen/common/dom0_ops.c	Mon Oct 27 16:44:00 2003 +0000
    16.3 @@ -284,39 +284,47 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    16.4  
    16.5      case DOM0_MSR:
    16.6      {
    16.7 -      if (op.u.msr.write)
    16.8 +        if (op.u.msr.write)
    16.9  	{
   16.10 -	  msr_cpu_mask = op.u.msr.cpu_mask;
   16.11 -	  msr_addr = op.u.msr.msr;
   16.12 -	  msr_lo = op.u.msr.in1;
   16.13 -	  msr_hi = op.u.msr.in2;
   16.14 -	  smp_call_function(write_msr_for, NULL, 1, 1);
   16.15 -	  write_msr_for(NULL);
   16.16 +            msr_cpu_mask = op.u.msr.cpu_mask;
   16.17 +            msr_addr = op.u.msr.msr;
   16.18 +            msr_lo = op.u.msr.in1;
   16.19 +            msr_hi = op.u.msr.in2;
   16.20 +            smp_call_function(write_msr_for, NULL, 1, 1);
   16.21 +            write_msr_for(NULL);
   16.22  	}
   16.23 -      else
   16.24 +        else
   16.25  	{
   16.26 -          msr_cpu_mask = op.u.msr.cpu_mask;
   16.27 -          msr_addr = op.u.msr.msr;
   16.28 -	  smp_call_function(read_msr_for, NULL, 1, 1);
   16.29 -	  read_msr_for(NULL);
   16.30 +            msr_cpu_mask = op.u.msr.cpu_mask;
   16.31 +            msr_addr = op.u.msr.msr;
   16.32 +            smp_call_function(read_msr_for, NULL, 1, 1);
   16.33 +            read_msr_for(NULL);
   16.34  
   16.35 -          op.u.msr.out1 = msr_lo;
   16.36 -          op.u.msr.out2 = msr_hi;
   16.37 -	  copy_to_user(u_dom0_op, &op, sizeof(op));
   16.38 +            op.u.msr.out1 = msr_lo;
   16.39 +            op.u.msr.out2 = msr_hi;
   16.40 +            copy_to_user(u_dom0_op, &op, sizeof(op));
   16.41  	}
   16.42 -      ret = 0;
   16.43 +        ret = 0;
   16.44      }
   16.45      break;
   16.46  
   16.47      case DOM0_DEBUG:
   16.48      {
   16.49 -      op.u.debug.out1 = op.u.debug.in2 + 1;
   16.50 -      op.u.debug.out2 = op.u.debug.in1 + 1;
   16.51 -      copy_to_user(u_dom0_op, &op, sizeof(op));
   16.52 -      ret = 0;
   16.53 +        op.u.debug.out1 = op.u.debug.in2 + 1;
   16.54 +        op.u.debug.out2 = op.u.debug.in1 + 1;
   16.55 +        copy_to_user(u_dom0_op, &op, sizeof(op));
   16.56 +        ret = 0;
   16.57      }
   16.58      break;
   16.59  
   16.60 +    case DOM0_SETTIME:
   16.61 +    {
   16.62 +        do_settime(op.u.settime.secs, 
   16.63 +                   op.u.settime.usecs, 
   16.64 +                   op.u.settime.system_time);
   16.65 +        ret = 0;
   16.66 +    }
   16.67 +    break;
   16.68  
   16.69      default:
   16.70          ret = -ENOSYS;
    17.1 --- a/xen/common/kernel.c	Fri Oct 17 10:22:54 2003 +0000
    17.2 +++ b/xen/common/kernel.c	Mon Oct 27 16:44:00 2003 +0000
    17.3 @@ -227,7 +227,6 @@ void cmain (unsigned long magic, multibo
    17.4                         (mod[1].mod_end - mod[1].mod_start):0)
    17.5           != 0 ) panic("Could not set up DOM0 guest OS\n");
    17.6  
    17.7 -    update_dom_time(new_dom->shared_info);
    17.8      wake_up(new_dom);
    17.9  
   17.10      startup_cpu_idle_loop();
    18.1 --- a/xen/common/schedule.c	Fri Oct 17 10:22:54 2003 +0000
    18.2 +++ b/xen/common/schedule.c	Mon Oct 27 16:44:00 2003 +0000
    18.3 @@ -535,6 +535,8 @@ static void virt_timer(unsigned long foo
    18.4      do {
    18.5          if ( is_idle_task(p) ) continue;
    18.6          cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
    18.7 +        if ( p->has_cpu ) 
    18.8 +            update_dom_time(p->shared_info);
    18.9      }
   18.10      while ( (p = p->next_task) != &idle0_task );
   18.11      read_unlock(&tasklist_lock);
    19.1 --- a/xen/common/timer.c	Fri Oct 17 10:22:54 2003 +0000
    19.2 +++ b/xen/common/timer.c	Mon Oct 27 16:44:00 2003 +0000
    19.3 @@ -22,58 +22,15 @@
    19.4  #include <linux/timex.h>
    19.5  #include <linux/tqueue.h>
    19.6  #include <linux/delay.h>
    19.7 -//#include <linux/smp_lock.h>
    19.8  #include <linux/interrupt.h>
    19.9 -//#include <linux/kernel_stat.h>
   19.10  
   19.11  #include <xeno/event.h>
   19.12  
   19.13  #include <asm/uaccess.h>
   19.14  
   19.15 -/*
   19.16 - * Timekeeping variables
   19.17 - */
   19.18 -
   19.19 -long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */
   19.20 -
   19.21 -/* The current time */
   19.22  struct timeval xtime __attribute__ ((aligned (16)));
   19.23 -
   19.24 -/* Don't completely fail for HZ > 500.  */
   19.25 -int tickadj = 500/HZ ? : 1;		/* microsecs */
   19.26 -
   19.27 -DECLARE_TASK_QUEUE(tq_timer);
   19.28 -DECLARE_TASK_QUEUE(tq_immediate);
   19.29 -
   19.30 -/*
   19.31 - * phase-lock loop variables
   19.32 - */
   19.33 -/* TIME_ERROR prevents overwriting the CMOS clock */
   19.34 -int time_state = TIME_OK;		/* clock synchronization status	*/
   19.35 -int time_status = STA_UNSYNC;		/* clock status bits		*/
   19.36 -long time_offset;			/* time adjustment (us)		*/
   19.37 -long time_constant = 2;			/* pll time constant		*/
   19.38 -long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
   19.39 -long time_precision = 1;		/* clock precision (us)		*/
   19.40 -long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
   19.41 -long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
   19.42 -long time_phase;			/* phase offset (scaled us)	*/
   19.43 -long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
   19.44 -					/* frequency offset (scaled ppm)*/
   19.45 -long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
   19.46 -long time_reftime;			/* time at last adjustment (s)	*/
   19.47 -
   19.48 -long time_adjust;
   19.49 -long time_adjust_step;
   19.50 -
   19.51 -unsigned long event;
   19.52 -
   19.53  unsigned long volatile jiffies;
   19.54  
   19.55 -unsigned int * prof_buffer;
   19.56 -unsigned long prof_len;
   19.57 -unsigned long prof_shift;
   19.58 -
   19.59  /*
   19.60   * Event timer code
   19.61   */
   19.62 @@ -85,13 +42,13 @@ unsigned long prof_shift;
   19.63  #define TVR_MASK (TVR_SIZE - 1)
   19.64  
   19.65  struct timer_vec {
   19.66 -	int index;
   19.67 -	struct list_head vec[TVN_SIZE];
   19.68 +    int index;
   19.69 +    struct list_head vec[TVN_SIZE];
   19.70  };
   19.71  
   19.72  struct timer_vec_root {
   19.73 -	int index;
   19.74 -	struct list_head vec[TVR_SIZE];
   19.75 +    int index;
   19.76 +    struct list_head vec[TVR_SIZE];
   19.77  };
   19.78  
   19.79  static struct timer_vec tv5;
   19.80 @@ -101,65 +58,65 @@ static struct timer_vec tv2;
   19.81  static struct timer_vec_root tv1;
   19.82  
   19.83  static struct timer_vec * const tvecs[] = {
   19.84 -	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
   19.85 +    (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
   19.86  };
   19.87  
   19.88  #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
   19.89  
   19.90  void init_timervecs (void)
   19.91  {
   19.92 -	int i;
   19.93 +    int i;
   19.94  
   19.95 -	for (i = 0; i < TVN_SIZE; i++) {
   19.96 -		INIT_LIST_HEAD(tv5.vec + i);
   19.97 -		INIT_LIST_HEAD(tv4.vec + i);
   19.98 -		INIT_LIST_HEAD(tv3.vec + i);
   19.99 -		INIT_LIST_HEAD(tv2.vec + i);
  19.100 -	}
  19.101 -	for (i = 0; i < TVR_SIZE; i++)
  19.102 -		INIT_LIST_HEAD(tv1.vec + i);
  19.103 +    for (i = 0; i < TVN_SIZE; i++) {
  19.104 +        INIT_LIST_HEAD(tv5.vec + i);
  19.105 +        INIT_LIST_HEAD(tv4.vec + i);
  19.106 +        INIT_LIST_HEAD(tv3.vec + i);
  19.107 +        INIT_LIST_HEAD(tv2.vec + i);
  19.108 +    }
  19.109 +    for (i = 0; i < TVR_SIZE; i++)
  19.110 +        INIT_LIST_HEAD(tv1.vec + i);
  19.111  }
  19.112  
  19.113  static unsigned long timer_jiffies;
  19.114  
  19.115  static inline void internal_add_timer(struct timer_list *timer)
  19.116  {
  19.117 -	/*
  19.118 -	 * must be cli-ed when calling this
  19.119 -	 */
  19.120 -	unsigned long expires = timer->expires;
  19.121 -	unsigned long idx = expires - timer_jiffies;
  19.122 -	struct list_head * vec;
  19.123 +    /*
  19.124 +     * must be cli-ed when calling this
  19.125 +     */
  19.126 +    unsigned long expires = timer->expires;
  19.127 +    unsigned long idx = expires - timer_jiffies;
  19.128 +    struct list_head * vec;
  19.129  
  19.130 -	if (idx < TVR_SIZE) {
  19.131 -		int i = expires & TVR_MASK;
  19.132 -		vec = tv1.vec + i;
  19.133 -	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
  19.134 -		int i = (expires >> TVR_BITS) & TVN_MASK;
  19.135 -		vec = tv2.vec + i;
  19.136 -	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
  19.137 -		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
  19.138 -		vec =  tv3.vec + i;
  19.139 -	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
  19.140 -		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
  19.141 -		vec = tv4.vec + i;
  19.142 -	} else if ((signed long) idx < 0) {
  19.143 -		/* can happen if you add a timer with expires == jiffies,
  19.144 +    if (idx < TVR_SIZE) {
  19.145 +        int i = expires & TVR_MASK;
  19.146 +        vec = tv1.vec + i;
  19.147 +    } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
  19.148 +        int i = (expires >> TVR_BITS) & TVN_MASK;
  19.149 +        vec = tv2.vec + i;
  19.150 +    } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
  19.151 +        int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
  19.152 +        vec =  tv3.vec + i;
  19.153 +    } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
  19.154 +        int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
  19.155 +        vec = tv4.vec + i;
  19.156 +    } else if ((signed long) idx < 0) {
  19.157 +        /* can happen if you add a timer with expires == jiffies,
  19.158  		 * or you set a timer to go off in the past
  19.159  		 */
  19.160 -		vec = tv1.vec + tv1.index;
  19.161 -	} else if (idx <= 0xffffffffUL) {
  19.162 -		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
  19.163 -		vec = tv5.vec + i;
  19.164 -	} else {
  19.165 -		/* Can only get here on architectures with 64-bit jiffies */
  19.166 -		INIT_LIST_HEAD(&timer->list);
  19.167 -		return;
  19.168 -	}
  19.169 -	/*
  19.170 +        vec = tv1.vec + tv1.index;
  19.171 +    } else if (idx <= 0xffffffffUL) {
  19.172 +        int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
  19.173 +        vec = tv5.vec + i;
  19.174 +    } else {
  19.175 +        /* Can only get here on architectures with 64-bit jiffies */
  19.176 +        INIT_LIST_HEAD(&timer->list);
  19.177 +        return;
  19.178 +    }
  19.179 +    /*
  19.180  	 * Timers are FIFO!
  19.181  	 */
  19.182 -	list_add(&timer->list, vec->prev);
  19.183 +    list_add(&timer->list, vec->prev);
  19.184  }
  19.185  
  19.186  /* Initialize both explicitly - let's try to have them in the same cache line */
  19.187 @@ -178,57 +135,57 @@ volatile struct timer_list * volatile ru
  19.188  
  19.189  void add_timer(struct timer_list *timer)
  19.190  {
  19.191 -	unsigned long flags;
  19.192 +    unsigned long flags;
  19.193  
  19.194 -	spin_lock_irqsave(&timerlist_lock, flags);
  19.195 -	if (timer_pending(timer))
  19.196 -		goto bug;
  19.197 -	internal_add_timer(timer);
  19.198 -	spin_unlock_irqrestore(&timerlist_lock, flags);
  19.199 -	return;
  19.200 -bug:
  19.201 -	spin_unlock_irqrestore(&timerlist_lock, flags);
  19.202 -	printk("bug: kernel timer added twice at %p.\n",
  19.203 -			__builtin_return_address(0));
  19.204 +    spin_lock_irqsave(&timerlist_lock, flags);
  19.205 +    if (timer_pending(timer))
  19.206 +        goto bug;
  19.207 +    internal_add_timer(timer);
  19.208 +    spin_unlock_irqrestore(&timerlist_lock, flags);
  19.209 +    return;
  19.210 + bug:
  19.211 +    spin_unlock_irqrestore(&timerlist_lock, flags);
  19.212 +    printk("bug: kernel timer added twice at %p.\n",
  19.213 +           __builtin_return_address(0));
  19.214  }
  19.215  
  19.216  static inline int detach_timer (struct timer_list *timer)
  19.217  {
  19.218 -	if (!timer_pending(timer))
  19.219 -		return 0;
  19.220 -	list_del(&timer->list);
  19.221 -	return 1;
  19.222 +    if (!timer_pending(timer))
  19.223 +        return 0;
  19.224 +    list_del(&timer->list);
  19.225 +    return 1;
  19.226  }
  19.227  
  19.228  int mod_timer(struct timer_list *timer, unsigned long expires)
  19.229  {
  19.230 -	int ret;
  19.231 -	unsigned long flags;
  19.232 +    int ret;
  19.233 +    unsigned long flags;
  19.234  
  19.235 -	spin_lock_irqsave(&timerlist_lock, flags);
  19.236 -	timer->expires = expires;
  19.237 -	ret = detach_timer(timer);
  19.238 -	internal_add_timer(timer);
  19.239 -	spin_unlock_irqrestore(&timerlist_lock, flags);
  19.240 -	return ret;
  19.241 +    spin_lock_irqsave(&timerlist_lock, flags);
  19.242 +    timer->expires = expires;
  19.243 +    ret = detach_timer(timer);
  19.244 +    internal_add_timer(timer);
  19.245 +    spin_unlock_irqrestore(&timerlist_lock, flags);
  19.246 +    return ret;
  19.247  }
  19.248  
  19.249  int del_timer(struct timer_list * timer)
  19.250  {
  19.251 -	int ret;
  19.252 -	unsigned long flags;
  19.253 +    int ret;
  19.254 +    unsigned long flags;
  19.255  
  19.256 -	spin_lock_irqsave(&timerlist_lock, flags);
  19.257 -	ret = detach_timer(timer);
  19.258 -	timer->list.next = timer->list.prev = NULL;
  19.259 -	spin_unlock_irqrestore(&timerlist_lock, flags);
  19.260 -	return ret;
  19.261 +    spin_lock_irqsave(&timerlist_lock, flags);
  19.262 +    ret = detach_timer(timer);
  19.263 +    timer->list.next = timer->list.prev = NULL;
  19.264 +    spin_unlock_irqrestore(&timerlist_lock, flags);
  19.265 +    return ret;
  19.266  }
  19.267  
  19.268  #ifdef CONFIG_SMP
  19.269  void sync_timers(void)
  19.270  {
  19.271 -	spin_unlock_wait(&global_bh_lock);
  19.272 +    spin_unlock_wait(&global_bh_lock);
  19.273  }
  19.274  
  19.275  /*
  19.276 @@ -241,269 +198,104 @@ void sync_timers(void)
  19.277  
  19.278  int del_timer_sync(struct timer_list * timer)
  19.279  {
  19.280 -	int ret = 0;
  19.281 +    int ret = 0;
  19.282  
  19.283 -	for (;;) {
  19.284 -		unsigned long flags;
  19.285 -		int running;
  19.286 +    for (;;) {
  19.287 +        unsigned long flags;
  19.288 +        int running;
  19.289  
  19.290 -		spin_lock_irqsave(&timerlist_lock, flags);
  19.291 -		ret += detach_timer(timer);
  19.292 -		timer->list.next = timer->list.prev = 0;
  19.293 -		running = timer_is_running(timer);
  19.294 -		spin_unlock_irqrestore(&timerlist_lock, flags);
  19.295 +        spin_lock_irqsave(&timerlist_lock, flags);
  19.296 +        ret += detach_timer(timer);
  19.297 +        timer->list.next = timer->list.prev = 0;
  19.298 +        running = timer_is_running(timer);
  19.299 +        spin_unlock_irqrestore(&timerlist_lock, flags);
  19.300  
  19.301 -		if (!running)
  19.302 -			break;
  19.303 +        if (!running)
  19.304 +            break;
  19.305  
  19.306 -		timer_synchronize(timer);
  19.307 -	}
  19.308 +        timer_synchronize(timer);
  19.309 +    }
  19.310  
  19.311 -	return ret;
  19.312 +    return ret;
  19.313  }
  19.314  #endif
  19.315  
  19.316  
  19.317  static inline void cascade_timers(struct timer_vec *tv)
  19.318  {
  19.319 -	/* cascade all the timers from tv up one level */
  19.320 -	struct list_head *head, *curr, *next;
  19.321 +    /* cascade all the timers from tv up one level */
  19.322 +    struct list_head *head, *curr, *next;
  19.323  
  19.324 -	head = tv->vec + tv->index;
  19.325 -	curr = head->next;
  19.326 -	/*
  19.327 -	 * We are removing _all_ timers from the list, so we don't  have to
  19.328 -	 * detach them individually, just clear the list afterwards.
  19.329 +    head = tv->vec + tv->index;
  19.330 +    curr = head->next;
  19.331 +    /*
  19.332 +     * We are removing _all_ timers from the list, so we don't  have to
  19.333 +     * detach them individually, just clear the list afterwards.
  19.334  	 */
  19.335 -	while (curr != head) {
  19.336 -		struct timer_list *tmp;
  19.337 +    while (curr != head) {
  19.338 +        struct timer_list *tmp;
  19.339  
  19.340 -		tmp = list_entry(curr, struct timer_list, list);
  19.341 -		next = curr->next;
  19.342 -		list_del(curr); /* not needed */
  19.343 -		internal_add_timer(tmp);
  19.344 -		curr = next;
  19.345 -	}
  19.346 -	INIT_LIST_HEAD(head);
  19.347 -	tv->index = (tv->index + 1) & TVN_MASK;
  19.348 +        tmp = list_entry(curr, struct timer_list, list);
  19.349 +        next = curr->next;
  19.350 +        list_del(curr); /* not needed */
  19.351 +        internal_add_timer(tmp);
  19.352 +        curr = next;
  19.353 +    }
  19.354 +    INIT_LIST_HEAD(head);
  19.355 +    tv->index = (tv->index + 1) & TVN_MASK;
  19.356  }
  19.357  
  19.358  static inline void run_timer_list(void)
  19.359  {
  19.360 -	spin_lock_irq(&timerlist_lock);
  19.361 -	while ((long)(jiffies - timer_jiffies) >= 0) {
  19.362 -		struct list_head *head, *curr;
  19.363 -		if (!tv1.index) {
  19.364 -			int n = 1;
  19.365 -			do {
  19.366 -				cascade_timers(tvecs[n]);
  19.367 -			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
  19.368 -		}
  19.369 -repeat:
  19.370 -		head = tv1.vec + tv1.index;
  19.371 -		curr = head->next;
  19.372 -		if (curr != head) {
  19.373 -			struct timer_list *timer;
  19.374 -			void (*fn)(unsigned long);
  19.375 -			unsigned long data;
  19.376 +    spin_lock_irq(&timerlist_lock);
  19.377 +    while ((long)(jiffies - timer_jiffies) >= 0) {
  19.378 +        struct list_head *head, *curr;
  19.379 +        if (!tv1.index) {
  19.380 +            int n = 1;
  19.381 +            do {
  19.382 +                cascade_timers(tvecs[n]);
  19.383 +            } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
  19.384 +        }
  19.385 +    repeat:
  19.386 +        head = tv1.vec + tv1.index;
  19.387 +        curr = head->next;
  19.388 +        if (curr != head) {
  19.389 +            struct timer_list *timer;
  19.390 +            void (*fn)(unsigned long);
  19.391 +            unsigned long data;
  19.392  
  19.393 -			timer = list_entry(curr, struct timer_list, list);
  19.394 - 			fn = timer->function;
  19.395 - 			data= timer->data;
  19.396 +            timer = list_entry(curr, struct timer_list, list);
  19.397 +            fn = timer->function;
  19.398 +            data= timer->data;
  19.399  
  19.400 -			detach_timer(timer);
  19.401 -			timer->list.next = timer->list.prev = NULL;
  19.402 -			timer_enter(timer);
  19.403 -			spin_unlock_irq(&timerlist_lock);
  19.404 -			fn(data);
  19.405 -			spin_lock_irq(&timerlist_lock);
  19.406 -			timer_exit();
  19.407 -			goto repeat;
  19.408 -		}
  19.409 -		++timer_jiffies; 
  19.410 -		tv1.index = (tv1.index + 1) & TVR_MASK;
  19.411 -	}
  19.412 -	spin_unlock_irq(&timerlist_lock);
  19.413 +            detach_timer(timer);
  19.414 +            timer->list.next = timer->list.prev = NULL;
  19.415 +            timer_enter(timer);
  19.416 +            spin_unlock_irq(&timerlist_lock);
  19.417 +            fn(data);
  19.418 +            spin_lock_irq(&timerlist_lock);
  19.419 +            timer_exit();
  19.420 +            goto repeat;
  19.421 +        }
  19.422 +        ++timer_jiffies; 
  19.423 +        tv1.index = (tv1.index + 1) & TVR_MASK;
  19.424 +    }
  19.425 +    spin_unlock_irq(&timerlist_lock);
  19.426  }
  19.427  
  19.428  spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
  19.429  
  19.430 -void tqueue_bh(void)
  19.431 -{
  19.432 -	run_task_queue(&tq_timer);
  19.433 -}
  19.434 -
  19.435 -void immediate_bh(void)
  19.436 -{
  19.437 -	run_task_queue(&tq_immediate);
  19.438 -}
  19.439 -
  19.440 -/*
  19.441 - * this routine handles the overflow of the microsecond field
  19.442 - *
  19.443 - * The tricky bits of code to handle the accurate clock support
  19.444 - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
  19.445 - * They were originally developed for SUN and DEC kernels.
  19.446 - * All the kudos should go to Dave for this stuff.
  19.447 - *
  19.448 - */
  19.449 -static void second_overflow(void)
  19.450 -{
  19.451 -    long ltemp;
  19.452 -
  19.453 -    /* Bump the maxerror field */
  19.454 -    time_maxerror += time_tolerance >> SHIFT_USEC;
  19.455 -    if ( time_maxerror > NTP_PHASE_LIMIT ) {
  19.456 -	time_maxerror = NTP_PHASE_LIMIT;
  19.457 -	time_status |= STA_UNSYNC;
  19.458 -    }
  19.459 -
  19.460 -    /*
  19.461 -     * Leap second processing. If in leap-insert state at
  19.462 -     * the end of the day, the system clock is set back one
  19.463 -     * second; if in leap-delete state, the system clock is
  19.464 -     * set ahead one second. The microtime() routine or
  19.465 -     * external clock driver will insure that reported time
  19.466 -     * is always monotonic. The ugly divides should be
  19.467 -     * replaced.
  19.468 -     */
  19.469 -    switch (time_state) {
  19.470 -
  19.471 -    case TIME_OK:
  19.472 -	if (time_status & STA_INS)
  19.473 -	    time_state = TIME_INS;
  19.474 -	else if (time_status & STA_DEL)
  19.475 -	    time_state = TIME_DEL;
  19.476 -	break;
  19.477 -
  19.478 -    case TIME_INS:
  19.479 -	if (xtime.tv_sec % 86400 == 0) {
  19.480 -	    xtime.tv_sec--;
  19.481 -	    time_state = TIME_OOP;
  19.482 -	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
  19.483 -	}
  19.484 -	break;
  19.485 -
  19.486 -    case TIME_DEL:
  19.487 -	if ((xtime.tv_sec + 1) % 86400 == 0) {
  19.488 -	    xtime.tv_sec++;
  19.489 -	    time_state = TIME_WAIT;
  19.490 -	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
  19.491 -	}
  19.492 -	break;
  19.493 -
  19.494 -    case TIME_OOP:
  19.495 -	time_state = TIME_WAIT;
  19.496 -	break;
  19.497 -
  19.498 -    case TIME_WAIT:
  19.499 -	if (!(time_status & (STA_INS | STA_DEL)))
  19.500 -	    time_state = TIME_OK;
  19.501 -    }
  19.502 -
  19.503 -    /*
  19.504 -     * Compute the phase adjustment for the next second. In
  19.505 -     * PLL mode, the offset is reduced by a fixed factor
  19.506 -     * times the time constant. In FLL mode the offset is
  19.507 -     * used directly. In either mode, the maximum phase
  19.508 -     * adjustment for each second is clamped so as to spread
  19.509 -     * the adjustment over not more than the number of
  19.510 -     * seconds between updates.
  19.511 -     */
  19.512 -    if (time_offset < 0) {
  19.513 -	ltemp = -time_offset;
  19.514 -	if (!(time_status & STA_FLL))
  19.515 -	    ltemp >>= SHIFT_KG + time_constant;
  19.516 -	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
  19.517 -	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
  19.518 -	time_offset += ltemp;
  19.519 -	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
  19.520 -    } else {
  19.521 -	ltemp = time_offset;
  19.522 -	if (!(time_status & STA_FLL))
  19.523 -	    ltemp >>= SHIFT_KG + time_constant;
  19.524 -	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
  19.525 -	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
  19.526 -	time_offset -= ltemp;
  19.527 -	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
  19.528 -    }
  19.529 -
  19.530 -    if (ltemp < 0)
  19.531 -	time_adj -= -ltemp >>
  19.532 -	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
  19.533 -    else
  19.534 -	time_adj += ltemp >>
  19.535 -	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
  19.536 -
  19.537 -#if HZ == 100
  19.538 -    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
  19.539 -     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
  19.540 -     */
  19.541 -    if (time_adj < 0)
  19.542 -	time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
  19.543 -    else
  19.544 -	time_adj += (time_adj >> 2) + (time_adj >> 5);
  19.545 -#endif
  19.546 -}
  19.547 -
  19.548 -/* in the NTP reference this is called "hardclock()" */
  19.549 -static void update_wall_time_one_tick(void)
  19.550 -{
  19.551 -	if ( (time_adjust_step = time_adjust) != 0 ) {
  19.552 -	    /* We are doing an adjtime thing. 
  19.553 -	     *
  19.554 -	     * Prepare time_adjust_step to be within bounds.
  19.555 -	     * Note that a positive time_adjust means we want the clock
  19.556 -	     * to run faster.
  19.557 -	     *
  19.558 -	     * Limit the amount of the step to be in the range
  19.559 -	     * -tickadj .. +tickadj
  19.560 -	     */
  19.561 -	     if (time_adjust > tickadj)
  19.562 -		time_adjust_step = tickadj;
  19.563 -	     else if (time_adjust < -tickadj)
  19.564 -		time_adjust_step = -tickadj;
  19.565 -	     
  19.566 -	    /* Reduce by this step the amount of time left  */
  19.567 -	    time_adjust -= time_adjust_step;
  19.568 -	}
  19.569 -	xtime.tv_usec += tick + time_adjust_step;
  19.570 -	/*
  19.571 -	 * Advance the phase, once it gets to one microsecond, then
  19.572 -	 * advance the tick more.
  19.573 -	 */
  19.574 -	time_phase += time_adj;
  19.575 -	if (time_phase <= -FINEUSEC) {
  19.576 -		long ltemp = -time_phase >> SHIFT_SCALE;
  19.577 -		time_phase += ltemp << SHIFT_SCALE;
  19.578 -		xtime.tv_usec -= ltemp;
  19.579 -	}
  19.580 -	else if (time_phase >= FINEUSEC) {
  19.581 -		long ltemp = time_phase >> SHIFT_SCALE;
  19.582 -		time_phase -= ltemp << SHIFT_SCALE;
  19.583 -		xtime.tv_usec += ltemp;
  19.584 -	}
  19.585 -}
  19.586 -
  19.587 -/*
  19.588 - * Using a loop looks inefficient, but "ticks" is
  19.589 - * usually just one (we shouldn't be losing ticks,
  19.590 - * we're doing this this way mainly for interrupt
  19.591 - * latency reasons, not because we think we'll
  19.592 - * have lots of lost timer ticks
  19.593 - */
  19.594  static void update_wall_time(unsigned long ticks)
  19.595  {
  19.596 -	do {
  19.597 -		ticks--;
  19.598 -		update_wall_time_one_tick();
  19.599 -	} while (ticks);
  19.600 +    do {
  19.601 +        ticks--;
  19.602 +        xtime.tv_usec += 1000000/HZ;
  19.603 +    } while (ticks);
  19.604  
  19.605 -	if (xtime.tv_usec >= 1000000) {
  19.606 -	    xtime.tv_usec -= 1000000;
  19.607 -	    xtime.tv_sec++;
  19.608 -	    second_overflow();
  19.609 -	}
  19.610 +    if (xtime.tv_usec >= 1000000) {
  19.611 +        xtime.tv_usec -= 1000000;
  19.612 +        xtime.tv_sec++;
  19.613 +    }
  19.614  }
  19.615  
  19.616  /* jiffies at the most recent update of wall time */
  19.617 @@ -516,47 +308,31 @@ rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
  19.618  
  19.619  static inline void update_times(void)
  19.620  {
  19.621 -	unsigned long ticks;
  19.622 +    unsigned long ticks;
  19.623  
  19.624 -	/*
  19.625 -	 * update_times() is run from the raw timer_bh handler so we
  19.626 -	 * just know that the irqs are locally enabled and so we don't
  19.627 -	 * need to save/restore the flags of the local CPU here. -arca
  19.628 -	 */
  19.629 -	write_lock_irq(&xtime_lock);
  19.630 +    /*
  19.631 +     * update_times() is run from the raw timer_bh handler so we
  19.632 +     * just know that the irqs are locally enabled and so we don't
  19.633 +     * need to save/restore the flags of the local CPU here. -arca
  19.634 +     */
  19.635 +    write_lock_irq(&xtime_lock);
  19.636  
  19.637 -	ticks = jiffies - wall_jiffies;
  19.638 -	if (ticks) {
  19.639 -		wall_jiffies += ticks;
  19.640 -		update_wall_time(ticks);
  19.641 -	}
  19.642 -	write_unlock_irq(&xtime_lock);
  19.643 +    ticks = jiffies - wall_jiffies;
  19.644 +    if (ticks) {
  19.645 +        wall_jiffies += ticks;
  19.646 +        update_wall_time(ticks);
  19.647 +    }
  19.648 +    write_unlock_irq(&xtime_lock);
  19.649  }
  19.650  
  19.651  void timer_bh(void)
  19.652  {
  19.653 -	update_times();
  19.654 -	run_timer_list();
  19.655 +    update_times();
  19.656 +    run_timer_list();
  19.657  }
  19.658  
  19.659 -#include <xeno/errno.h>
  19.660 -#include <xeno/sched.h>
  19.661 -#include <xeno/lib.h>
  19.662 -#include <xeno/config.h>
  19.663 -#include <xeno/smp.h>
  19.664 -#include <xeno/irq.h>
  19.665 -#include <asm/msr.h>
  19.666 -
  19.667  void do_timer(struct pt_regs *regs)
  19.668  {
  19.669      (*(unsigned long *)&jiffies)++;
  19.670 -
  19.671      mark_bh(TIMER_BH);
  19.672 -    if (TQ_ACTIVE(tq_timer))
  19.673 -        mark_bh(TQUEUE_BH);
  19.674  }
  19.675 -
  19.676 -void get_fast_time(struct timeval * tm)
  19.677 -{
  19.678 -        *tm=xtime;
  19.679 -}
    20.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Fri Oct 17 10:22:54 2003 +0000
    20.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Mon Oct 27 16:44:00 2003 +0000
    20.3 @@ -21,7 +21,8 @@
    20.4  #define DOM0_BUILDDOMAIN   13
    20.5  #define DOM0_IOPL          14
    20.6  #define DOM0_MSR           15
    20.7 -#define DOM0_DEBUG         16                          /* pervasive debugger */
    20.8 +#define DOM0_DEBUG         16
    20.9 +#define DOM0_SETTIME       17
   20.10  
   20.11  #define MAX_CMD_LEN       256
   20.12  #define MAX_DOMAIN_NAME    16
   20.13 @@ -118,6 +119,17 @@ typedef struct dom0_debug_st
   20.14  
   20.15  } dom0_debug_t;
   20.16  
   20.17 +/*
   20.18 + * Set clock such that it would read <secs,usecs> after 00:00:00 UTC,
   20.19 + * 1 January, 1970 if the current system time was <system_time>.
   20.20 + */
   20.21 +typedef struct dom0_settime_st
   20.22 +{
   20.23 +    /* IN variables. */
   20.24 +    unsigned long secs, usecs;
   20.25 +    u64 system_time;
   20.26 +} dom0_settime_t;
   20.27 +
   20.28  typedef struct dom0_op_st
   20.29  {
   20.30      unsigned long cmd;
   20.31 @@ -133,6 +145,7 @@ typedef struct dom0_op_st
   20.32          dom0_iopl_t iopl;
   20.33  	dom0_msr_t msr;
   20.34  	dom0_debug_t debug;
   20.35 +	dom0_settime_t settime;
   20.36      }
   20.37      u;
   20.38  } dom0_op_t;
    21.1 --- a/xen/include/hypervisor-ifs/hypervisor-if.h	Fri Oct 17 10:22:54 2003 +0000
    21.2 +++ b/xen/include/hypervisor-ifs/hypervisor-if.h	Mon Oct 27 16:44:00 2003 +0000
    21.3 @@ -232,32 +232,35 @@ typedef struct shared_info_st {
    21.4      /*
    21.5       * Time: The following abstractions are exposed: System Time, Clock Time,
    21.6       * Domain Virtual Time. Domains can access Cycle counter time directly.
    21.7 -     * 
    21.8 -     * The following values are updated periodically (and atomically, from the
    21.9 -     * p.o.v. of the guest OS). Th eguest OS detects this because the wc_version
   21.10 -     * is incremented.
   21.11       */
   21.12 -    u32		       wc_version;      /* a version number for info below */
   21.13 -    unsigned int       rdtsc_bitshift;  /* use bits N:N+31 of TSC          */
   21.14 -    u64		       cpu_freq;        /* to calculate ticks -> real time */
   21.15 -    /* System Time */
   21.16 -    long long	       system_time;     /* in ns */
   21.17 -    unsigned long      st_timestamp;    /* cyclecounter at last update */
   21.18 -    /* Wall Clock Time */
   21.19 -    long	       tv_sec;          /* essentially a struct timeval */
   21.20 -    long	       tv_usec;
   21.21 -    long long	       wc_timestamp;    /* system time at last update */
   21.22 +
   21.23 +    unsigned int       rdtsc_bitshift;  /* tsc_timestamp uses N:N+31 of TSC. */
   21.24 +    u64                cpu_freq;        /* CPU frequency (Hz).               */
   21.25 +
   21.26 +    /*
   21.27 +     * The following values are updated periodically (and not necessarily
   21.28 +     * atomically!). The guest OS detects this because 'time_version1' is
   21.29 +     * incremented just before updating these values, and 'time_version2' is
   21.30 +     * incremented immediately after. See Xenolinux code for an example of how 
   21.31 +     * to read these values safely (arch/xeno/kernel/time.c).
   21.32 +     */
   21.33 +    unsigned long      time_version1;   /* A version number for info below.  */
   21.34 +    unsigned long      time_version2;   /* A version number for info below.  */
   21.35 +    unsigned long      tsc_timestamp;   /* TSC at last update of time vals.  */
   21.36 +    u64                system_time;     /* Time, in nanosecs, since boot.    */
   21.37 +    unsigned long      wc_sec;          /* Secs  since 00:00:00 UTC, Jan 1, 1970.  */
   21.38 +    unsigned long      wc_usec;         /* Usecs since 00:00:00 UTC, Jan 1, 1970.  */
   21.39      
   21.40      /* Domain Virtual Time */
   21.41 -    unsigned long long domain_time;
   21.42 +    u64                domain_time;
   21.43  	
   21.44      /*
   21.45       * Timeout values:
   21.46       * Allow a domain to specify a timeout value in system time and 
   21.47       * domain virtual time.
   21.48       */
   21.49 -    unsigned long long wall_timeout;
   21.50 -    unsigned long long domain_timeout;
   21.51 +    u64                wall_timeout;
   21.52 +    u64                domain_timeout;
   21.53  
   21.54      /*
   21.55       * The index structures are all stored here for convenience. The rings 
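
The time_version1/time_version2 pair above is a simple version (seqlock-style) protocol: the writer bumps time_version1, publishes the new values, then bumps time_version2; a reader snapshots time_version2, copies the values, and retries until the snapshot equals time_version1. A minimal writer-side sketch, illustrative only (the real update path is in xen/arch/i386/time.c elsewhere in this changeset and may differ):

/* Illustrative writer side of the version-pair protocol; not the actual
 * Xen code. 'si' is the shared_info page; wmb() orders the stores. */
static void publish_time_values(shared_info_t *si,
                                unsigned long tsc_stamp, u64 system_time,
                                unsigned long wc_sec, unsigned long wc_usec)
{
    si->time_version1++;            /* versions now differ: readers retry  */
    wmb();
    si->tsc_timestamp = tsc_stamp;
    si->system_time   = system_time;
    si->wc_sec        = wc_sec;
    si->wc_usec       = wc_usec;
    wmb();
    si->time_version2++;            /* versions equal again: values stable */
}

The matching reader appears later in this changeset as get_time_values_from_xen() in arch/xeno/kernel/time.c.
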
    22.1 --- a/xen/include/xeno/sched.h	Fri Oct 17 10:22:54 2003 +0000
    22.2 +++ b/xen/include/xeno/sched.h	Mon Oct 27 16:44:00 2003 +0000
    22.3 @@ -23,6 +23,8 @@
    22.4  extern unsigned long volatile jiffies;
    22.5  extern rwlock_t tasklist_lock;
    22.6  
    22.7 +extern struct timeval xtime;
    22.8 +
    22.9  #include <xeno/spinlock.h>
   22.10  
   22.11  struct mm_struct {
    23.1 --- a/xen/include/xeno/time.h	Fri Oct 17 10:22:54 2003 +0000
    23.2 +++ b/xen/include/xeno/time.h	Mon Oct 27 16:44:00 2003 +0000
    23.3 @@ -50,44 +50,21 @@ extern int init_xeno_time();
    23.4  s_time_t get_s_time(void);
    23.5  
    23.6  #define NOW()				((s_time_t)get_s_time())
    23.7 -#define SECONDS(_s)			(((s_time_t)(_s))  * 1000000000UL )
    23.8 -#define TENTHS(_ts)			(((s_time_t)(_ts)) * 100000000UL )
    23.9 -#define HUNDREDTHS(_hs)		(((s_time_t)(_hs)) * 10000000UL )
   23.10 -#define MILLISECS(_ms)		(((s_time_t)(_ms)) * 1000000UL )
   23.11 -#define MICROSECS(_us)		(((s_time_t)(_us)) * 1000UL )
   23.12 +#define SECONDS(_s)			(((s_time_t)(_s))  * 1000000000ULL )
   23.13 +#define MILLISECS(_ms)		(((s_time_t)(_ms)) * 1000000ULL )
   23.14 +#define MICROSECS(_us)		(((s_time_t)(_us)) * 1000ULL )
   23.15  #define Time_Max			((s_time_t) 0x7fffffffffffffffLL)
   23.16  #define FOREVER				Time_Max
   23.17  
   23.18 -/*
   23.19 - * Wall Clock Time
   23.20 - */
   23.21 +/* Wall Clock Time */
   23.22  struct timeval {
   23.23      long            tv_sec;         /* seconds */
   23.24      long            tv_usec;        /* microseconds */
   23.25  };
   23.26    
   23.27 -struct timezone {
   23.28 -    int     tz_minuteswest; /* minutes west of Greenwich */
   23.29 -    int     tz_dsttime;     /* type of dst correction */
   23.30 -};
   23.31 -
   23.32 -#ifdef __KERNEL__
   23.33 -extern void do_gettimeofday(struct timeval *tv);
   23.34 -extern void do_settimeofday(struct timeval *tv);
   23.35 -extern void get_fast_time(struct timeval *tv);
   23.36 -extern void (*do_get_fast_time)(struct timeval *);
   23.37 -#endif
   23.38 -
   23.39 -/*
   23.40 - * Domain Virtual Time (defined in asm/time.h) 
   23.41 - */
   23.42 -/* XXX Interface for getting and setting still missing */
   23.43 -
   23.44 -
   23.45 -/* update the per domain time information */
   23.46  extern void update_dom_time(shared_info_t *si);
   23.47 -
   23.48 -/* XXX move this  */
   23.49 +extern void do_settime(unsigned long secs, unsigned long usecs, 
   23.50 +                       u64 system_time_base);
   23.51  extern void do_timer(struct pt_regs *regs);
   23.52  
   23.53  #endif /* __XENO_TIME_H__ */
    24.1 --- a/xen/include/xeno/tqueue.h	Fri Oct 17 10:22:54 2003 +0000
    24.2 +++ b/xen/include/xeno/tqueue.h	Mon Oct 27 16:44:00 2003 +0000
    24.3 @@ -66,7 +66,7 @@ typedef struct list_head task_queue;
    24.4  #define DECLARE_TASK_QUEUE(q)	LIST_HEAD(q)
    24.5  #define TQ_ACTIVE(q)		(!list_empty(&q))
    24.6  
    24.7 -extern task_queue tq_timer, tq_immediate, tq_disk;
    24.8 +extern task_queue tq_disk;
    24.9  
   24.10  /*
   24.11   * To implement your own list of active bottom halfs, use the following
    25.1 --- a/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c	Fri Oct 17 10:22:54 2003 +0000
    25.2 +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c	Mon Oct 27 16:44:00 2003 +0000
    25.3 @@ -1,23 +1,13 @@
    25.4  /* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    25.5   ****************************************************************************
    25.6 - * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
    25.7 + * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
    25.8 + * (C) 2002-2003 - Keir Fraser - University of Cambridge
    25.9   ****************************************************************************
   25.10   *
   25.11 - *        File: arch.xeno/time.c
   25.12 - *      Author: Rolf Neugebauer
   25.13 - *     Changes: 
   25.14 - *              
   25.15 - *        Date: Nov 2002
   25.16 + *        File: arch/xeno/kernel/time.c
   25.17 + *      Author: Rolf Neugebauer and Keir Fraser
   25.18   * 
   25.19 - * Environment: XenoLinux
   25.20 - * Description: Interface with Hypervisor to get correct notion of time
   25.21 - *              Currently supports Systemtime and WallClock time.
   25.22 - *
   25.23 - * (This has hardly any resemblence with the Linux code but left the
   25.24 - *  copyright notice anyway. Ignore the comments in the copyright notice.)
   25.25 - ****************************************************************************
   25.26 - * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
   25.27 - ****************************************************************************
   25.28 + * Description: Interface with Xen to get correct notion of time
   25.29   */
   25.30  
   25.31  /*
   25.32 @@ -62,7 +52,9 @@
   25.33  
   25.34  #include <asm/div64.h>
   25.35  #include <asm/hypervisor.h>
   25.36 +#include <asm/hypervisor-ifs/dom0_ops.h>
   25.37  
   25.38 +#include <linux/mc146818rtc.h>
   25.39  #include <linux/kernel.h>
   25.40  #include <linux/interrupt.h>
   25.41  #include <linux/time.h>
   25.42 @@ -70,214 +62,334 @@
   25.43  #include <linux/smp.h>
   25.44  #include <linux/irq.h>
   25.45  
   25.46 -#undef XENO_TIME_DEBUG	/* adds sanity checks and periodic printouts */
   25.47 -
   25.48  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
   25.49  extern rwlock_t xtime_lock;
   25.50 +extern unsigned long wall_jiffies;
   25.51  
   25.52  unsigned long cpu_khz;	/* get this from Xen, used elsewhere */
   25.53 -static spinlock_t hyp_time_lock = SPIN_LOCK_UNLOCKED;
   25.54  
   25.55  static unsigned int rdtsc_bitshift;
   25.56 -static u32 st_scale_f;
   25.57 -static u32 st_scale_i;
   25.58 -static u32 shadow_st_pcc;
   25.59 -static s64 shadow_st;
   25.60 +static u32 st_scale_f; /* convert ticks -> usecs (fractional part) */
   25.61 +static u32 st_scale_i; /* convert ticks -> usecs (integer part)    */
   25.62 +
   25.63 +/* These are periodically updated in shared_info, and then copied here. */
   25.64 +static u32 shadow_tsc_stamp;
   25.65 +static s64 shadow_system_time;
   25.66 +static u32 shadow_time_version;
   25.67 +static struct timeval shadow_tv;
   25.68 +
   25.69 +#ifdef CONFIG_XENO_PRIV
   25.70 +/* Periodically propagate synchronised time to the RTC and to Xen. */
   25.71 +static long last_rtc_update, last_xen_update;
   25.72 +#endif
   25.73 +
   25.74 +static u64 processed_system_time;
   25.75  
   25.76 +#define HANDLE_USEC_UNDERFLOW(_tv)         \
   25.77 +    do {                                   \
   25.78 +        while ( (_tv).tv_usec < 0 )        \
   25.79 +        {                                  \
   25.80 +            (_tv).tv_usec += 1000000;      \
   25.81 +            (_tv).tv_sec--;                \
   25.82 +        }                                  \
   25.83 +    } while ( 0 )
   25.84 +#define HANDLE_USEC_OVERFLOW(_tv)          \
   25.85 +    do {                                   \
   25.86 +        while ( (_tv).tv_usec >= 1000000 ) \
   25.87 +        {                                  \
   25.88 +            (_tv).tv_usec -= 1000000;      \
   25.89 +            (_tv).tv_sec++;                \
   25.90 +        }                                  \
   25.91 +    } while ( 0 )
   25.92 +
   25.93 +
   25.94 +#ifdef CONFIG_XENO_PRIV
   25.95  /*
   25.96 - * System time.
   25.97 - * Although the rest of the Linux kernel doesn't know about this, we
   25.98 - * we use it to extrapolate passage of wallclock time.
   25.99 - * We need to read the values from the shared info page "atomically" 
  25.100 - * and use the cycle counter value as the "version" number. Clashes
  25.101 - * should be very rare.
  25.102 + * In order to set the CMOS clock precisely, set_rtc_mmss has to be
  25.103 + * called 500 ms after the second nowtime has started, because when
  25.104 + * nowtime is written into the registers of the CMOS clock, it will
  25.105 + * jump to the next second precisely 500 ms later. Check the Motorola
  25.106 + * MC146818A or Dallas DS12887 data sheet for details.
  25.107 + *
  25.108 + * BUG: This routine does not handle hour overflow properly; it just
  25.109 + *      sets the minutes. Usually you'll only notice that after reboot!
  25.110   */
  25.111 -static inline s64 __get_s_time(void)
  25.112 +static int set_rtc_mmss(unsigned long nowtime)
  25.113  {
  25.114 -    s32 delta_tsc;
  25.115 -    u32 low;
  25.116 -    u64 delta, tsc;
  25.117 +    int retval = 0;
  25.118 +    int real_seconds, real_minutes, cmos_minutes;
  25.119 +    unsigned char save_control, save_freq_select;
  25.120 +
  25.121 +    /* gets recalled with irq locally disabled */
  25.122 +    spin_lock(&rtc_lock);
  25.123 +    save_control = CMOS_READ(RTC_CONTROL);
  25.124 +    CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
  25.125 +
  25.126 +    save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
  25.127 +    CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
  25.128 +
  25.129 +    cmos_minutes = CMOS_READ(RTC_MINUTES);
  25.130 +    if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
  25.131 +        BCD_TO_BIN(cmos_minutes);
  25.132 +
  25.133 +    /*
  25.134 +     * since we're only adjusting minutes and seconds, don't interfere with
  25.135 +     * hour overflow. This avoids messing with unknown time zones but requires
  25.136 +     * your RTC not to be off by more than 15 minutes
  25.137 +     */
  25.138 +    real_seconds = nowtime % 60;
  25.139 +    real_minutes = nowtime / 60;
  25.140 +    if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 )
  25.141 +        real_minutes += 30;		/* correct for half hour time zone */
  25.142 +    real_minutes %= 60;
  25.143 +
  25.144 +    if ( abs(real_minutes - cmos_minutes) < 30 )
  25.145 +    {
  25.146 +        if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
  25.147 +        {
  25.148 +            BIN_TO_BCD(real_seconds);
  25.149 +            BIN_TO_BCD(real_minutes);
  25.150 +        }
  25.151 +        CMOS_WRITE(real_seconds,RTC_SECONDS);
  25.152 +        CMOS_WRITE(real_minutes,RTC_MINUTES);
  25.153 +    }
  25.154 +    else 
  25.155 +    {
  25.156 +        printk(KERN_WARNING
  25.157 +               "set_rtc_mmss: can't update from %d to %d\n",
  25.158 +               cmos_minutes, real_minutes);
  25.159 +        retval = -1;
  25.160 +    }
  25.161 +
  25.162 +    /* The following flags have to be released exactly in this order,
  25.163 +     * otherwise the DS12887 (popular MC146818A clone with integrated
  25.164 +     * battery and quartz) will not reset the oscillator and will not
  25.165 +     * update precisely 500 ms later. You won't find this mentioned in
  25.166 +     * the Dallas Semiconductor data sheets, but who believes data
  25.167 +     * sheets anyway ...                           -- Markus Kuhn
  25.168 +     */
  25.169 +    CMOS_WRITE(save_control, RTC_CONTROL);
  25.170 +    CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
  25.171 +    spin_unlock(&rtc_lock);
  25.172 +
  25.173 +    return retval;
  25.174 +}
  25.175 +#endif
  25.176 +
  25.177 +
  25.178 +/* Must be called with the xtime_lock held for writing. */
  25.179 +static void get_time_values_from_xen(void)
  25.180 +{
  25.181 +    do {
  25.182 +        shadow_time_version = HYPERVISOR_shared_info->time_version2;
  25.183 +        rmb();
  25.184 +        shadow_tv.tv_sec    = HYPERVISOR_shared_info->wc_sec;
  25.185 +        shadow_tv.tv_usec   = HYPERVISOR_shared_info->wc_usec;
  25.186 +        shadow_tsc_stamp    = HYPERVISOR_shared_info->tsc_timestamp;
  25.187 +        shadow_system_time  = HYPERVISOR_shared_info->system_time;
  25.188 +        rmb();
  25.189 +    }
  25.190 +    while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
  25.191 +}
  25.192 +
  25.193 +#define TIME_VALUES_UP_TO_DATE \
  25.194 +    (shadow_time_version == HYPERVISOR_shared_info->time_version2)
  25.195 +
  25.196 +
  25.197 +static inline unsigned long get_time_delta_usecs(void)
  25.198 +{
  25.199 +    s32      delta_tsc;
  25.200 +    u32      low;
  25.201 +    u64      delta, tsc;
  25.202  
  25.203      rdtscll(tsc);
  25.204      low = (u32)(tsc >> rdtsc_bitshift);
  25.205 -    delta_tsc = (s32)(low - shadow_st_pcc);
  25.206 +    delta_tsc = (s32)(low - shadow_tsc_stamp);
  25.207      if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
  25.208      delta = ((u64)delta_tsc * st_scale_f);
  25.209      delta >>= 32;
  25.210      delta += ((u64)delta_tsc * st_scale_i);
  25.211  
  25.212 -    return shadow_st + delta;
  25.213 +    return (unsigned long)delta;
  25.214  }
  25.215  
  25.216 -/*
  25.217 - * Wallclock time.
  25.218 - * Based on what the hypervisor tells us, extrapolated using system time.
  25.219 - * Again need to read a number of values from the shared page "atomically".
  25.220 - * this time using a version number.
  25.221 - */
  25.222 -static u32        shadow_wc_version=0;
  25.223 -static long       shadow_tv_sec;
  25.224 -static long       shadow_tv_usec;
  25.225 -static long long  shadow_wc_timestamp;
  25.226 +
  25.227  void do_gettimeofday(struct timeval *tv)
  25.228  {
  25.229 -    unsigned long flags;
  25.230 -    long          usec, sec;
  25.231 -    u32	          version;
  25.232 -    u64           now, cpu_freq, scale;
  25.233 -
  25.234 -    spin_lock_irqsave(&hyp_time_lock, flags);
  25.235 -
  25.236 -    while ( (version = HYPERVISOR_shared_info->wc_version) != 
  25.237 -            shadow_wc_version )
  25.238 -    {
  25.239 -        barrier();
  25.240 -
  25.241 -        shadow_wc_version   = version;
  25.242 -        shadow_tv_sec       = HYPERVISOR_shared_info->tv_sec;
  25.243 -        shadow_tv_usec      = HYPERVISOR_shared_info->tv_usec;
  25.244 -        shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp;
  25.245 -        shadow_st_pcc       = HYPERVISOR_shared_info->st_timestamp;
  25.246 -        shadow_st           = HYPERVISOR_shared_info->system_time;
  25.247 -
  25.248 -        rdtsc_bitshift      = HYPERVISOR_shared_info->rdtsc_bitshift;
  25.249 -        cpu_freq            = HYPERVISOR_shared_info->cpu_freq;
  25.250 -
  25.251 -        /* XXX cpu_freq as u32 limits it to 4.29 GHz. Get a better do_div! */
  25.252 -        scale = 1000000000LL << (32 + rdtsc_bitshift);
  25.253 -        do_div(scale,(u32)cpu_freq);
  25.254 -        st_scale_f = scale & 0xffffffff;
  25.255 -        st_scale_i = scale >> 32;
  25.256 -
  25.257 -        barrier();
  25.258 -	}
  25.259 -
  25.260 -    now   = __get_s_time();
  25.261 -    usec  = ((unsigned long)(now-shadow_wc_timestamp))/1000;
  25.262 -    sec   = shadow_tv_sec;
  25.263 -    usec += shadow_tv_usec;
  25.264 +    unsigned long flags, lost;
  25.265 +    struct timeval _tv;
  25.266  
  25.267 -    while ( usec >= 1000000 ) 
  25.268 + again:
  25.269 +    read_lock_irqsave(&xtime_lock, flags);
  25.270 +    _tv.tv_usec = get_time_delta_usecs();
  25.271 +    if ( (lost = (jiffies - wall_jiffies)) != 0 )
  25.272 +        _tv.tv_usec += lost * (1000000 / HZ);
  25.273 +    _tv.tv_sec   = xtime.tv_sec;
  25.274 +    _tv.tv_usec += xtime.tv_usec;
  25.275 +    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
  25.276      {
  25.277 -        usec -= 1000000;
  25.278 -        sec++;
  25.279 +        /*
  25.280 +         * We may have blocked for a long time, rendering our calculations
  25.281 +         * invalid (e.g. the time delta may have overflowed). Detect that
  25.282 +         * and recalculate with fresh values.
  25.283 +         */
  25.284 +        read_unlock_irqrestore(&xtime_lock, flags);
  25.285 +        write_lock_irqsave(&xtime_lock, flags);
  25.286 +        get_time_values_from_xen();
  25.287 +        write_unlock_irqrestore(&xtime_lock, flags);
  25.288 +        goto again;
  25.289      }
  25.290 -
  25.291 -    tv->tv_sec = sec;
  25.292 -    tv->tv_usec = usec;
  25.293 -
  25.294 -    spin_unlock_irqrestore(&hyp_time_lock, flags);
  25.295 -
  25.296 -#ifdef XENO_TIME_DEBUG
  25.297 -    {
  25.298 -        static long long old_now=0;
  25.299 -        static long long wct=0, old_wct=0;
  25.300 +    read_unlock_irqrestore(&xtime_lock, flags);
  25.301  
  25.302 -        /* This debug code checks if time increase over two subsequent calls */
  25.303 -        wct=(((long long)sec) * 1000000) + usec;
  25.304 -        /* wall clock time going backwards */
  25.305 -        if ((wct < old_wct) ) {	
  25.306 -            printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n",
  25.307 -                   (long)(wct-old_wct), usec, usec);		
  25.308 -            printk("       st diff=%lld cur st=0x%016llX old st=0x%016llX\n",
  25.309 -                   now-old_now, now, old_now);
  25.310 -        }
  25.311 +    HANDLE_USEC_OVERFLOW(_tv);
  25.312  
  25.313 -        /* system time going backwards */
  25.314 -        if (now<=old_now) {
  25.315 -            printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n",
  25.316 -                   now-old_now, now, old_now);
  25.317 -        }
  25.318 -        old_wct  = wct;
  25.319 -        old_now  = now;
  25.320 -    }
  25.321 -#endif
  25.322 +    *tv = _tv;
  25.323  }
  25.324  
  25.325  void do_settimeofday(struct timeval *tv)
  25.326  {
  25.327 -/* XXX RN: should do something special here for dom0 */
  25.328 -#if 0
  25.329 +#ifdef CONFIG_XENO_PRIV
  25.330 +    struct timeval newtv;
  25.331 +    dom0_op_t op;
  25.332 +    
  25.333 +    if ( start_info.dom_id != 0 )
  25.334 +        return;
  25.335 +    
  25.336      write_lock_irq(&xtime_lock);
  25.337 +    
  25.338      /*
  25.339 -     * This is revolting. We need to set "xtime" correctly. However, the
  25.340 -     * value in this location is the value at the most recent update of
  25.341 -     * wall time.  Discover what correction gettimeofday() would have
  25.342 -     * made, and then undo it!
  25.343 +     * We may block for a long time, causing our time delta to overflow.
  25.344 +     * If that happens then our shadow time values are stale, so we retry
  25.345 +     * with fresh ones.
  25.346       */
  25.347 -    tv->tv_usec -= do_gettimeoffset();
  25.348 + again:
  25.349 +    tv->tv_usec -= get_time_delta_usecs();
  25.350 +    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
  25.351 +    {
  25.352 +        get_time_values_from_xen();
  25.353 +        goto again;
  25.354 +    }
  25.355 +    
  25.356 +    HANDLE_USEC_UNDERFLOW(*tv);
  25.357 +    
  25.358 +    newtv = *tv;
  25.359 +    
  25.360      tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
  25.361 -
  25.362 -    while ( tv->tv_usec < 0 )
  25.363 -    {
  25.364 -        tv->tv_usec += 1000000;
  25.365 -        tv->tv_sec--;
  25.366 -    }
  25.367 +    HANDLE_USEC_UNDERFLOW(*tv);
  25.368  
  25.369      xtime = *tv;
  25.370      time_adjust = 0;		/* stop active adjtime() */
  25.371      time_status |= STA_UNSYNC;
  25.372      time_maxerror = NTP_PHASE_LIMIT;
  25.373      time_esterror = NTP_PHASE_LIMIT;
  25.374 +
  25.375 +    last_rtc_update = last_xen_update = 0;
  25.376 +
  25.377 +    op.cmd = DOM0_SETTIME;
  25.378 +    op.u.settime.secs        = newtv.tv_sec;
  25.379 +    op.u.settime.usecs       = newtv.tv_usec;
  25.380 +    op.u.settime.system_time = shadow_system_time;
  25.381 +
  25.382      write_unlock_irq(&xtime_lock);
  25.383 +
  25.384 +    HYPERVISOR_dom0_op(&op);
  25.385  #endif
  25.386  }
  25.387  
  25.388 +asmlinkage long sys_stime(int *tptr)
  25.389 +{
  25.390 +    int value;
  25.391 +    struct timeval tv;
  25.392  
  25.393 -/*
  25.394 - * Timer ISR. 
  25.395 - * Unlike normal Linux these don't come in at a fixed rate of HZ. 
  25.396 - * In here we wrok out how often it should have been called and then call
  25.397 - * the architecture independent part (do_timer()) the appropriate number of
  25.398 - * times. A bit of a nasty hack, to keep the "other" notion of wallclock time
  25.399 - * happy.
  25.400 - */
  25.401 -static long long us_per_tick=1000000/HZ;
  25.402 -static long long last_irq;
  25.403 +    if ( !capable(CAP_SYS_TIME) )
  25.404 +        return -EPERM;
  25.405 +
  25.406 +    if ( get_user(value, tptr) )
  25.407 +        return -EFAULT;
  25.408 +
  25.409 +    tv.tv_sec  = value;
  25.410 +    tv.tv_usec = 0;
  25.411 +
  25.412 +    do_settimeofday(&tv);
  25.413 +
  25.414 +    return 0;
  25.415 +}
  25.416 +
  25.417 +#define NS_PER_TICK (1000000000ULL/HZ)
  25.418  static inline void do_timer_interrupt(int irq, void *dev_id,
  25.419                                        struct pt_regs *regs)
  25.420  {
  25.421 -    struct timeval tv;
  25.422 -    long long time, delta;
  25.423 +    s64 delta;
  25.424 +
  25.425 +    get_time_values_from_xen();
  25.426  
  25.427 -    /*
  25.428 -     * The next bit really sucks:
  25.429 -     * Linux not only uses do_gettimeofday() to keep a notion of
  25.430 -     * wallclock time, but also maintains the xtime struct and jiffies.
  25.431 -     * (Even worse some userland code accesses this via the sys_time()
  25.432 -     * system call)
  25.433 -     * Unfortunately, xtime is maintain in the architecture independent
  25.434 -     * part of the timer ISR (./kernel/timer.c sic!). So, although we have
  25.435 -     * perfectly valid notion of wallclock time from the hypervisor we here
  25.436 -     * fake missed timer interrupts so that the arch independent part of
  25.437 -     * the Timer ISR updates jiffies for us *and* once the bh gets run
  25.438 -     * updates xtime accordingly. Yuck!
  25.439 -     */
  25.440 -
  25.441 -    /* Work out the number of jiffy intervals passed and update them. */
  25.442 -    do_gettimeofday(&tv);
  25.443 -    time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec;
  25.444 -    delta = time - last_irq;
  25.445 -    if (delta <= 0) {
  25.446 -        printk ("Timer ISR: Time went backwards: %lld\n", delta);
  25.447 +    if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 )
  25.448 +    {
  25.449 +        printk("Timer ISR: Time went backwards: %lld\n", delta);
  25.450          return;
  25.451      }
  25.452 -    while (delta >= us_per_tick) {
  25.453 +
  25.454 +    while ( delta >= NS_PER_TICK )
  25.455 +    {
  25.456          do_timer(regs);
  25.457 -        delta    -= us_per_tick;
  25.458 -        last_irq += us_per_tick;
  25.459 +        delta -= NS_PER_TICK;
  25.460 +        processed_system_time += NS_PER_TICK;
  25.461 +    }
  25.462 +    
  25.463 +    if ( (time_status & STA_UNSYNC) != 0 )
  25.464 +    {
  25.465 +        /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */
  25.466 +        shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ);
  25.467 +        HANDLE_USEC_UNDERFLOW(shadow_tv);
  25.468 +
  25.469 +        /* Update our unsynchronised xtime appropriately. */
  25.470 +        xtime = shadow_tv;
  25.471      }
  25.472  
  25.473 -#if 0
  25.474 -    if (!user_mode(regs))
  25.475 -        x86_do_profile(regs->eip);
  25.476 +#ifdef CONFIG_XENO_PRIV
  25.477 +    if ( (start_info.dom_id == 0) && ((time_status & STA_UNSYNC) == 0) )
  25.478 +    {
  25.479 +        /* Send synchronised time to Xen approximately every minute. */
  25.480 +        if ( xtime.tv_sec > (last_xen_update + 60) )
  25.481 +        {
  25.482 +            dom0_op_t op;
  25.483 +            struct timeval tv = xtime;
  25.484 +
  25.485 +            tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ);
  25.486 +            HANDLE_USEC_OVERFLOW(tv);
  25.487 +
  25.488 +            op.cmd = DOM0_SETTIME;
  25.489 +            op.u.settime.secs        = tv.tv_sec;
  25.490 +            op.u.settime.usecs       = tv.tv_usec;
  25.491 +            op.u.settime.system_time = shadow_system_time;
  25.492 +            HYPERVISOR_dom0_op(&op);
  25.493 +
  25.494 +            last_xen_update = xtime.tv_sec;
  25.495 +        }
  25.496 +
  25.497 +        /*
  25.498 +         * If we have an externally synchronized Linux clock, then update CMOS
  25.499 +         * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called
  25.500 +         * as close as possible to 500 ms before the new second starts.
  25.501 +         */
  25.502 +        if ( (xtime.tv_sec > (last_rtc_update + 660)) &&
  25.503 +             (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) &&
  25.504 +             (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) )
  25.505 +        {
  25.506 +            if ( set_rtc_mmss(xtime.tv_sec) == 0 )
  25.507 +                last_rtc_update = xtime.tv_sec;
  25.508 +            else
  25.509 +                last_rtc_update = xtime.tv_sec - 600;
  25.510 +        }
  25.511 +    }
  25.512  #endif
  25.513  }
  25.514  
  25.515  static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  25.516  {
  25.517      write_lock(&xtime_lock);
  25.518 -    do_timer_interrupt(irq, NULL, regs);
  25.519 +    while ( !TIME_VALUES_UP_TO_DATE )
  25.520 +        do_timer_interrupt(irq, NULL, regs);
  25.521      write_unlock(&xtime_lock);
  25.522  }
  25.523  
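
To summarise the new do_gettimeofday() above: the reported time is xtime, plus any timer ticks that have not yet been folded into xtime, plus the TSC-extrapolated microseconds since the last update from Xen, with the version check guarding against stale shadow values. A compact restatement, illustrative only (every name except compose_tv comes from the code above):

/* Illustrative only: how do_gettimeofday() composes its result
 * (locking and the staleness retry are omitted here). */
static struct timeval compose_tv(void)
{
    struct timeval tv;
    unsigned long lost = jiffies - wall_jiffies;

    tv.tv_sec   = xtime.tv_sec;
    tv.tv_usec  = xtime.tv_usec;
    tv.tv_usec += lost * (1000000 / HZ);   /* ticks not yet folded into xtime     */
    tv.tv_usec += get_time_delta_usecs();  /* TSC extrapolation since last update */
    HANDLE_USEC_OVERFLOW(tv);              /* normalise tv_usec into [0,1000000)  */

    return tv;
}
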
  25.524 @@ -293,7 +405,7 @@ static struct irqaction irq_timer = {
  25.525  void __init time_init(void)
  25.526  {
  25.527      unsigned long long alarm;
  25.528 -    u64 __cpu_khz;
  25.529 +    u64 __cpu_khz, cpu_freq, scale, scale2;
  25.530  
  25.531      __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
  25.532      do_div(__cpu_khz, 1000);
  25.533 @@ -301,23 +413,29 @@ void __init time_init(void)
  25.534      printk("Xen reported: %lu.%03lu MHz processor.\n", 
  25.535             cpu_khz / 1000, cpu_khz % 1000);
  25.536  
  25.537 -    do_gettimeofday(&xtime);
  25.538 -    last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec;
  25.539 +    xtime.tv_sec = HYPERVISOR_shared_info->wc_sec;
  25.540 +    xtime.tv_usec = HYPERVISOR_shared_info->wc_usec;
  25.541 +    processed_system_time = shadow_system_time;
  25.542 +
  25.543 +    rdtsc_bitshift      = HYPERVISOR_shared_info->rdtsc_bitshift;
  25.544 +    cpu_freq            = HYPERVISOR_shared_info->cpu_freq;
  25.545 +
  25.546 +    scale = 1000000LL << (32 + rdtsc_bitshift);
  25.547 +    do_div(scale, (u32)cpu_freq);
  25.548 +
  25.549 +    if ( (cpu_freq >> 32) != 0 )
  25.550 +    {
  25.551 +        scale2 = 1000000LL << rdtsc_bitshift;
  25.552 +        do_div(scale2, (u32)(cpu_freq>>32));
  25.553 +        scale += scale2;
  25.554 +    }
  25.555 +
  25.556 +    st_scale_f = scale & 0xffffffff;
  25.557 +    st_scale_i = scale >> 32;
  25.558  
  25.559      setup_irq(TIMER_IRQ, &irq_timer);
  25.560  
  25.561 -    /*
  25.562 -     * Start ticker. Note that timing runs of wall clock, not virtual 'domain' 
  25.563 -     * time. This means that clock sshould run at the correct rate. For things 
  25.564 -     * like scheduling, it's not clear whether it matters which sort of time 
  25.565 -     * we use. XXX RN: unimplemented.
  25.566 -     */
  25.567 +    rdtscll(alarm);
  25.568  
  25.569 -    rdtscll(alarm);
  25.570 -#if 0
  25.571 -    alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms;
  25.572 -    HYPERVISOR_shared_info->wall_timeout   = alarm;
  25.573 -    HYPERVISOR_shared_info->domain_timeout = ~0ULL;
  25.574 -#endif
  25.575      clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
  25.576  }
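
The scale set up in time_init() above is a 32.32 fixed-point count of microseconds per (right-shifted) TSC tick, split into st_scale_i and st_scale_f; get_time_delta_usecs() multiplies by the two halves separately. A small standalone example with hypothetical numbers (2 GHz CPU, rdtsc_bitshift of 0), illustrative only:

/* Illustrative only: the ticks -> usecs conversion used by time.c above,
 * with made-up example values. Compiles as a standalone C program. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t cpu_freq       = 2000000000ULL;   /* 2 GHz (hypothetical) */
    unsigned rdtsc_bitshift = 0;

    /* scale == usecs per shifted tick, as a 32.32 fixed-point value. */
    uint64_t scale      = (1000000ULL << (32 + rdtsc_bitshift)) / cpu_freq;
    uint32_t st_scale_i = (uint32_t)(scale >> 32);   /* integer part    */
    uint32_t st_scale_f = (uint32_t)scale;           /* fractional part */

    /* 2,000,000 shifted ticks == 1 ms at 2 GHz. */
    uint32_t delta_tsc = 2000000;
    uint64_t usecs = (((uint64_t)delta_tsc * st_scale_f) >> 32)
                   + ((uint64_t)delta_tsc * st_scale_i);

    /* Prints approximately 1000 usecs (truncation may lose 1 usec). */
    printf("scale_i=%u scale_f=%u delta=%llu usecs\n",
           st_scale_i, st_scale_f, (unsigned long long)usecs);
    return 0;
}
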
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/xenolinux-2.4.22-sparse/kernel/time.c	Mon Oct 27 16:44:00 2003 +0000
    26.3 @@ -0,0 +1,415 @@
    26.4 +/*
    26.5 + *  linux/kernel/time.c
    26.6 + *
    26.7 + *  Copyright (C) 1991, 1992  Linus Torvalds
    26.8 + *
    26.9 + *  This file contains the interface functions for the various
   26.10 + *  time related system calls: time, stime, gettimeofday, settimeofday,
   26.11 + *			       adjtime
   26.12 + */
   26.13 +/*
   26.14 + * Modification history kernel/time.c
   26.15 + * 
   26.16 + * 1993-09-02    Philip Gladstone
   26.17 + *      Created file with time related functions from sched.c and adjtimex() 
   26.18 + * 1993-10-08    Torsten Duwe
   26.19 + *      adjtime interface update and CMOS clock write code
   26.20 + * 1995-08-13    Torsten Duwe
   26.21 + *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
   26.22 + * 1999-01-16    Ulrich Windl
   26.23 + *	Introduced error checking for many cases in adjtimex().
   26.24 + *	Updated NTP code according to technical memorandum Jan '96
   26.25 + *	"A Kernel Model for Precision Timekeeping" by Dave Mills
   26.26 + *	Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
   26.27 + *	(Even though the technical memorandum forbids it)
   26.28 + */
   26.29 +
   26.30 +#include <linux/mm.h>
   26.31 +#include <linux/timex.h>
   26.32 +#include <linux/smp_lock.h>
   26.33 +
   26.34 +#include <asm/uaccess.h>
   26.35 +
   26.36 +/* 
   26.37 + * The timezone where the local system is located.  Used as a default by some
   26.38 + * programs who obtain this value by using gettimeofday.
   26.39 + */
   26.40 +struct timezone sys_tz;
   26.41 +
   26.42 +/* The xtime_lock is not only serializing the xtime read/writes but it's also
   26.43 +   serializing all accesses to the global NTP variables now. */
   26.44 +extern rwlock_t xtime_lock;
   26.45 +
   26.46 +#if !defined(__alpha__) && !defined(__ia64__)
   26.47 +
   26.48 +/*
   26.49 + * sys_time() can be implemented in user-level using
   26.50 + * sys_gettimeofday().  Is this for backwards compatibility?  If so,
   26.51 + * why not move it into the appropriate arch directory (for those
   26.52 + * architectures that need it).
   26.53 + *
   26.54 + * XXX This function is NOT 64-bit clean!
   26.55 + */
   26.56 +asmlinkage long sys_time(int * tloc)
   26.57 +{
   26.58 +	struct timeval now; 
   26.59 +	int i; 
   26.60 +
   26.61 +	do_gettimeofday(&now);
   26.62 +	i = now.tv_sec;
   26.63 +	if (tloc) {
   26.64 +		if (put_user(i,tloc))
   26.65 +			i = -EFAULT;
   26.66 +	}
   26.67 +	return i;
   26.68 +}
   26.69 +
   26.70 +#if !defined(CONFIG_XENO)
   26.71 +
   26.72 +/*
   26.73 + * sys_stime() can be implemented in user-level using
   26.74 + * sys_settimeofday().  Is this for backwards compatibility?  If so,
   26.75 + * why not move it into the appropriate arch directory (for those
   26.76 + * architectures that need it).
   26.77 + */
   26.78 + 
   26.79 +asmlinkage long sys_stime(int * tptr)
   26.80 +{
   26.81 +	int value;
   26.82 +
   26.83 +	if (!capable(CAP_SYS_TIME))
   26.84 +		return -EPERM;
   26.85 +	if (get_user(value, tptr))
   26.86 +		return -EFAULT;
   26.87 +	write_lock_irq(&xtime_lock);
   26.88 +	vxtime_lock();
   26.89 +	xtime.tv_sec = value;
   26.90 +	xtime.tv_usec = 0;
   26.91 +	vxtime_unlock();
   26.92 +	time_adjust = 0;	/* stop active adjtime() */
   26.93 +	time_status |= STA_UNSYNC;
   26.94 +	time_maxerror = NTP_PHASE_LIMIT;
   26.95 +	time_esterror = NTP_PHASE_LIMIT;
   26.96 +	write_unlock_irq(&xtime_lock);
   26.97 +	return 0;
   26.98 +}
   26.99 +
  26.100 +#endif
  26.101 +
  26.102 +#endif
  26.103 +
  26.104 +asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
  26.105 +{
  26.106 +	if (tv) {
  26.107 +		struct timeval ktv;
  26.108 +		do_gettimeofday(&ktv);
  26.109 +		if (copy_to_user(tv, &ktv, sizeof(ktv)))
  26.110 +			return -EFAULT;
  26.111 +	}
  26.112 +	if (tz) {
  26.113 +		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
  26.114 +			return -EFAULT;
  26.115 +	}
  26.116 +	return 0;
  26.117 +}
  26.118 +
  26.119 +/*
  26.120 + * Adjust the time obtained from the CMOS to be UTC time instead of
  26.121 + * local time.
  26.122 + * 
  26.123 + * This is ugly, but preferable to the alternatives.  Otherwise we
  26.124 + * would either need to write a program to do it in /etc/rc (and risk
  26.125 + * confusion if the program gets run more than once; it would also be 
  26.126 + * hard to make the program warp the clock precisely n hours)  or
  26.127 + * compile in the timezone information into the kernel.  Bad, bad....
  26.128 + *
  26.129 + *              				- TYT, 1992-01-01
  26.130 + *
  26.131 + * The best thing to do is to keep the CMOS clock in universal time (UTC)
  26.132 + * as real UNIX machines always do it. This avoids all headaches about
  26.133 + * daylight saving times and warping kernel clocks.
  26.134 + */
  26.135 +inline static void warp_clock(void)
  26.136 +{
  26.137 +	write_lock_irq(&xtime_lock);
  26.138 +	vxtime_lock();
  26.139 +	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
  26.140 +	vxtime_unlock();
  26.141 +	write_unlock_irq(&xtime_lock);
  26.142 +}
  26.143 +
  26.144 +/*
  26.145 + * In case for some reason the CMOS clock has not already been running
  26.146 + * in UTC, but in some local time: The first time we set the timezone,
  26.147 + * we will warp the clock so that it is ticking UTC time instead of
  26.148 + * local time. Presumably, if someone is setting the timezone then we
  26.149 + * are running in an environment where the programs understand about
  26.150 + * timezones. This should be done at boot time in the /etc/rc script,
  26.151 + * as soon as possible, so that the clock can be set right. Otherwise,
  26.152 + * various programs will get confused when the clock gets warped.
  26.153 + */
  26.154 +
  26.155 +int do_sys_settimeofday(struct timeval *tv, struct timezone *tz)
  26.156 +{
  26.157 +	static int firsttime = 1;
  26.158 +
  26.159 +	if (!capable(CAP_SYS_TIME))
  26.160 +		return -EPERM;
  26.161 +		
  26.162 +	if (tz) {
  26.163 +		/* SMP safe, global irq locking makes it work. */
  26.164 +		sys_tz = *tz;
  26.165 +		if (firsttime) {
  26.166 +			firsttime = 0;
  26.167 +			if (!tv)
  26.168 +				warp_clock();
  26.169 +		}
  26.170 +	}
  26.171 +	if (tv)
  26.172 +	{
  26.173 +		/* SMP safe, again the code in arch/foo/time.c should
  26.174 +		 * globally block out interrupts when it runs.
  26.175 +		 */
  26.176 +		do_settimeofday(tv);
  26.177 +	}
  26.178 +	return 0;
  26.179 +}
  26.180 +
  26.181 +asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz)
  26.182 +{
  26.183 +	struct timeval	new_tv;
  26.184 +	struct timezone new_tz;
  26.185 +
  26.186 +	if (tv) {
  26.187 +		if (copy_from_user(&new_tv, tv, sizeof(*tv)))
  26.188 +			return -EFAULT;
  26.189 +	}
  26.190 +	if (tz) {
  26.191 +		if (copy_from_user(&new_tz, tz, sizeof(*tz)))
  26.192 +			return -EFAULT;
  26.193 +	}
  26.194 +
  26.195 +	return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL);
  26.196 +}
  26.197 +
  26.198 +long pps_offset;		/* pps time offset (us) */
  26.199 +long pps_jitter = MAXTIME;	/* time dispersion (jitter) (us) */
  26.200 +
  26.201 +long pps_freq;			/* frequency offset (scaled ppm) */
  26.202 +long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
  26.203 +
  26.204 +long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
  26.205 +
  26.206 +int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
  26.207 +
  26.208 +long pps_jitcnt;		/* jitter limit exceeded */
  26.209 +long pps_calcnt;		/* calibration intervals */
  26.210 +long pps_errcnt;		/* calibration errors */
  26.211 +long pps_stbcnt;		/* stability limit exceeded */
  26.212 +
  26.213 +/* hook for a loadable hardpps kernel module */
  26.214 +void (*hardpps_ptr)(struct timeval *);
  26.215 +
  26.216 +/* adjtimex mainly allows reading (and writing, if superuser) of
  26.217 + * kernel time-keeping variables. used by xntpd.
  26.218 + */
  26.219 +int do_adjtimex(struct timex *txc)
  26.220 +{
  26.221 +        long ltemp, mtemp, save_adjust;
  26.222 +	int result;
  26.223 +
  26.224 +	/* In order to modify anything, you gotta be super-user! */
  26.225 +	if (txc->modes && !capable(CAP_SYS_TIME))
  26.226 +		return -EPERM;
  26.227 +		
  26.228 +	/* Now we validate the data before disabling interrupts */
  26.229 +
  26.230 +	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
  26.231 +	  /* singleshot must not be used with any other mode bits */
  26.232 +		if (txc->modes != ADJ_OFFSET_SINGLESHOT)
  26.233 +			return -EINVAL;
  26.234 +
  26.235 +	if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
  26.236 +	  /* adjustment Offset limited to +- .512 seconds */
  26.237 +		if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
  26.238 +			return -EINVAL;	
  26.239 +
  26.240 +	/* if the quartz is off by more than 10% something is VERY wrong ! */
  26.241 +	if (txc->modes & ADJ_TICK)
  26.242 +		if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
  26.243 +			return -EINVAL;
  26.244 +
  26.245 +	write_lock_irq(&xtime_lock);
  26.246 +	result = time_state;	/* mostly `TIME_OK' */
  26.247 +
  26.248 +	/* Save for later - semantics of adjtime is to return old value */
  26.249 +	save_adjust = time_adjust;
  26.250 +
  26.251 +#if 0	/* STA_CLOCKERR is never set yet */
  26.252 +	time_status &= ~STA_CLOCKERR;		/* reset STA_CLOCKERR */
  26.253 +#endif
  26.254 +	/* If there are input parameters, then process them */
  26.255 +	if (txc->modes)
  26.256 +	{
  26.257 +	    if (txc->modes & ADJ_STATUS)	/* only set allowed bits */
  26.258 +		time_status =  (txc->status & ~STA_RONLY) |
  26.259 +			      (time_status & STA_RONLY);
  26.260 +
  26.261 +	    if (txc->modes & ADJ_FREQUENCY) {	/* p. 22 */
  26.262 +		if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
  26.263 +		    result = -EINVAL;
  26.264 +		    goto leave;
  26.265 +		}
  26.266 +		time_freq = txc->freq - pps_freq;
  26.267 +	    }
  26.268 +
  26.269 +	    if (txc->modes & ADJ_MAXERROR) {
  26.270 +		if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
  26.271 +		    result = -EINVAL;
  26.272 +		    goto leave;
  26.273 +		}
  26.274 +		time_maxerror = txc->maxerror;
  26.275 +	    }
  26.276 +
  26.277 +	    if (txc->modes & ADJ_ESTERROR) {
  26.278 +		if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
  26.279 +		    result = -EINVAL;
  26.280 +		    goto leave;
  26.281 +		}
  26.282 +		time_esterror = txc->esterror;
  26.283 +	    }
  26.284 +
  26.285 +	    if (txc->modes & ADJ_TIMECONST) {	/* p. 24 */
  26.286 +		if (txc->constant < 0) {	/* NTP v4 uses values > 6 */
  26.287 +		    result = -EINVAL;
  26.288 +		    goto leave;
  26.289 +		}
  26.290 +		time_constant = txc->constant;
  26.291 +	    }
  26.292 +
  26.293 +	    if (txc->modes & ADJ_OFFSET) {	/* values checked earlier */
  26.294 +		if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
  26.295 +		    /* adjtime() is independent from ntp_adjtime() */
  26.296 +		    time_adjust = txc->offset;
  26.297 +		}
  26.298 +		else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
  26.299 +		    ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
  26.300 +		            (STA_PPSTIME | STA_PPSSIGNAL) ?
  26.301 +		            pps_offset : txc->offset;
  26.302 +
  26.303 +		    /*
  26.304 +		     * Scale the phase adjustment and
  26.305 +		     * clamp to the operating range.
  26.306 +		     */
  26.307 +		    if (ltemp > MAXPHASE)
  26.308 +		        time_offset = MAXPHASE << SHIFT_UPDATE;
  26.309 +		    else if (ltemp < -MAXPHASE)
  26.310 +			time_offset = -(MAXPHASE << SHIFT_UPDATE);
  26.311 +		    else
  26.312 +		        time_offset = ltemp << SHIFT_UPDATE;
  26.313 +
  26.314 +		    /*
  26.315 +		     * Select whether the frequency is to be controlled
  26.316 +		     * and in which mode (PLL or FLL). Clamp to the operating
  26.317 +		     * range. Ugly multiply/divide should be replaced someday.
  26.318 +		     */
  26.319 +
  26.320 +		    if (time_status & STA_FREQHOLD || time_reftime == 0)
  26.321 +		        time_reftime = xtime.tv_sec;
  26.322 +		    mtemp = xtime.tv_sec - time_reftime;
  26.323 +		    time_reftime = xtime.tv_sec;
  26.324 +		    if (time_status & STA_FLL) {
  26.325 +		        if (mtemp >= MINSEC) {
  26.326 +			    ltemp = (time_offset / mtemp) << (SHIFT_USEC -
  26.327 +							      SHIFT_UPDATE);
  26.328 +			    if (ltemp < 0)
  26.329 +			        time_freq -= -ltemp >> SHIFT_KH;
  26.330 +			    else
  26.331 +			        time_freq += ltemp >> SHIFT_KH;
  26.332 +			} else /* calibration interval too short (p. 12) */
  26.333 +				result = TIME_ERROR;
  26.334 +		    } else {	/* PLL mode */
  26.335 +		        if (mtemp < MAXSEC) {
  26.336 +			    ltemp *= mtemp;
  26.337 +			    if (ltemp < 0)
  26.338 +			        time_freq -= -ltemp >> (time_constant +
  26.339 +							time_constant +
  26.340 +							SHIFT_KF - SHIFT_USEC);
  26.341 +			    else
  26.342 +			        time_freq += ltemp >> (time_constant +
  26.343 +						       time_constant +
  26.344 +						       SHIFT_KF - SHIFT_USEC);
  26.345 +			} else /* calibration interval too long (p. 12) */
  26.346 +				result = TIME_ERROR;
  26.347 +		    }
  26.348 +		    if (time_freq > time_tolerance)
  26.349 +		        time_freq = time_tolerance;
  26.350 +		    else if (time_freq < -time_tolerance)
  26.351 +		        time_freq = -time_tolerance;
  26.352 +		} /* STA_PLL || STA_PPSTIME */
  26.353 +	    } /* txc->modes & ADJ_OFFSET */
  26.354 +	    if (txc->modes & ADJ_TICK) {
  26.355 +		/* if the quartz is off by more than 10% something is
  26.356 +		   VERY wrong ! */
  26.357 +		if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) {
  26.358 +		    result = -EINVAL;
  26.359 +		    goto leave;
  26.360 +		}
  26.361 +		tick = txc->tick;
  26.362 +	    }
  26.363 +	} /* txc->modes */
  26.364 +leave:	if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
  26.365 +	    || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
  26.366 +		&& (time_status & STA_PPSSIGNAL) == 0)
  26.367 +	    /* p. 24, (b) */
  26.368 +	    || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
  26.369 +		== (STA_PPSTIME|STA_PPSJITTER))
  26.370 +	    /* p. 24, (c) */
  26.371 +	    || ((time_status & STA_PPSFREQ) != 0
  26.372 +		&& (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
  26.373 +	    /* p. 24, (d) */
  26.374 +		result = TIME_ERROR;
  26.375 +	
  26.376 +	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
  26.377 +	    txc->offset	   = save_adjust;
  26.378 +	else {
  26.379 +	    if (time_offset < 0)
  26.380 +		txc->offset = -(-time_offset >> SHIFT_UPDATE);
  26.381 +	    else
  26.382 +		txc->offset = time_offset >> SHIFT_UPDATE;
  26.383 +	}
  26.384 +	txc->freq	   = time_freq + pps_freq;
  26.385 +	txc->maxerror	   = time_maxerror;
  26.386 +	txc->esterror	   = time_esterror;
  26.387 +	txc->status	   = time_status;
  26.388 +	txc->constant	   = time_constant;
  26.389 +	txc->precision	   = time_precision;
  26.390 +	txc->tolerance	   = time_tolerance;
  26.391 +	txc->tick	   = tick;
  26.392 +	txc->ppsfreq	   = pps_freq;
  26.393 +	txc->jitter	   = pps_jitter >> PPS_AVG;
  26.394 +	txc->shift	   = pps_shift;
  26.395 +	txc->stabil	   = pps_stabil;
  26.396 +	txc->jitcnt	   = pps_jitcnt;
  26.397 +	txc->calcnt	   = pps_calcnt;
  26.398 +	txc->errcnt	   = pps_errcnt;
  26.399 +	txc->stbcnt	   = pps_stbcnt;
  26.400 +	write_unlock_irq(&xtime_lock);
  26.401 +	do_gettimeofday(&txc->time);
  26.402 +	return(result);
  26.403 +}
  26.404 +
  26.405 +asmlinkage long sys_adjtimex(struct timex *txc_p)
  26.406 +{
  26.407 +	struct timex txc;		/* Local copy of parameter */
  26.408 +	int ret;
  26.409 +
  26.410 +	/* Copy the user data space into the kernel copy
  26.411 +	 * structure. But bear in mind that the structures
  26.412 +	 * may change
  26.413 +	 */
  26.414 +	if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
  26.415 +		return -EFAULT;
  26.416 +	ret = do_adjtimex(&txc);
  26.417 +	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
  26.418 +}