ia64/xen-unstable

changeset 113:b0d356ed774b

bitkeeper revision 1.23 (3e3c0371lW0pQGn9j_7hdF_Skyc2Sg)

Halfway merge with Boris' recent dom creation checkin (demonstrating my inability
to control BK tools)
author rn@wyvis.camb.intel-research.net
date Sat Feb 01 17:27:13 2003 +0000 (2003-02-01)
parents 033b5b739c6f 3b0238779bd5
children 350c8a939f8d
files .rootkeys BitKeeper/etc/logging_ok xen-2.4.16/arch/i386/apic.c xen-2.4.16/arch/i386/entry.S xen-2.4.16/arch/i386/setup.c xen-2.4.16/arch/i386/time.c xen-2.4.16/common/ac_timer.c xen-2.4.16/common/domain.c xen-2.4.16/common/lib.c xen-2.4.16/common/schedule.c xen-2.4.16/common/timer.c xen-2.4.16/include/asm-i386/apic.h xen-2.4.16/include/asm-i386/time.h xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h xen-2.4.16/include/xeno/ac_timer.h xen-2.4.16/include/xeno/sched.h xen-2.4.16/include/xeno/time.h xen-2.4.16/include/xeno/timer.h xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
line diff
     1.1 --- a/.rootkeys	Fri Jan 31 15:03:37 2003 +0000
     1.2 +++ b/.rootkeys	Sat Feb 01 17:27:13 2003 +0000
     1.3 @@ -35,6 +35,7 @@ 3ddb79bcOftONV9h4QCxXOfiT0h91w xen-2.4.1
     1.4  3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen-2.4.16/arch/i386/usercopy.c
     1.5  3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen-2.4.16/arch/i386/xeno.lds
     1.6  3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen-2.4.16/common/Makefile
     1.7 +3e397e66AyyD5fYraAySWuwi9uqSXg xen-2.4.16/common/ac_timer.c
     1.8  3ddb79bddEYJbcURvqqcx99Yl2iAhQ xen-2.4.16/common/block.c
     1.9  3ddb79bdrqnW93GR9gZk1OJe1qK-iQ xen-2.4.16/common/brlock.c
    1.10  3ddb79bdLX_P6iB7ILiblRLWvebapg xen-2.4.16/common/dom0_ops.c
    1.11 @@ -47,6 +48,7 @@ 3ddb79bdS39UXxUtZnaScie83-7VTQ xen-2.4.1
    1.12  3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen-2.4.16/common/network.c
    1.13  3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen-2.4.16/common/page_alloc.c
    1.14  3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen-2.4.16/common/resource.c
    1.15 +3e397e6619PgAfBbw2XFbXkewvUWgw xen-2.4.16/common/schedule.c
    1.16  3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen-2.4.16/common/slab.c
    1.17  3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen-2.4.16/common/softirq.c
    1.18  3ddb79bdQqFHtHRGEO2dsxGgo6eAhw xen-2.4.16/common/timer.c
    1.19 @@ -158,6 +160,7 @@ 3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen-2.4.1
    1.20  3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen-2.4.16/include/asm-i386/softirq.h
    1.21  3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen-2.4.16/include/asm-i386/spinlock.h
    1.22  3ddb79c3ezddh34MdelJpa5tNR00Dw xen-2.4.16/include/asm-i386/system.h
    1.23 +3e397e66xPNc8eaSqC9pPbyAtRGzHA xen-2.4.16/include/asm-i386/time.h
    1.24  3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen-2.4.16/include/asm-i386/types.h
    1.25  3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen-2.4.16/include/asm-i386/uaccess.h
    1.26  3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen-2.4.16/include/asm-i386/unaligned.h
    1.27 @@ -168,6 +171,7 @@ 3ddb79c4qbCoOFHrv9sCGshbWzBVlQ xen-2.4.1
    1.28  3ddb79c4R4iVwqIIeychVQYmIH4FUg xen-2.4.16/include/scsi/scsi_ioctl.h
    1.29  3ddb79c4yw_mfd4Uikn3v_IOPRpa1Q xen-2.4.16/include/scsi/scsicam.h
    1.30  3ddb79c4HKPMLvDBP9LxzPi_szVxGA xen-2.4.16/include/scsi/sg.h
    1.31 +3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen-2.4.16/include/xeno/ac_timer.h
    1.32  3ddb79c0nTsjSpVK4ZVTI9WwN24xtQ xen-2.4.16/include/xeno/blk.h
    1.33  3ddb79c0dVhTHLsv6CPTf4baKix4mA xen-2.4.16/include/xeno/blkdev.h
    1.34  3ddb79c18ePBgitnOs7GiOCFilODVw xen-2.4.16/include/xeno/blkpg.h
     2.1 --- a/BitKeeper/etc/logging_ok	Fri Jan 31 15:03:37 2003 +0000
     2.2 +++ b/BitKeeper/etc/logging_ok	Sat Feb 01 17:27:13 2003 +0000
     2.3 @@ -6,5 +6,6 @@ kaf24@labyrinth.cl.cam.ac.uk
     2.4  kaf24@plym.cl.cam.ac.uk
     2.5  kaf24@striker.cl.cam.ac.uk
     2.6  lynx@idefix.cl.cam.ac.uk
     2.7 +rn@wyvis.camb.intel-research.net
     2.8  smh22@boulderdash.cl.cam.ac.uk
     2.9  smh22@uridium.cl.cam.ac.uk
     3.1 --- a/xen-2.4.16/arch/i386/apic.c	Fri Jan 31 15:03:37 2003 +0000
     3.2 +++ b/xen-2.4.16/arch/i386/apic.c	Sat Feb 01 17:27:13 2003 +0000
     3.3 @@ -1,3 +1,23 @@
     3.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
     3.5 + ****************************************************************************
     3.6 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
     3.7 + ****************************************************************************
     3.8 + *
     3.9 + *        File: apic.c
    3.10 + *      Author: 
    3.11 + *     Changes: 
    3.12 + *              
    3.13 + *        Date: Nov 2002
    3.14 + * 
    3.15 + * Environment: Xen Hypervisor
    3.16 + * Description: programmable APIC timer interface for accurate timers
    3.17 + *              modified version of Linux' apic.c
    3.18 + *
    3.19 + ****************************************************************************
    3.20 + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
    3.21 + ****************************************************************************
    3.22 + */
    3.23 +
    3.24  /*
    3.25   *	Local APIC handling, local APIC timers
    3.26   *
    3.27 @@ -10,6 +30,7 @@
    3.28   *					for testing these extensively.
    3.29   */
    3.30  
    3.31 +
    3.32  #include <xeno/config.h>
    3.33  #include <xeno/init.h>
    3.34  #include <xeno/sched.h>
    3.35 @@ -25,6 +46,17 @@
    3.36  #include <asm/pgalloc.h>
    3.37  #include <asm/hardirq.h>
    3.38  
    3.39 +#include <xeno/ac_timer.h>
    3.40 +
    3.41 +
    3.42 +#undef APIC_TIME_TRACE
    3.43 +#ifdef APIC_TIME_TRACE
    3.44 +#define TRC(_x) _x
    3.45 +#else
    3.46 +#define TRC(_x)
    3.47 +#endif
    3.48 +
    3.49 +
    3.50  /* Using APIC to generate smp_local_timer_interrupt? */
    3.51  int using_apic_timer = 0;
    3.52  
    3.53 @@ -39,7 +71,7 @@ int get_maxlvt(void)
    3.54      return maxlvt;
    3.55  }
    3.56  
    3.57 -void clear_local_APIC(void)
    3.58 +static void clear_local_APIC(void)
    3.59  {
    3.60      int maxlvt;
    3.61      unsigned long v;
    3.62 @@ -313,7 +345,6 @@ void __init setup_local_APIC (void)
    3.63       */
    3.64      value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    3.65      if (!smp_processor_id()) { 
    3.66 -/* && (pic_mode || !value)) { */
    3.67          value = APIC_DM_EXTINT;
    3.68          printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
    3.69      } else {
    3.70 @@ -340,11 +371,9 @@ void __init setup_local_APIC (void)
    3.71          value = apic_read(APIC_ESR);
    3.72          printk("ESR value before enabling vector: %08lx\n", value);
    3.73  
    3.74 -        value = ERROR_APIC_VECTOR;      // enables sending errors
    3.75 +        value = ERROR_APIC_VECTOR;      /* enables sending errors */
    3.76          apic_write_around(APIC_LVTERR, value);
    3.77 -        /*
    3.78 -         * spec says clear errors after enabling vector.
    3.79 -         */
    3.80 +        /* spec says clear errors after enabling vector. */
    3.81          if (maxlvt > 3)
    3.82              apic_write(APIC_ESR, 0);
    3.83          value = apic_read(APIC_ESR);
    3.84 @@ -416,9 +445,7 @@ static int __init detect_init_APIC (void
    3.85      boot_cpu_physical_apicid = 0;
    3.86  
    3.87      printk("Found and enabled local APIC!\n");
    3.88 -
    3.89      apic_pm_init1();
    3.90 -
    3.91      return 0;
    3.92  
    3.93   no_apic:
    3.94 @@ -467,14 +494,24 @@ void __init init_apic_mappings(void)
    3.95  #endif
    3.96  }
    3.97  
    3.98 -/*
    3.99 - * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
   3.100 - * per second. We assume that the caller has already set up the local
   3.101 - * APIC.
   3.102 - *
   3.103 - * The APIC timer is not exactly sync with the external timer chip, it
   3.104 - * closely follows bus clocks.
   3.105 - */
   3.106 +/*****************************************************************************
   3.107 + * APIC calibration
   3.108 + * 
   3.109 + * The APIC is programmed in bus cycles.
    3.110 + * Timeout values should be specified in real time units.
   3.111 + * The "cheapest" time source is the cyclecounter.
   3.112 + * 
    3.113 + * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
   3.114 + * 
   3.115 + * The calibration is currently a bit shoddy since it requires the external
    3.116 + * timer chip to generate periodic timer interrupts. 
   3.117 + *****************************************************************************/
   3.118 +
   3.119 +/* used for system time scaling */
   3.120 +static unsigned int bus_freq;
   3.121 +static u32          bus_cycle;   /* length of one bus cycle in pico-seconds */
   3.122 +static u32          bus_scale;   /* scaling factor convert ns to bus cycles */
   3.123 +
   3.124  
   3.125  /*
   3.126   * The timer chip is already set up at HZ interrupts per second here,
   3.127 @@ -485,17 +522,12 @@ static unsigned int __init get_8254_time
   3.128  {
   3.129      /*extern spinlock_t i8253_lock;*/
   3.130      /*unsigned long flags;*/
   3.131 -
   3.132      unsigned int count;
   3.133 -
   3.134      /*spin_lock_irqsave(&i8253_lock, flags);*/
   3.135 -
   3.136      outb_p(0x00, 0x43);
   3.137      count = inb_p(0x40);
   3.138      count |= inb_p(0x40) << 8;
   3.139 -
   3.140      /*spin_unlock_irqrestore(&i8253_lock, flags);*/
   3.141 -
   3.142      return count;
   3.143  }
   3.144  
   3.145 @@ -503,112 +535,67 @@ void __init wait_8254_wraparound(void)
   3.146  {
   3.147      unsigned int curr_count, prev_count=~0;
   3.148      int delta;
   3.149 -
   3.150      curr_count = get_8254_timer_count();
   3.151 -
   3.152      do {
   3.153          prev_count = curr_count;
   3.154          curr_count = get_8254_timer_count();
   3.155          delta = curr_count-prev_count;
   3.156 -
   3.157  	/*
   3.158  	 * This limit for delta seems arbitrary, but it isn't, it's
   3.159  	 * slightly above the level of error a buggy Mercury/Neptune
   3.160  	 * chipset timer can cause.
   3.161  	 */
   3.162 -
   3.163      } while (delta < 300);
   3.164  }
   3.165  
   3.166  /*
   3.167   * This function sets up the local APIC timer, with a timeout of
   3.168   * 'clocks' APIC bus clock. During calibration we actually call
   3.169 - * this function twice on the boot CPU, once with a bogus timeout
   3.170 - * value, second time for real. The other (noncalibrating) CPUs
   3.171 - * call this function only once, with the real, calibrated value.
   3.172 + * this function with a very large value and read the current time after
   3.173 + * a well defined period of time as expired.
   3.174 + *
   3.175 + * Calibration is only performed once, for CPU0!
   3.176   *
   3.177   * We do reads before writes even if unnecessary, to get around the
   3.178   * P5 APIC double write bug.
   3.179   */
   3.180 -
   3.181 -#define APIC_DIVISOR 16
   3.182 -
   3.183 -void __setup_APIC_LVTT(unsigned int clocks)
   3.184 +#define APIC_DIVISOR 1
   3.185 +static void __setup_APIC_LVTT(unsigned int clocks)
   3.186  {
   3.187      unsigned int lvtt1_value, tmp_value;
   3.188 -
   3.189 -    lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) |
   3.190 -        APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
   3.191 +    lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
   3.192      apic_write_around(APIC_LVTT, lvtt1_value);
   3.193 -
   3.194 -    /*
   3.195 -     * Divide PICLK by 16
   3.196 -     */
   3.197      tmp_value = apic_read(APIC_TDCR);
   3.198 -    apic_write_around(APIC_TDCR, (tmp_value
   3.199 -                                  & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
   3.200 -                      | APIC_TDR_DIV_16);
   3.201 -
   3.202 +    apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
   3.203      apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
   3.204  }
   3.205  
   3.206 +/*
   3.207 + * this is done for every CPU from setup_APIC_clocks() below.
   3.208 + * We setup each local APIC with a zero timeout value for now.
   3.209 + * Unlike Linux, we don't have to wait for slices etc.
   3.210 + */
   3.211  void setup_APIC_timer(void * data)
   3.212  {
   3.213 -    unsigned int clocks = (unsigned int) data, slice, t0, t1;
   3.214      unsigned long flags;
   3.215 -    int delta;
   3.216 -
   3.217      __save_flags(flags);
   3.218      __sti();
   3.219 -    /*
   3.220 -     * ok, Intel has some smart code in their APIC that knows
   3.221 -     * if a CPU was in 'hlt' lowpower mode, and this increases
   3.222 -     * its APIC arbitration priority. To avoid the external timer
   3.223 -     * IRQ APIC event being in synchron with the APIC clock we
   3.224 -     * introduce an interrupt skew to spread out timer events.
   3.225 -     *
   3.226 -     * The number of slices within a 'big' timeslice is smp_num_cpus+1
   3.227 -     */
   3.228 -
   3.229 -    slice = clocks / (smp_num_cpus+1);
   3.230 -    printk("cpu: %d, clocks: %d, slice: %d\n",
   3.231 -           smp_processor_id(), clocks, slice);
   3.232 -
   3.233 -    /*
   3.234 -     * Wait for IRQ0's slice:
   3.235 -     */
   3.236 -    wait_8254_wraparound();
   3.237 -
   3.238 -    __setup_APIC_LVTT(clocks);
   3.239 -
   3.240 -    t0 = apic_read(APIC_TMICT)*APIC_DIVISOR;
   3.241 -    /* Wait till TMCCT gets reloaded from TMICT... */
   3.242 -    do {
   3.243 -        t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
   3.244 -        delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
   3.245 -    } while (delta >= 0);
   3.246 -    /* Now wait for our slice for real. */
   3.247 -    do {
   3.248 -        t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
   3.249 -        delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
   3.250 -    } while (delta < 0);
   3.251 -
   3.252 -    __setup_APIC_LVTT(clocks);
   3.253 -
   3.254 -    printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n",
   3.255 -           smp_processor_id(), t0, t1, delta, slice, clocks);
   3.256 -
   3.257 +    printk("cpu: %d: setup timer.", smp_processor_id());
   3.258 +    __setup_APIC_LVTT(0);
   3.259 +    printk("done\n");
   3.260      __restore_flags(flags);
   3.261  }
   3.262  
   3.263  /*
   3.264   * In this function we calibrate APIC bus clocks to the external timer.
   3.265   *
   3.266 - * We want to do the calibration only once since we
   3.267 - * want to have local timer irqs syncron. CPUs connected
   3.268 - * by the same APIC bus have the very same bus frequency.
   3.269 - * And we want to have irqs off anyways, no accidental
   3.270 - * APIC irq that way.
    3.271 + * As a result we have the bus speed and CPU speed in Hz.
   3.272 + * 
   3.273 + * We want to do the calibration only once (for CPU0).  CPUs connected by the
   3.274 + * same APIC bus have the very same bus frequency.
   3.275 + *
   3.276 + * This bit is a bit shoddy since we use the very same periodic timer interrupt
   3.277 + * we try to eliminate to calibrate the APIC. 
   3.278   */
   3.279  
   3.280  int __init calibrate_APIC_clock(void)
   3.281 @@ -619,95 +606,152 @@ int __init calibrate_APIC_clock(void)
   3.282      int i;
   3.283      const int LOOPS = HZ/10;
   3.284  
   3.285 -    printk("calibrating APIC timer ...\n");
   3.286 +    printk("calibrating APIC timer for CPU%d...\n",  smp_processor_id());
   3.287  
   3.288 -    /*
   3.289 -     * Put whatever arbitrary (but long enough) timeout
   3.290 +    /* Put whatever arbitrary (but long enough) timeout
   3.291       * value into the APIC clock, we just want to get the
   3.292 -     * counter running for calibration.
   3.293 -     */
   3.294 +     * counter running for calibration. */
   3.295      __setup_APIC_LVTT(1000000000);
   3.296  
   3.297 -    /*
   3.298 -     * The timer chip counts down to zero. Let's wait
   3.299 +	/* The timer chip counts down to zero. Let's wait
   3.300       * for a wraparound to start exact measurement:
   3.301 -     * (the current tick might have been already half done)
   3.302 -     */
   3.303 -
   3.304 +     * (the current tick might have been already half done) */
   3.305      wait_8254_wraparound();
   3.306  
   3.307 -    /*
   3.308 -     * We wrapped around just now. Let's start:
   3.309 -     */
   3.310 +    /* We wrapped around just now. Let's start: */
   3.311      rdtscll(t1);
   3.312      tt1 = apic_read(APIC_TMCCT);
   3.313  
   3.314 -    /*
   3.315 -     * Let's wait LOOPS wraprounds:
   3.316 -     */
   3.317 +    /* Let's wait LOOPS wraprounds: */
   3.318      for (i = 0; i < LOOPS; i++)
   3.319          wait_8254_wraparound();
   3.320  
   3.321      tt2 = apic_read(APIC_TMCCT);
   3.322      rdtscll(t2);
   3.323  
   3.324 -    /*
   3.325 -     * The APIC bus clock counter is 32 bits only, it
   3.326 +    /* The APIC bus clock counter is 32 bits only, it
   3.327       * might have overflown, but note that we use signed
   3.328       * longs, thus no extra care needed.
   3.329 -     *
   3.330 -     * underflown to be exact, as the timer counts down ;)
   3.331 -     */
   3.332 -
   3.333 +     * underflown to be exact, as the timer counts down ;) */
   3.334      result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
   3.335  
   3.336 -    printk("..... CPU clock speed is %ld.%04ld MHz.\n",
   3.337 +    printk("..... CPU speed is %ld.%04ld MHz.\n",
   3.338             ((long)(t2-t1)/LOOPS)/(1000000/HZ),
   3.339             ((long)(t2-t1)/LOOPS)%(1000000/HZ));
   3.340  
   3.341 -    printk("..... host bus clock speed is %ld.%04ld MHz.\n",
   3.342 +    printk("..... Bus speed is %ld.%04ld MHz.\n",
   3.343             result/(1000000/HZ),
   3.344             result%(1000000/HZ));
   3.345  
   3.346 +	/* set up multipliers for accurate timer code */
   3.347 +	bus_freq   = result*HZ;
   3.348 +	bus_cycle  = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
   3.349 +	bus_scale  = (1000*262144)/bus_cycle;
   3.350 +
   3.351 +	/* print results */
   3.352 +	printk("..... bus_freq  = %u Hz\n",  bus_freq);
   3.353 +	printk("..... bus_cycle = %u ps\n",  bus_cycle);
   3.354 +	printk("..... bus_scale = %u \n",    bus_scale);
   3.355 +	/* reset APIC to zero timeout value */
   3.356 +    __setup_APIC_LVTT(0);
   3.357      return result;
   3.358  }
   3.359  
   3.360 -static unsigned int calibration_result;
   3.361 -
   3.362 +/*
   3.363 + * initialise the APIC timers for all CPUs
   3.364 + * we start with the first and find out processor frequency and bus speed
   3.365 + */
   3.366  void __init setup_APIC_clocks (void)
   3.367  {
   3.368      printk("Using local APIC timer interrupts.\n");
   3.369      using_apic_timer = 1;
   3.370 -
   3.371      __cli();
   3.372 -
   3.373 -    calibration_result = calibrate_APIC_clock();
   3.374 -    /*
   3.375 -     * Now set up the timer for real.
   3.376 -     */
   3.377 -    setup_APIC_timer((void *)calibration_result);
   3.378 -
   3.379 +	/* calibrate CPU0 for CPU speed and BUS speed */
   3.380 +    bus_freq = calibrate_APIC_clock();
   3.381 +    /* Now set up the timer for real. */
   3.382 +    setup_APIC_timer((void *)bus_freq);
   3.383      __sti();
   3.384 -
   3.385      /* and update all other cpus */
   3.386 -    smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1);
   3.387 +    smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
   3.388  }
   3.389  
   3.390  #undef APIC_DIVISOR
   3.391 +/*
    3.392 + * reprogram the APIC timer. Timeout value is in ns from start of boot
   3.393 + * returns 1 on success
   3.394 + * returns 0 if the timeout value is too small or in the past.
   3.395 + */
   3.396 +
   3.397 +
   3.398 +int reprogram_ac_timer(s_time_t timeout)
   3.399 +{
   3.400 +	int 		cpu = smp_processor_id();
   3.401 +	s_time_t	now;
   3.402 +	s_time_t	expire;
   3.403 +	u64			apic_tmict;
   3.404 +
   3.405 +	now = NOW();
   3.406 +	expire = timeout - now;	/* value from now */
   3.407 +
   3.408 +
   3.409 +	if (expire <= 0) {
   3.410 +		printk("APICT[%02d] Timeout value in the past %lld > %lld\n", 
   3.411 +			   cpu, now, timeout);
   3.412 +		return 0;		/* timeout value in the past */
   3.413 +	}
   3.414 +
   3.415 +	/* conversion to bus units */
   3.416 +	apic_tmict = (((u64)bus_scale) * expire)>>18;
   3.417 +
   3.418 +	if (apic_tmict >= 0xffffffff) {
   3.419 +		printk("APICT[%02d] Timeout value too large\n", cpu);
   3.420 +		apic_tmict = 0xffffffff;
   3.421 +	}
   3.422 +	if (apic_tmict == 0) {
   3.423 +		printk("APICT[%02d] timeout value too small\n", cpu);
   3.424 +		return 0;
   3.425 +	}
   3.426 +
    3.427 +	/* program timer */
   3.428 +	apic_write(APIC_TMICT, (unsigned long)apic_tmict);
   3.429 +
   3.430 +	TRC(printk("APICT[%02d] reprog(): expire=%lld %u\n",
   3.431 +			   cpu, expire, apic_tmict));
   3.432 +	return 1;
   3.433 +}
   3.434  
   3.435  /*
   3.436 - * Local timer interrupt handler. It does both profiling and
   3.437 - * process statistics/rescheduling.
   3.438 - *
   3.439 - * We do profiling in every local tick, statistics/rescheduling
   3.440 - * happen only every 'profiling multiplier' ticks. The default
   3.441 - * multiplier is 1 and it can be changed by writing the new multiplier
   3.442 - * value into /proc/profile.
   3.443 + * Local timer interrupt handler.
   3.444 + * here the programmable, accurate timers are executed.
   3.445 + * If we are on CPU0 and we should have updated jiffies, we do this 
    3.446 + * as well and deal with traditional linux timers. Note that if 
    3.447 + * the timer APIC on CPU does not go off every 10ms or so the linux 
    3.448 + * timers lose accuracy, but that shouldn't be a problem.
   3.449   */
   3.450  
   3.451 +static s_time_t last_cpu0_tirq = 0;
   3.452  inline void smp_local_timer_interrupt(struct pt_regs * regs)
   3.453  {
   3.454 -    update_process_times(user_mode(regs));
   3.455 +	int cpu = smp_processor_id();
   3.456 +	s_time_t diff, now;
   3.457 +
   3.458 +    /* if CPU 0 do old timer stuff  */
   3.459 +	if (cpu == 0) {
   3.460 +		update_time();
   3.461 +		now = NOW();
   3.462 +		diff = now - last_cpu0_tirq;
   3.463 +		/* this uses three 64bit divisions which should be avoided!! */
   3.464 +		if (diff >= MILLISECS(10)) {
   3.465 +			/* update jiffies */
   3.466 +			(*(unsigned long *)&jiffies) += diff / MILLISECS(10);
   3.467 +
   3.468 +			/* do traditional linux timers */
   3.469 +			do_timer(regs);
   3.470 +			last_cpu0_tirq = now;
   3.471 +		}
   3.472 +	}
   3.473 +	/* call timer function */
   3.474 +	do_ac_timer();
   3.475  }
   3.476  
   3.477  /*
   3.478 @@ -732,13 +776,11 @@ void smp_apic_timer_interrupt(struct pt_
   3.479      /*
   3.480       * NOTE! We'd better ACK the irq immediately,
   3.481       * because timer handling can be slow.
    3.482 +	 * XXX is this safe?
   3.483       */
   3.484      ack_APIC_irq();
   3.485 -    /*
   3.486 -     * update_process_times() expects us to have done irq_enter().
   3.487 -     * Besides, if we don't timer interrupts ignore the global
   3.488 -     * interrupt lock, which is the WrongThing (tm) to do.
   3.489 -     */
   3.490 +
   3.491 +	/* call the local handler */
   3.492      irq_enter(cpu, 0);
   3.493      smp_local_timer_interrupt(regs);
   3.494      irq_exit(cpu, 0);
   3.495 @@ -809,7 +851,8 @@ int __init APIC_init_uniprocessor (void)
   3.496      /*
   3.497       * Complain if the BIOS pretends there is one.
   3.498       */
   3.499 -    if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
   3.500 +    if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
   3.501 +	{
   3.502          printk("BIOS bug, local APIC #%d not detected!...\n",
   3.503                 boot_cpu_physical_apicid);
   3.504          return -1;
     5.1 --- a/xen-2.4.16/arch/i386/setup.c	Fri Jan 31 15:03:37 2003 +0000
     5.2 +++ b/xen-2.4.16/arch/i386/setup.c	Sat Feb 01 17:27:13 2003 +0000
     5.3 @@ -280,6 +280,7 @@ void __init start_of_day(void)
     5.4      extern void tqueue_bh(void);
     5.5      extern void immediate_bh(void);
     5.6      extern void init_timervecs(void);
     5.7 +	extern void ac_timer_init(void);
     5.8      extern int  setup_network_devices(void);
     5.9      extern void net_init(void);
    5.10  
    5.11 @@ -303,6 +304,7 @@ void __init start_of_day(void)
    5.12      paging_init();                /* not much here now, but sets up fixmap */
    5.13      if ( smp_found_config ) get_smp_config();
    5.14      domain_init();
    5.15 +	scheduler_init();	
    5.16      trap_init();
    5.17      init_IRQ();  /* installs simple interrupt wrappers. Starts HZ clock. */
    5.18      time_init(); /* installs software handler for HZ clock. */
    5.19 @@ -323,6 +325,9 @@ void __init start_of_day(void)
    5.20                        * fall thru to 8259A if we have to (but slower).
    5.21                        */
    5.22  #endif
    5.23 +	init_xeno_time();	/* initialise the time */
    5.24 +	ac_timer_init();    /* init accurate timers */
    5.25 +	schedulers_start(); /* start scheduler for each CPU */
    5.26  
    5.27      sti();
    5.28  
     6.1 --- a/xen-2.4.16/arch/i386/time.c	Fri Jan 31 15:03:37 2003 +0000
     6.2 +++ b/xen-2.4.16/arch/i386/time.c	Sat Feb 01 17:27:13 2003 +0000
     6.3 @@ -1,3 +1,22 @@
     6.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
     6.5 + ****************************************************************************
     6.6 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
     6.7 + ****************************************************************************
     6.8 + *
     6.9 + *        File: i386/time.c
    6.10 + *      Author: 
    6.11 + *     Changes: 
    6.12 + *              
    6.13 + *        Date: Jan 2003
    6.14 + * 
    6.15 + * Environment: Xen Hypervisor
    6.16 + * Description: modified version of Linux' time.c
    6.17 + *              implement system and wall clock time.
    6.18 + *
    6.19 + ****************************************************************************
    6.20 + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
    6.21 + ****************************************************************************
    6.22 + */
    6.23  /*
    6.24   *  linux/arch/i386/kernel/time.c
    6.25   *
    6.26 @@ -19,12 +38,22 @@
    6.27  #include <asm/mpspec.h>
    6.28  #include <asm/processor.h>
    6.29  #include <asm/fixmap.h>
    6.30 +#include <asm/mc146818rtc.h>
    6.31 +
    6.32 +#ifdef TIME_TRACE
    6.33 +#define TRC(_x) _x
    6.34 +#else
    6.35 +#define TRC(_x)
    6.36 +#endif
    6.37 +
    6.38 +
    6.39  
    6.40  unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
    6.41  unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
    6.42  
    6.43  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
    6.44  
    6.45 +
    6.46  /*
    6.47   * timer_interrupt() needs to keep up the real-time clock,
    6.48   * as well as call the "do_timer()" routine every clocktick
    6.49 @@ -62,14 +91,15 @@ static void timer_interrupt(int irq, voi
    6.50      do_timer_interrupt(irq, NULL, regs);
    6.51  }
    6.52  
    6.53 -static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
    6.54 +static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0,
    6.55 +								  "timer", NULL, NULL};
    6.56  
    6.57  /* ------ Calibrate the TSC ------- 
    6.58   * Return processor ticks per second / CALIBRATE_FRAC.
    6.59   */
    6.60  
    6.61  #define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
    6.62 -#define CALIBRATE_FRAC  20     /* calibrate over 50ms */
    6.63 +#define CALIBRATE_FRAC  20      /* calibrate over 50ms */
    6.64  #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
    6.65  
    6.66  static unsigned long __init calibrate_tsc(void)
    6.67 @@ -126,6 +156,207 @@ static unsigned long __init calibrate_ts
    6.68      return 0;
    6.69  }
    6.70  
    6.71 +/***************************************************************************
    6.72 + * CMOS Timer functions
    6.73 + ***************************************************************************/
    6.74 +
    6.75 +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
    6.76 + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
    6.77 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
    6.78 + *
    6.79 + * [For the Julian calendar (which was used in Russia before 1917,
    6.80 + * Britain & colonies before 1752, anywhere else before 1582,
    6.81 + * and is still in use by some communities) leave out the
    6.82 + * -year/100+year/400 terms, and add 10.]
    6.83 + *
    6.84 + * This algorithm was first published by Gauss (I think).
    6.85 + *
    6.86 + * WARNING: this function will overflow on 2106-02-07 06:28:16 on
    6.87 + * machines were long is 32-bit! (However, as time_t is signed, we
    6.88 + * will already get problems at other places on 2038-01-19 03:14:08)
    6.89 + */
    6.90 +static inline unsigned long
    6.91 +mktime (unsigned int year, unsigned int mon,
    6.92 +        unsigned int day, unsigned int hour,
    6.93 +        unsigned int min, unsigned int sec)
    6.94 +{
    6.95 +	if (0 >= (int) (mon -= 2)) {    /* 1..12 -> 11,12,1..10 */
    6.96 +		mon += 12;              /* Puts Feb last since it has leap day */
    6.97 +		year -= 1;
    6.98 +	}
    6.99 +	return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
   6.100 +			  year*365 - 719499
   6.101 +				 )*24 + hour /* now have hours */
   6.102 +				)*60 + min /* now have minutes */
   6.103 +        )*60 + sec; /* finally seconds */
   6.104 +}
   6.105 +
   6.106 +static unsigned long get_cmos_time(void)
   6.107 +{
   6.108 +	unsigned int year, mon, day, hour, min, sec;
   6.109 +	int i;
   6.110 +
   6.111 +	spin_lock(&rtc_lock);
   6.112 +	/* The Linux interpretation of the CMOS clock register contents:
   6.113 +	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
   6.114 +	 * RTC registers show the second which has precisely just started.
   6.115 +	 * Let's hope other operating systems interpret the RTC the same way.
   6.116 +	 */
   6.117 +	/* read RTC exactly on falling edge of update flag */
   6.118 +	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
   6.119 +		if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
   6.120 +			break;
   6.121 +	for (i = 0 ; i < 1000000 ; i++)	/* must try at least 2.228 ms */
   6.122 +		if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
   6.123 +			break;
   6.124 +	do { /* Isn't this overkill ? UIP above should guarantee consistency */
   6.125 +		sec = CMOS_READ(RTC_SECONDS);
   6.126 +		min = CMOS_READ(RTC_MINUTES);
   6.127 +		hour = CMOS_READ(RTC_HOURS);
   6.128 +		day = CMOS_READ(RTC_DAY_OF_MONTH);
   6.129 +		mon = CMOS_READ(RTC_MONTH);
   6.130 +		year = CMOS_READ(RTC_YEAR);
   6.131 +	} while (sec != CMOS_READ(RTC_SECONDS));
   6.132 +	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
   6.133 +	  {
   6.134 +	    BCD_TO_BIN(sec);
   6.135 +	    BCD_TO_BIN(min);
   6.136 +	    BCD_TO_BIN(hour);
   6.137 +	    BCD_TO_BIN(day);
   6.138 +	    BCD_TO_BIN(mon);
   6.139 +	    BCD_TO_BIN(year);
   6.140 +	  }
   6.141 +	spin_unlock(&rtc_lock);
   6.142 +	if ((year += 1900) < 1970)
   6.143 +		year += 100;
   6.144 +	printk(".... CMOS Clock:  %02d/%02d/%04d %02d:%02d:%02d\n",
   6.145 +		   day, mon, year, hour, min, sec);
   6.146 +	return mktime(year, mon, day, hour, min, sec);
   6.147 +}
   6.148 +
   6.149 +/***************************************************************************
   6.150 + * System time
   6.151 + ***************************************************************************/
   6.152 +u32					stime_pcc;	 /* cycle counter value at last timer irq */
   6.153 +u32					stime_scale; /* scale factor for converting cc to ns */
   6.154 +s_time_t			stime_now;   /* time in ns at last timer IRQ */
   6.155 +
   6.156 +/***************************************************************************
   6.157 + * Wall Clock time 
   6.158 + ***************************************************************************/
   6.159 +static rwlock_t wctime_lock = RW_LOCK_UNLOCKED;
   6.160 +struct timeval  wall_clock_time;	/* wall clock time at last update */
   6.161 +s_time_t	    wctime_st;       /* system time at last update */
   6.162 +
   6.163 +void do_gettimeofday(struct timeval *tv)
   6.164 +{
   6.165 +	unsigned long flags;
   6.166 +	unsigned long usec, sec;
   6.167 +
   6.168 +	read_lock_irqsave(&wctime_lock, flags);
   6.169 +
   6.170 +	usec = ((unsigned long)(NOW() - wctime_st))/1000;
   6.171 +	sec = wall_clock_time.tv_sec;
   6.172 +	usec += wall_clock_time.tv_usec;
   6.173 +	read_unlock_irqrestore(&wctime_lock, flags);
   6.174 +
   6.175 +	while (usec >= 1000000) {
   6.176 +		usec -= 1000000;
   6.177 +		sec++;
   6.178 +	}
   6.179 +	tv->tv_sec = sec;
   6.180 +	tv->tv_usec = usec;
   6.181 +}
   6.182 +
   6.183 +void do_settimeofday(struct timeval *tv)
   6.184 +{
   6.185 +	printk("XXX: do_settimeofday not implemented\n");
   6.186 +}
   6.187 +
   6.188 +/***************************************************************************
   6.189 + * Update times
   6.190 + ***************************************************************************/
   6.191 +
   6.192 +/* update the hypervisor's notion of time */
   6.193 +void update_time(void) {
   6.194 +	u32		      new_pcc;
   6.195 +	s_time_t      new_st;
   6.196 +	unsigned long usec;
   6.197 +
   6.198 +	/* update system time */
   6.199 +	rdtscl(new_pcc);
   6.200 +	stime_now = stime_now+((((s_time_t)stime_scale)*
   6.201 +							(new_pcc-stime_pcc))>>10);
   6.202 +	stime_pcc=new_pcc;
   6.203 +
   6.204 +	/* update wall clock time  */
   6.205 +	write_lock(&wctime_lock);
   6.206 +	new_st = NOW();
   6.207 +	usec = ((unsigned long)(new_st - wctime_st))/1000;
   6.208 +	usec += wall_clock_time.tv_usec;
   6.209 +	while (usec >= 1000000) {
   6.210 +		usec -= 1000000;
   6.211 +		wall_clock_time.tv_sec++;
   6.212 +	}
   6.213 +	wall_clock_time.tv_usec = usec;
   6.214 +	wctime_st = new_st;
   6.215 +	write_unlock(&wctime_lock);
   6.216 +
   6.217 +	TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld, wct=%ld:%ld\n"
   6.218 +			   cpu, stime_now, new_st, wall_clock_time.tv_sec,
   6.219 +			   wall_clock_time.tv_usec));
   6.220 +}
   6.221 +
   6.222 +/* update a domain's notion of time */
   6.223 +void update_dom_time(shared_info_t *si)
   6.224 +{
   6.225 +	unsigned long flags;
   6.226 +	read_lock_irqsave(&wctime_lock, flags);
   6.227 +	si->system_time  = stime_now;
   6.228 +	si->st_timestamp = stime_pcc;
   6.229 +	si->tv_sec       = wall_clock_time.tv_sec;
   6.230 +	si->tv_usec      = wall_clock_time.tv_usec;
   6.231 +	si->wc_timestamp = wctime_st;
   6.232 +	read_unlock_irqrestore(&wctime_lock, flags);	
   6.233 +}
   6.234 +
   6.235 +/***************************************************************************
   6.236 + * Init Xeno Time
   6.237 + * This has to be done after all CPUs have been booted
   6.238 + ***************************************************************************/
   6.239 +int __init init_xeno_time()
   6.240 +{
   6.241 +	int cpu = smp_processor_id();
   6.242 +	u32	cpu_cycle;	 /* time of one cpu cycle in pico-seconds */
   6.243 +
   6.244 +	/* System Time */
   6.245 +	cpu_cycle   = (u32) (1000000000LL/cpu_khz); /* in pico seconds */
   6.246 +	stime_scale = (cpu_cycle * 1024) / 1000;
   6.247 +
   6.248 +	stime_now = (s_time_t)0;
   6.249 +	rdtscl(stime_pcc);
   6.250 +	
   6.251 +	printk("Init Time[%02d]:\n", cpu);
   6.252 +	printk(".... System Time: %lldns\n", NOW());
   6.253 +	printk(".... stime_scale: %u\n",   stime_scale);
   6.254 +	printk(".... stime_pcc:   %u\n",   stime_pcc);
   6.255 +
   6.256 +	/* Wall Clock time */
   6.257 +	wall_clock_time.tv_sec  = get_cmos_time();
   6.258 +	wall_clock_time.tv_usec = 0;
   6.259 +	wctime_st = NOW();
   6.260 +
   6.261 +	printk(".... Wall Clock:  %lds %ldus\n", wall_clock_time.tv_sec,
   6.262 +		   wall_clock_time.tv_usec);
   6.263 +	printk(".... wctime_st:   %lld\n", wctime_st);
   6.264 +	return 0;
   6.265 +}
   6.266 +
   6.267 +
   6.268 +/***************************************************************************
   6.269 + * Init
   6.270 + ***************************************************************************/
   6.271 +
   6.272  void __init time_init(void)
   6.273  {
   6.274      unsigned long ticks_per_frac = calibrate_tsc();
   6.275 @@ -136,6 +367,7 @@ void __init time_init(void)
   6.276      ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
   6.277      cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
   6.278  
   6.279 +
   6.280      printk("Detected %lu.%03lu MHz processor.\n", 
   6.281             cpu_khz / 1000, cpu_khz % 1000);
   6.282  
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen-2.4.16/common/ac_timer.c	Sat Feb 01 17:27:13 2003 +0000
     7.3 @@ -0,0 +1,304 @@
     7.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
     7.5 + ****************************************************************************
     7.6 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
     7.7 + ****************************************************************************
     7.8 + *
     7.9 + *        File: ac_timer.c
    7.10 + *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
    7.11 + *     Changes: 
    7.12 + *              
    7.13 + *        Date: Nov 2002
    7.14 + * 
    7.15 + * Environment: Xen Hypervisor
    7.16 + * Description: Accurate timer for the Hypervisor
    7.17 + *
    7.18 + ****************************************************************************
    7.19 + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
    7.20 + ****************************************************************************
    7.21 + */
    7.22 +
    7.23 +#include <xeno/config.h>
    7.24 +#include <xeno/init.h>
    7.25 +#include <xeno/types.h>
    7.26 +#include <xeno/errno.h>
    7.27 +#include <xeno/sched.h>
    7.28 +#include <xeno/lib.h>
    7.29 +#include <xeno/config.h>
    7.30 +#include <xeno/smp.h>
    7.31 +#include <xeno/init.h>
    7.32 +
    7.33 +#include <xeno/time.h>
    7.34 +#include <xeno/ac_timer.h>
    7.35 +
    7.36 +#include <asm/system.h>
    7.37 +#include <asm/desc.h>
    7.38 +
    7.39 +
    7.40 +#undef AC_TIMER_TRACE
    7.41 +#undef AC_TIMER_STATS
    7.42 +
    7.43 +#ifdef AC_TIMER_TRACE
    7.44 +#define TRC(_x) _x
    7.45 +#else
    7.46 +#define TRC(_x)
    7.47 +#endif
    7.48 +
    7.49 +/* A timer list per CPU */
    7.50 +typedef struct ac_timers_st
    7.51 +{
    7.52 +    spinlock_t lock;
    7.53 +    struct list_head timers;
    7.54 +    struct ac_timer *prev, *curr;
    7.55 +} __cacheline_aligned ac_timers_t;
    7.56 +static ac_timers_t ac_timers[NR_CPUS];
    7.57 +
    7.58 +#ifdef AC_TIMER_STATS
    7.59 +#define BUCKETS		1000
    7.60 +#define MAX_STATS
    7.61 +typedef struct act_stats_st
    7.62 +{
    7.63 +	u32 count;
    7.64 +	u32 times[2*(BUCKETS)];
    7.65 +} __cacheline_aligned act_stats_t;
    7.66 +static act_stats_t act_stats[NR_CPUS];
    7.67 +
    7.68 +#endif
    7.69 +
    7.70 +/* local prototypes */
    7.71 +static int  detach_ac_timer(struct ac_timer *timer);
    7.72 +static void ac_timer_debug(unsigned long);
    7.73 +
    7.74 +/*
    7.75 + * add a timer.
    7.76 + * return value:
    7.77 + *  0: success
    7.78 + *  1: failure, timer in the past or timeout value too small
    7.79 + * -1: failure, timer uninitialised
    7.80 + * fail
    7.81 + */
    7.82 +int add_ac_timer(struct ac_timer *timer)
    7.83 +{
    7.84 +	int 			 cpu = smp_processor_id();
    7.85 +	unsigned long 	 flags;
    7.86 +	struct list_head *tmp, *prev;
    7.87 +	struct ac_timer	 *t;
    7.88 +	s_time_t		 now;
    7.89 +
    7.90 +	/* sanity checks */
    7.91 +
    7.92 +	/* make sure timeout value is in the future */
    7.93 +	now = NOW();
    7.94 +	TRC(printk("ACT  [%02d] add(): now=%lld timo=%lld\n",
    7.95 +			   cpu, now, timer->expires));
    7.96 +	if (timer->expires <= now) {	
    7.97 +		printk("ACT[%02d] add_ac_timer(): timeout value in the past\n", cpu);
    7.98 +		return 1;
    7.99 +	}
   7.100 +
   7.101 +	local_irq_save(flags);
   7.102 +
   7.103 +	/* check if timer would be inserted at start of list */
   7.104 +	if ((list_empty(&ac_timers[cpu].timers)) ||
   7.105 +		(timer->expires <
   7.106 +		(list_entry(&ac_timers[cpu].timers, struct ac_timer, timer_list))->expires)) {
   7.107 +
   7.108 +		TRC(printk("ACT  [%02d] add(): add at head\n", cpu));
   7.109 +		/* Reprogram and add to head of list */
   7.110 +		if (!reprogram_ac_timer(timer->expires)) {
   7.111 +			/* failed */
   7.112 +			TRC(printk("ACT  [%02d] add(): add at head failed\n", cpu));
   7.113 +			local_irq_restore(flags);
   7.114 +			return 1;
   7.115 +		}
   7.116 +		list_add(&timer->timer_list, &ac_timers[cpu].timers);
   7.117 +		
   7.118 +	} else {
   7.119 +		/* find correct entry and add timer */
   7.120 +		prev = &ac_timers[cpu].timers;
   7.121 +		list_for_each(tmp, &ac_timers[cpu].timers) {
   7.122 +			t = list_entry(tmp, struct ac_timer, timer_list);
   7.123 +			if (t->expires < timer->expires) {
   7.124 +				list_add(&timer->timer_list, prev);
   7.125 +				TRC(printk("ACT  [%02d] add(): added between %lld and %lld\n",
   7.126 +					   cpu,
   7.127 +					   list_entry(prev,struct ac_timer,timer_list)->expires,
   7.128 +					   list_entry(tmp,struct ac_timer,timer_list)->expires));
   7.129 +				break;
   7.130 +			}
   7.131 +			prev = tmp;
   7.132 +		}
   7.133 +	}
   7.134 +	local_irq_restore(flags);
   7.135 +	return 0;
   7.136 +}
   7.137 +
   7.138 +/*
   7.139 + * remove a timer
   7.140 + * return values:
   7.141 + *  0: success
   7.142 + * -1: bogus timer
   7.143 + */
   7.144 +static int detach_ac_timer(struct ac_timer *timer)
   7.145 +{  
   7.146 +	TRC(int 			 cpu = smp_processor_id());
   7.147 +	TRC(printk("ACT  [%02d] detach(): \n", cpu));
   7.148 +	list_del(&timer->timer_list);
   7.149 +	timer->timer_list.next = NULL;
   7.150 +	return 0;
   7.151 +}
   7.152 +
   7.153 +/*
   7.154 + * remove a timer
   7.155 + * return values:
   7.156 + *  0: success
   7.157 + * -1: bogus timer
   7.158 + */
   7.159 +int rem_ac_timer(struct ac_timer *timer)
   7.160 +{
   7.161 +	int res;
   7.162 +	unsigned long flags;
   7.163 +	TRC(int cpu = smp_processor_id());
   7.164 +
   7.165 +	TRC(printk("ACT  [%02d] remove(): timo=%lld \n", cpu, timer->expires));
   7.166 +	/* sanity checks */
   7.167 +
   7.168 +	local_irq_save(flags);
   7.169 +	res = detach_ac_timer(timer);	
   7.170 +	local_irq_restore(flags);
   7.171 +	return res;
   7.172 +}
   7.173 +
   7.174 +/*
   7.175 + * modify a timer, i.e., set a new timeout value
   7.176 + * return value:
   7.177 + *  0: success
   7.178 + * -1: error
   7.179 + */
   7.180 +int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
   7.181 +{
   7.182 +	if (rem_ac_timer(timer) != 0)
   7.183 +		return -1;
   7.184 +	timer->expires = new_time;
   7.185 +	if (add_ac_timer(timer) != 0)
   7.186 +		return -1;
   7.187 +	return 0;
   7.188 +}
   7.189 +
   7.190 +/*
   7.191 + * do_ac_timer
   7.192 + * deal with timeouts and run the handlers
   7.193 + */
   7.194 +void do_ac_timer(void)
   7.195 +{
   7.196 +	int 			 cpu = smp_processor_id();
   7.197 +	unsigned long 	 flags;
   7.198 +	s_time_t		 now;
   7.199 +	struct ac_timer	 *t;
   7.200 +	struct list_head *tmp;
   7.201 +
   7.202 +	local_irq_save(flags);
   7.203 +
   7.204 + do_timer_again:
   7.205 +
   7.206 +	now = NOW();
   7.207 +	TRC(printk("ACT  [%02d] do(): now=%lld\n", cpu, now));
   7.208 +		
   7.209 +	/* Sanity checks */
   7.210 +    /* empty time list  */
   7.211 +	if (list_empty(&ac_timers[cpu].timers)) {
   7.212 +		printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu);
   7.213 +		local_irq_restore(flags);
   7.214 +		return;
   7.215 +	}
   7.216 +
   7.217 +
   7.218 +	/* execute the head of timer queue */
   7.219 +	t = list_entry(ac_timers[cpu].timers.next, struct ac_timer, timer_list);
   7.220 +	detach_ac_timer(t);
   7.221 +
   7.222 +
   7.223 +#ifdef AC_TIMER_STATS
   7.224 +	{
   7.225 +		s32	diff;
   7.226 +		u32 i;
   7.227 +		diff = ((s32)(now - t->expires)) / 1000; /* delta in us */
   7.228 +		if (diff < -BUCKETS)
   7.229 +			diff = -BUCKETS;
   7.230 +		else if (diff > BUCKETS)
   7.231 +			diff = BUCKETS;
   7.232 +		act_stats[cpu].times[diff+BUCKETS]++;
   7.233 +		act_stats[cpu].count++;
   7.234 +
   7.235 +		if (act_stats[cpu].count >= 5000) {
   7.236 +			printk("ACT Stats\n");
   7.237 +			for (i=0; i < 2*BUCKETS; i++) {
   7.238 +				if (act_stats[cpu].times[i] != 0)
   7.239 +					printk("ACT [%02d]: %3dus: %5d\n",
   7.240 +						   cpu,i-BUCKETS, act_stats[cpu].times[i]);
   7.241 +				act_stats[cpu].times[i]=0;
   7.242 +			}
   7.243 +			act_stats[cpu].count = 0;
   7.244 +			printk("\n");
   7.245 +		}
   7.246 +	}
   7.247 +#endif
   7.248 +
   7.249 +
   7.250 +
   7.251 +	if (t->expires > now) {
   7.252 +		//printk("ACT  [%02d] do(): irq too early (%lld ns)\n",
   7.253 +		//	   cpu, now - t->expires );
   7.254 +	}
   7.255 +	if (t->function != NULL)
   7.256 +		t->function(t->data);
   7.257 +
   7.258 +
   7.259 +	/* check if there are other timer functions on the list */
   7.260 +	now = NOW();
   7.261 +	if (!list_empty(&ac_timers[cpu].timers)) {
   7.262 +		list_for_each(tmp, &ac_timers[cpu].timers) {
   7.263 +			t = list_entry(tmp, struct ac_timer, timer_list);
   7.264 +			TRC(printk("ACT  [%02d] do(): now=%lld timo=%lld\n",
   7.265 +					   cpu, now, t->expires));
   7.266 +			if (t->expires <= now) {
   7.267 +				detach_ac_timer(t);
   7.268 +				if (t->function != NULL)
   7.269 +					t->function(t->data);
   7.270 +				now = NOW();
   7.271 +			} else {
   7.272 +				TRC(printk("ACT  [%02d] do(): break1\n", cpu));
   7.273 +				break;
   7.274 +			}
   7.275 +		}
   7.276 +	}
   7.277 +		
   7.278 +	/* If list is not empty, reprogram timer to new head of list */
   7.279 +	if (!list_empty(&ac_timers[cpu].timers)) {
   7.280 +		t = list_entry(ac_timers[cpu].timers.next,struct ac_timer,timer_list);
   7.281 +		if (t->expires > 0) {
   7.282 +			TRC(printk("ACT  [%02d] do(): reprog timo=%lld\n",cpu,t->expires));
   7.283 +			if (!reprogram_ac_timer(t->expires)) {
   7.284 +				TRC(printk("ACT  [%02d] do(): again\n", cpu));
   7.285 +				goto do_timer_again;
   7.286 +			}
   7.287 +		}
   7.288 +	}
   7.289 +	local_irq_restore(flags);
   7.290 +}
   7.291 +
   7.292 +/*
   7.293 + * init
   7.294 + */
   7.295 +void __init ac_timer_init(void)
   7.296 +{
   7.297 +    int i;
   7.298 +
   7.299 +	printk ("ACT: Initialising Accurate timers\n");
   7.300 +
   7.301 +    for (i = 0; i < NR_CPUS; i++)
   7.302 +    {
   7.303 +		INIT_LIST_HEAD(&ac_timers[i].timers);
   7.304 +		spin_lock_init(&ac_timers[i].lock);
   7.305 +    }
   7.306 +	/* ac_timer_debug(0); */
   7.307 +}
     8.1 --- a/xen-2.4.16/common/domain.c	Fri Jan 31 15:03:37 2003 +0000
     8.2 +++ b/xen-2.4.16/common/domain.c	Sat Feb 01 17:27:13 2003 +0000
     8.3 @@ -25,24 +25,9 @@ extern unsigned char *cmdline;
     8.4  
     8.5  rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
     8.6  
     8.7 -schedule_data_t schedule_data[NR_CPUS];
     8.8 -
     8.9 -int wake_up(struct task_struct *p)
    8.10 -{
    8.11 -    unsigned long flags;
    8.12 -    int ret = 0;
    8.13 -    spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
    8.14 -    if ( __task_on_runqueue(p) ) goto out;
    8.15 -    p->state = TASK_RUNNING;
    8.16 -    __add_to_runqueue(p);
    8.17 -    ret = 1;
    8.18 -
    8.19 - out:
    8.20 -    spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
    8.21 -    return ret;
    8.22 -}
    8.23 -
    8.24 -
    8.25 +/*
    8.26 + * create a new domain
    8.27 + */
    8.28  struct task_struct *do_newdomain(void)
    8.29  {
    8.30      int retval;
    8.31 @@ -80,97 +65,6 @@ struct task_struct *do_newdomain(void)
    8.32      return(p);
    8.33  }
    8.34  
    8.35 -
    8.36 -void reschedule(struct task_struct *p)
    8.37 -{
    8.38 -    int cpu = p->processor;
    8.39 -    struct task_struct *curr;
    8.40 -    unsigned long flags;
    8.41 -
    8.42 -    if ( p->has_cpu ) return;
    8.43 -
    8.44 -    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
    8.45 -    curr = schedule_data[cpu].curr;
    8.46 -    if ( is_idle_task(curr) ) 
    8.47 -    {
    8.48 -        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
    8.49 -        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
    8.50 -#ifdef CONFIG_SMP
    8.51 -        if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu);
    8.52 -#endif
    8.53 -    }
    8.54 -    else
    8.55 -    {
    8.56 -        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
    8.57 -    }
    8.58 -}
    8.59 -
    8.60 -
    8.61 -static void process_timeout(unsigned long __data)
    8.62 -{
    8.63 -    struct task_struct * p = (struct task_struct *) __data;
    8.64 -    wake_up(p);
    8.65 -}
    8.66 -
    8.67 -long schedule_timeout(long timeout)
    8.68 -{
    8.69 -    struct timer_list timer;
    8.70 -    unsigned long expire;
    8.71 -    
    8.72 -    switch (timeout)
    8.73 -    {
    8.74 -    case MAX_SCHEDULE_TIMEOUT:
    8.75 -        /*
    8.76 -         * These two special cases are useful to be comfortable in the caller.
    8.77 -         * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
    8.78 -         * negative value but I' d like to return a valid offset (>=0) to allow
    8.79 -         * the caller to do everything it want with the retval.
    8.80 -         */
    8.81 -        schedule();
    8.82 -        goto out;
    8.83 -    default:
    8.84 -        /*
    8.85 -         * Another bit of PARANOID. Note that the retval will be 0 since no
    8.86 -         * piece of kernel is supposed to do a check for a negative retval of
    8.87 -         * schedule_timeout() (since it should never happens anyway). You just
    8.88 -         * have the printk() that will tell you if something is gone wrong and
    8.89 -         * where.
    8.90 -         */
    8.91 -        if (timeout < 0)
    8.92 -        {
    8.93 -            printk(KERN_ERR "schedule_timeout: wrong timeout "
    8.94 -                   "value %lx from %p\n", timeout,
    8.95 -                   __builtin_return_address(0));
    8.96 -            current->state = TASK_RUNNING;
    8.97 -            goto out;
    8.98 -        }
    8.99 -    }
   8.100 -    
   8.101 -    expire = timeout + jiffies;
   8.102 -    
   8.103 -    init_timer(&timer);
   8.104 -    timer.expires = expire;
   8.105 -    timer.data = (unsigned long) current;
   8.106 -    timer.function = process_timeout;
   8.107 -    
   8.108 -    add_timer(&timer);
   8.109 -    schedule();
   8.110 -    del_timer_sync(&timer);
   8.111 -    
   8.112 -    timeout = expire - jiffies;
   8.113 -    
   8.114 - out:
   8.115 -    return timeout < 0 ? 0 : timeout;
   8.116 -}
   8.117 -
   8.118 -
   8.119 -long do_yield(void)
   8.120 -{
   8.121 -    current->state = TASK_INTERRUPTIBLE;
   8.122 -    schedule();
   8.123 -    return 0;
   8.124 -}
   8.125 -
   8.126  /* Get a pointer to the specified domain.  Consider replacing this
   8.127   * with a hash lookup later.
   8.128   *
   8.129 @@ -245,7 +139,7 @@ long kill_other_domain(unsigned int dom)
   8.130  /* Release resources belonging to task @p. */
   8.131  void release_task(struct task_struct *p)
   8.132  {
   8.133 -    ASSERT(!__task_on_runqueue(p));
   8.134 +    //ASSERT(!__task_on_runqueue(p));
   8.135      ASSERT(p->state == TASK_DYING);
   8.136      ASSERT(!p->has_cpu);
   8.137      write_lock_irq(&tasklist_lock);
   8.138 @@ -267,78 +161,7 @@ void release_task(struct task_struct *p)
   8.139  }
   8.140  
   8.141  
   8.142 -asmlinkage void schedule(void)
   8.143 -{
   8.144 -    struct task_struct *prev, *next;
   8.145 -    struct list_head *tmp;
   8.146 -    int this_cpu;
   8.147 -
   8.148 - need_resched_back:
   8.149 -    prev = current;
   8.150 -    this_cpu = prev->processor;
   8.151 -
   8.152 -    spin_lock_irq(&schedule_data[this_cpu].lock);
   8.153 -
   8.154 -    ASSERT(!in_interrupt());
   8.155 -    ASSERT(__task_on_runqueue(prev));
   8.156 -
   8.157 -    if ( !prev->counter )
   8.158 -    {
   8.159 -        prev->counter = 2;
   8.160 -        __move_last_runqueue(prev);
   8.161 -    }
   8.162 -
   8.163 -    switch ( prev->state )
   8.164 -    {
   8.165 -    case TASK_INTERRUPTIBLE:
   8.166 -        if ( signal_pending(prev) )
   8.167 -        {
   8.168 -            prev->state = TASK_RUNNING;
   8.169 -            break;
   8.170 -        }
   8.171 -    default:
   8.172 -        __del_from_runqueue(prev);
   8.173 -    case TASK_RUNNING:;
   8.174 -    }
   8.175 -    clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
   8.176 -
   8.177 -    /* Round-robin, skipping idle where possible. */
   8.178 -    next = NULL;
   8.179 -    list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
   8.180 -        next = list_entry(tmp, struct task_struct, run_list);
   8.181 -        if ( next->domain != IDLE_DOMAIN_ID ) break;
   8.182 -    }
   8.183 -
   8.184 -    prev->has_cpu = 0;
   8.185 -    next->has_cpu = 1;
   8.186 -
   8.187 -    schedule_data[this_cpu].prev = prev;
   8.188 -    schedule_data[this_cpu].curr = next;
   8.189 -
   8.190 -    spin_unlock_irq(&schedule_data[this_cpu].lock);
   8.191 -
   8.192 -    if ( unlikely(prev == next) )
   8.193 -    {
   8.194 -        /* We won't go through the normal tail, so do this by hand */
   8.195 -        prev->policy &= ~SCHED_YIELD;
   8.196 -        goto same_process;
   8.197 -    }
   8.198 -
   8.199 -    prepare_to_switch();
   8.200 -    switch_to(prev, next);
   8.201 -    prev = schedule_data[this_cpu].prev;
   8.202 -    
   8.203 -    prev->policy &= ~SCHED_YIELD;
   8.204 -    if ( prev->state == TASK_DYING ) release_task(prev);
   8.205 -
   8.206 - same_process:
   8.207 -    if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
   8.208 -        goto need_resched_back;
   8.209 -    return;
   8.210 -}
   8.211 -
   8.212 -
   8.213 -unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
   8.214 +static unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
   8.215  {
   8.216      struct list_head *temp;
   8.217      struct pfn_info *pf, *pf_head;
   8.218 @@ -813,14 +636,15 @@ int setup_guestos(struct task_struct *p,
   8.219      return 0;
   8.220  }
   8.221  
   8.222 +
   8.223  void __init domain_init(void)
   8.224  {
   8.225 -    int i;
   8.226 -    for ( i = 0; i < NR_CPUS; i++ )
   8.227 -    {
   8.228 -        INIT_LIST_HEAD(&schedule_data[i].runqueue);
   8.229 -        spin_lock_init(&schedule_data[i].lock);
   8.230 -        schedule_data[i].prev = &idle0_task;
   8.231 -        schedule_data[i].curr = &idle0_task;
   8.232 +	printk("Initialising domains\n");
   8.233 +//	scheduler_init();
   8.234 +}
   8.235 +
   8.236 +
   8.237 +
   8.238 +#if 0
   8.239      }
   8.240  }
     9.1 --- a/xen-2.4.16/common/lib.c	Fri Jan 31 15:03:37 2003 +0000
     9.2 +++ b/xen-2.4.16/common/lib.c	Sat Feb 01 17:27:13 2003 +0000
     9.3 @@ -169,3 +169,358 @@ unsigned char *quad_to_str(unsigned long
     9.4  }
     9.5     
     9.6  
     9.7 +/* a couple of 64 bit operations ported from freebsd */
     9.8 +
     9.9 +/*-
    9.10 + * Copyright (c) 1992, 1993
    9.11 + *	The Regents of the University of California.  All rights reserved.
    9.12 + *
    9.13 + * This software was developed by the Computer Systems Engineering group
    9.14 + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
    9.15 + * contributed to Berkeley.
    9.16 + *
    9.17 + * Redistribution and use in source and binary forms, with or without
    9.18 + * modification, are permitted provided that the following conditions
    9.19 + * are met:
    9.20 + * 1. Redistributions of source code must retain the above copyright
    9.21 + *    notice, this list of conditions and the following disclaimer.
    9.22 + * 2. Redistributions in binary form must reproduce the above copyright
    9.23 + *    notice, this list of conditions and the following disclaimer in the
    9.24 + *    documentation and/or other materials provided with the distribution.
    9.25 + * 3. All advertising materials mentioning features or use of this software
    9.26 + *    must display the following acknowledgement:
    9.27 + *	This product includes software developed by the University of
    9.28 + *	California, Berkeley and its contributors.
    9.29 + * 4. Neither the name of the University nor the names of its contributors
    9.30 + *    may be used to endorse or promote products derived from this software
    9.31 + *    without specific prior written permission.
    9.32 + *
    9.33 + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    9.34 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    9.35 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    9.36 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    9.37 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    9.38 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    9.39 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    9.40 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    9.41 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    9.42 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    9.43 + * SUCH DAMAGE.
    9.44 + *
    9.45 + * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
    9.46 + */
    9.47 +
    9.48 +#include <asm/types.h>
    9.49 +
    9.50 +
    9.51 +/*
    9.52 + * Depending on the desired operation, we view a `long long' (aka quad_t) in
    9.53 + * one or more of the following formats.
    9.54 + */
    9.55 +union uu {
    9.56 +        s64            q;              /* as a (signed) quad */
    9.57 +        s64            uq;             /* as an unsigned quad */
    9.58 +        long           sl[2];          /* as two signed longs */
    9.59 +        unsigned long  ul[2];          /* as two unsigned longs */
    9.60 +};
    9.61 +/* XXX RN: Yuck hardcoded endianess :) */
    9.62 +#define _QUAD_HIGHWORD 1
    9.63 +#define _QUAD_LOWWORD 0
    9.64 +/*
    9.65 + * Define high and low longwords.
    9.66 + */
    9.67 +#define H               _QUAD_HIGHWORD
    9.68 +#define L               _QUAD_LOWWORD
    9.69 +
    9.70 +/*
    9.71 + * Total number of bits in a quad_t and in the pieces that make it up.
    9.72 + * These are used for shifting, and also below for halfword extraction
    9.73 + * and assembly.
    9.74 + */
    9.75 +#define CHAR_BIT        8               /* number of bits in a char */
    9.76 +#define QUAD_BITS       (sizeof(s64) * CHAR_BIT)
    9.77 +#define LONG_BITS       (sizeof(long) * CHAR_BIT)
    9.78 +#define HALF_BITS       (sizeof(long) * CHAR_BIT / 2)
    9.79 +
    9.80 +/*
    9.81 + * Extract high and low shortwords from longword, and move low shortword of
    9.82 + * longword to upper half of long, i.e., produce the upper longword of
    9.83 + * ((quad_t)(x) << (number_of_bits_in_long/2)).  (`x' must actually be u_long.)
    9.84 + *
    9.85 + * These are used in the multiply code, to split a longword into upper
    9.86 + * and lower halves, and to reassemble a product as a quad_t, shifted left
    9.87 + * (sizeof(long)*CHAR_BIT/2).
    9.88 + */
    9.89 +#define HHALF(x)        ((x) >> HALF_BITS)
    9.90 +#define LHALF(x)        ((x) & ((1 << HALF_BITS) - 1))
    9.91 +#define LHUP(x)         ((x) << HALF_BITS)
    9.92 +
    9.93 +/*
    9.94 + * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
    9.95 + * section 4.3.1, pp. 257--259.
    9.96 + */
    9.97 +#define	B	(1 << HALF_BITS)	/* digit base */
    9.98 +
    9.99 +/* Combine two `digits' to make a single two-digit number. */
   9.100 +#define	COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
   9.101 +
   9.102 +/* select a type for digits in base B: use unsigned short if they fit */
   9.103 +#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
   9.104 +typedef unsigned short digit;
   9.105 +#else
   9.106 +typedef u_long digit;
   9.107 +#endif
   9.108 +
   9.109 +/*
   9.110 + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
   9.111 + * `fall out' the left (there never will be any such anyway).
   9.112 + * We may assume len >= 0.  NOTE THAT THIS WRITES len+1 DIGITS.
   9.113 + */
   9.114 +static void
   9.115 +shl(register digit *p, register int len, register int sh)
   9.116 +{
   9.117 +	register int i;
   9.118 +
   9.119 +	for (i = 0; i < len; i++)
   9.120 +		p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
   9.121 +	p[i] = LHALF(p[i] << sh);
   9.122 +}
   9.123 +
   9.124 +/*
   9.125 + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
   9.126 + *
   9.127 + * We do this in base 2-sup-HALF_BITS, so that all intermediate products
   9.128 + * fit within u_long.  As a consequence, the maximum length dividend and
   9.129 + * divisor are 4 `digits' in this base (they are shorter if they have
   9.130 + * leading zeros).
   9.131 + */
   9.132 +u64
   9.133 +__qdivrem(uq, vq, arq)
   9.134 +	u64 uq, vq, *arq;
   9.135 +{
   9.136 +	union uu tmp;
   9.137 +	digit *u, *v, *q;
   9.138 +	register digit v1, v2;
   9.139 +	u_long qhat, rhat, t;
   9.140 +	int m, n, d, j, i;
   9.141 +	digit uspace[5], vspace[5], qspace[5];
   9.142 +
   9.143 +	/*
   9.144 +	 * Take care of special cases: divide by zero, and u < v.
   9.145 +	 */
   9.146 +	if (vq == 0) {
   9.147 +		/* divide by zero. */
   9.148 +		static volatile const unsigned int zero = 0;
   9.149 +
   9.150 +		tmp.ul[H] = tmp.ul[L] = 1 / zero;
   9.151 +		if (arq)
   9.152 +			*arq = uq;
   9.153 +		return (tmp.q);
   9.154 +	}
   9.155 +	if (uq < vq) {
   9.156 +		if (arq)
   9.157 +			*arq = uq;
   9.158 +		return (0);
   9.159 +	}
   9.160 +	u = &uspace[0];
   9.161 +	v = &vspace[0];
   9.162 +	q = &qspace[0];
   9.163 +
   9.164 +	/*
   9.165 +	 * Break dividend and divisor into digits in base B, then
   9.166 +	 * count leading zeros to determine m and n.  When done, we
   9.167 +	 * will have:
   9.168 +	 *	u = (u[1]u[2]...u[m+n]) sub B
   9.169 +	 *	v = (v[1]v[2]...v[n]) sub B
   9.170 +	 *	v[1] != 0
   9.171 +	 *	1 < n <= 4 (if n = 1, we use a different division algorithm)
   9.172 +	 *	m >= 0 (otherwise u < v, which we already checked)
   9.173 +	 *	m + n = 4
   9.174 +	 * and thus
   9.175 +	 *	m = 4 - n <= 2
   9.176 +	 */
   9.177 +	tmp.uq = uq;
   9.178 +	u[0] = 0;
   9.179 +	u[1] = HHALF(tmp.ul[H]);
   9.180 +	u[2] = LHALF(tmp.ul[H]);
   9.181 +	u[3] = HHALF(tmp.ul[L]);
   9.182 +	u[4] = LHALF(tmp.ul[L]);
   9.183 +	tmp.uq = vq;
   9.184 +	v[1] = HHALF(tmp.ul[H]);
   9.185 +	v[2] = LHALF(tmp.ul[H]);
   9.186 +	v[3] = HHALF(tmp.ul[L]);
   9.187 +	v[4] = LHALF(tmp.ul[L]);
   9.188 +	for (n = 4; v[1] == 0; v++) {
   9.189 +		if (--n == 1) {
   9.190 +			u_long rbj;	/* r*B+u[j] (not root boy jim) */
   9.191 +			digit q1, q2, q3, q4;
   9.192 +
   9.193 +			/*
   9.194 +			 * Change of plan, per exercise 16.
   9.195 +			 *	r = 0;
   9.196 +			 *	for j = 1..4:
   9.197 +			 *		q[j] = floor((r*B + u[j]) / v),
   9.198 +			 *		r = (r*B + u[j]) % v;
   9.199 +			 * We unroll this completely here.
   9.200 +			 */
   9.201 +			t = v[2];	/* nonzero, by definition */
   9.202 +			q1 = u[1] / t;
   9.203 +			rbj = COMBINE(u[1] % t, u[2]);
   9.204 +			q2 = rbj / t;
   9.205 +			rbj = COMBINE(rbj % t, u[3]);
   9.206 +			q3 = rbj / t;
   9.207 +			rbj = COMBINE(rbj % t, u[4]);
   9.208 +			q4 = rbj / t;
   9.209 +			if (arq)
   9.210 +				*arq = rbj % t;
   9.211 +			tmp.ul[H] = COMBINE(q1, q2);
   9.212 +			tmp.ul[L] = COMBINE(q3, q4);
   9.213 +			return (tmp.q);
   9.214 +		}
   9.215 +	}
   9.216 +
   9.217 +	/*
   9.218 +	 * By adjusting q once we determine m, we can guarantee that
   9.219 +	 * there is a complete four-digit quotient at &qspace[1] when
   9.220 +	 * we finally stop.
   9.221 +	 */
   9.222 +	for (m = 4 - n; u[1] == 0; u++)
   9.223 +		m--;
   9.224 +	for (i = 4 - m; --i >= 0;)
   9.225 +		q[i] = 0;
   9.226 +	q += 4 - m;
   9.227 +
   9.228 +	/*
   9.229 +	 * Here we run Program D, translated from MIX to C and acquiring
   9.230 +	 * a few minor changes.
   9.231 +	 *
   9.232 +	 * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
   9.233 +	 */
   9.234 +	d = 0;
   9.235 +	for (t = v[1]; t < B / 2; t <<= 1)
   9.236 +		d++;
   9.237 +	if (d > 0) {
   9.238 +		shl(&u[0], m + n, d);		/* u <<= d */
   9.239 +		shl(&v[1], n - 1, d);		/* v <<= d */
   9.240 +	}
   9.241 +	/*
   9.242 +	 * D2: j = 0.
   9.243 +	 */
   9.244 +	j = 0;
   9.245 +	v1 = v[1];	/* for D3 -- note that v[1..n] are constant */
   9.246 +	v2 = v[2];	/* for D3 */
   9.247 +	do {
   9.248 +		register digit uj0, uj1, uj2;
   9.249 +
   9.250 +		/*
   9.251 +		 * D3: Calculate qhat (\^q, in TeX notation).
   9.252 +		 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
   9.253 +		 * let rhat = (u[j]*B + u[j+1]) mod v[1].
   9.254 +		 * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
   9.255 +		 * decrement qhat and increase rhat correspondingly.
   9.256 +		 * Note that if rhat >= B, v[2]*qhat < rhat*B.
   9.257 +		 */
   9.258 +		uj0 = u[j + 0];	/* for D3 only -- note that u[j+...] change */
   9.259 +		uj1 = u[j + 1];	/* for D3 only */
   9.260 +		uj2 = u[j + 2];	/* for D3 only */
   9.261 +		if (uj0 == v1) {
   9.262 +			qhat = B;
   9.263 +			rhat = uj1;
   9.264 +			goto qhat_too_big;
   9.265 +		} else {
   9.266 +			u_long nn = COMBINE(uj0, uj1);
   9.267 +			qhat = nn / v1;
   9.268 +			rhat = nn % v1;
   9.269 +		}
   9.270 +		while (v2 * qhat > COMBINE(rhat, uj2)) {
   9.271 +	qhat_too_big:
   9.272 +			qhat--;
   9.273 +			if ((rhat += v1) >= B)
   9.274 +				break;
   9.275 +		}
   9.276 +		/*
   9.277 +		 * D4: Multiply and subtract.
   9.278 +		 * The variable `t' holds any borrows across the loop.
   9.279 +		 * We split this up so that we do not require v[0] = 0,
   9.280 +		 * and to eliminate a final special case.
   9.281 +		 */
   9.282 +		for (t = 0, i = n; i > 0; i--) {
   9.283 +			t = u[i + j] - v[i] * qhat - t;
   9.284 +			u[i + j] = LHALF(t);
   9.285 +			t = (B - HHALF(t)) & (B - 1);
   9.286 +		}
   9.287 +		t = u[j] - t;
   9.288 +		u[j] = LHALF(t);
   9.289 +		/*
   9.290 +		 * D5: test remainder.
   9.291 +		 * There is a borrow if and only if HHALF(t) is nonzero;
   9.292 +		 * in that (rare) case, qhat was too large (by exactly 1).
   9.293 +		 * Fix it by adding v[1..n] to u[j..j+n].
   9.294 +		 */
   9.295 +		if (HHALF(t)) {
   9.296 +			qhat--;
   9.297 +			for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
   9.298 +				t += u[i + j] + v[i];
   9.299 +				u[i + j] = LHALF(t);
   9.300 +				t = HHALF(t);
   9.301 +			}
   9.302 +			u[j] = LHALF(u[j] + t);
   9.303 +		}
   9.304 +		q[j] = qhat;
   9.305 +	} while (++j <= m);		/* D7: loop on j. */
   9.306 +
   9.307 +	/*
   9.308 +	 * If caller wants the remainder, we have to calculate it as
   9.309 +	 * u[m..m+n] >> d (this is at most n digits and thus fits in
   9.310 +	 * u[m+1..m+n], but we may need more source digits).
   9.311 +	 */
   9.312 +	if (arq) {
   9.313 +		if (d) {
   9.314 +			for (i = m + n; i > m; --i)
   9.315 +				u[i] = (u[i] >> d) |
   9.316 +				    LHALF(u[i - 1] << (HALF_BITS - d));
   9.317 +			u[i] = 0;
   9.318 +		}
   9.319 +		tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
   9.320 +		tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
   9.321 +		*arq = tmp.q;
   9.322 +	}
   9.323 +
   9.324 +	tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
   9.325 +	tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
   9.326 +	return (tmp.q);
   9.327 +}
   9.328 +
   9.329 +
   9.330 +/*
   9.331 + * Divide two signed quads.
   9.332 + * ??? if -1/2 should produce -1 on this machine, this code is wrong
   9.333 + */
   9.334 +s64
   9.335 +__divdi3(s64 a, s64 b)
   9.336 +{
   9.337 +	u64 ua, ub, uq;
   9.338 +	int neg;
   9.339 +
   9.340 +	if (a < 0)
   9.341 +		ua = -(u64)a, neg = 1;
   9.342 +	else
   9.343 +		ua = a, neg = 0;
   9.344 +	if (b < 0)
   9.345 +		ub = -(u64)b, neg ^= 1;
   9.346 +	else
   9.347 +		ub = b;
   9.348 +	uq = __qdivrem(ua, ub, (u64 *)0);
   9.349 +	return (neg ? -uq : uq);
   9.350 +}
   9.351 +
   9.352 +/*
   9.353 + * Divide two unsigned quads.
   9.354 + */
   9.355 +u64
   9.356 +__udivdi3(a, b)
   9.357 +        u64 a, b;
   9.358 +{
   9.359 +
   9.360 +        return (__qdivrem(a, b, (u64 *)0));
   9.361 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xen-2.4.16/common/schedule.c	Sat Feb 01 17:27:13 2003 +0000
    10.3 @@ -0,0 +1,345 @@
    10.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    10.5 + ****************************************************************************
    10.6 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
    10.7 + ****************************************************************************
    10.8 + *
    10.9 + *        File: schedule.c
   10.10 + *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
   10.11 + *     Changes: 
   10.12 + *              
   10.13 + *        Date: Nov 2002
   10.14 + * 
   10.15 + * Environment: Xen Hypervisor
   10.16 + * Description: CPU scheduling
   10.17 + *				partially moved from domain.c
   10.18 + *
   10.19 + ****************************************************************************
   10.20 + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
   10.21 + ****************************************************************************
   10.22 + */
   10.23 +
   10.24 +#include <xeno/config.h>
   10.25 +#include <xeno/init.h>
   10.26 +#include <xeno/lib.h>
   10.27 +#include <xeno/sched.h>
   10.28 +#include <xeno/delay.h>
   10.29 +#include <xeno/event.h>
   10.30 +#include <xeno/time.h>
   10.31 +#include <xeno/ac_timer.h>
   10.32 +
   10.33 +#undef SCHEDULER_TRACE
   10.34 +#ifdef SCHEDULER_TRACE
   10.35 +#define TRC(_x) _x
   10.36 +#else
   10.37 +#define TRC(_x)
   10.38 +#endif
   10.39 +
   10.40 +/*
   10.41 + * per CPU data for the scheduler.
   10.42 + */
   10.43 +typedef struct schedule_data_st
   10.44 +{
   10.45 +    spinlock_t lock;
   10.46 +    struct list_head runqueue;
   10.47 +    struct task_struct *prev, *curr;
   10.48 +} __cacheline_aligned schedule_data_t;
   10.49 +schedule_data_t schedule_data[NR_CPUS];
   10.50 +
   10.51 +static __cacheline_aligned struct ac_timer s_timer[NR_CPUS];
   10.52 +
   10.53 +/*
   10.54 + * Some convenience functions
   10.55 + */
   10.56 +
   10.57 +static inline void __add_to_runqueue(struct task_struct * p)
   10.58 +{
   10.59 +    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
   10.60 +}
   10.61 +
   10.62 +static inline void __move_last_runqueue(struct task_struct * p)
   10.63 +{
   10.64 +    list_del(&p->run_list);
   10.65 +    list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
   10.66 +}
   10.67 +
   10.68 +static inline void __move_first_runqueue(struct task_struct * p)
   10.69 +{
   10.70 +    list_del(&p->run_list);
   10.71 +    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
   10.72 +}
   10.73 +
   10.74 +static inline void __del_from_runqueue(struct task_struct * p)
   10.75 +{
   10.76 +    list_del(&p->run_list);
   10.77 +    p->run_list.next = NULL;
   10.78 +}
   10.79 +
   10.80 +static inline int __task_on_runqueue(struct task_struct *p)
   10.81 +{
   10.82 +    return (p->run_list.next != NULL);
   10.83 +}
   10.84 +
   10.85 +
   10.86 +/*
   10.87 + * Add a new domain to the scheduler
   10.88 + */
   10.89 +void sched_add_domain(struct task_struct *p) 
   10.90 +{
   10.91 +    p->state      = TASK_UNINTERRUPTIBLE;
   10.92 +}
   10.93 +
   10.94 +/*
   10.95 + * Remove domain from the scheduler
   10.96 + */
   10.97 +void sched_rem_domain(struct task_struct *p) 
   10.98 +{
   10.99 +	p->state = TASK_DYING;
  10.100 +}
  10.101 +
  10.102 +
  10.103 +/*
  10.104 + * wake up a domain which had been sleeping
  10.105 + */
  10.106 +int wake_up(struct task_struct *p)
  10.107 +{
  10.108 +    unsigned long flags;
  10.109 +    int ret = 0;
  10.110 +    spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
  10.111 +    if ( __task_on_runqueue(p) ) goto out;
  10.112 +    p->state = TASK_RUNNING;
  10.113 +    __add_to_runqueue(p);
  10.114 +    ret = 1;
  10.115 +
  10.116 + out:
  10.117 +    spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
  10.118 +    return ret;
  10.119 +}
  10.120 +
  10.121 +static void process_timeout(unsigned long __data)
  10.122 +{
  10.123 +    struct task_struct * p = (struct task_struct *) __data;
  10.124 +    wake_up(p);
  10.125 +}
  10.126 +
  10.127 +long schedule_timeout(long timeout)
  10.128 +{
  10.129 +    struct timer_list timer;
  10.130 +    unsigned long expire;
  10.131 +    
  10.132 +    switch (timeout)
  10.133 +    {
  10.134 +    case MAX_SCHEDULE_TIMEOUT:
  10.135 +        /*
  10.136 +         * These two special cases are useful to be comfortable in the caller.
  10.137 +         * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
  10.138 +         * negative value but I' d like to return a valid offset (>=0) to allow
  10.139 +         * the caller to do everything it want with the retval.
  10.140 +         */
  10.141 +        schedule();
  10.142 +        goto out;
  10.143 +    default:
  10.144 +        /*
  10.145 +         * Another bit of PARANOID. Note that the retval will be 0 since no
  10.146 +         * piece of kernel is supposed to do a check for a negative retval of
  10.147 +         * schedule_timeout() (since it should never happens anyway). You just
  10.148 +         * have the printk() that will tell you if something is gone wrong and
  10.149 +         * where.
  10.150 +         */
  10.151 +        if (timeout < 0)
  10.152 +        {
  10.153 +            printk(KERN_ERR "schedule_timeout: wrong timeout "
  10.154 +                   "value %lx from %p\n", timeout,
  10.155 +                   __builtin_return_address(0));
  10.156 +            current->state = TASK_RUNNING;
  10.157 +            goto out;
  10.158 +        }
  10.159 +    }
  10.160 +    
  10.161 +    expire = timeout + jiffies;
  10.162 +    
  10.163 +    init_timer(&timer);
  10.164 +    timer.expires = expire;
  10.165 +    timer.data = (unsigned long) current;
  10.166 +    timer.function = process_timeout;
  10.167 +    
  10.168 +    add_timer(&timer);
  10.169 +    schedule();
  10.170 +    del_timer_sync(&timer);
  10.171 +    
  10.172 +    timeout = expire - jiffies;
  10.173 +    
  10.174 + out:
  10.175 +    return timeout < 0 ? 0 : timeout;
  10.176 +}
  10.177 +
  10.178 +/* RN: XXX turn this into do_halt() */
  10.179 +/*
  10.180 + * yield the current process
  10.181 + */
  10.182 +long do_sched_op(void)
  10.183 +{
  10.184 +    current->state = TASK_INTERRUPTIBLE;
  10.185 +    schedule();
  10.186 +    return 0;
  10.187 +}
  10.188 +
  10.189 +/*
  10.190 + * 
  10.191 + */
  10.192 +void reschedule(struct task_struct *p)
  10.193 +{
  10.194 +    int cpu = p->processor;
  10.195 +    struct task_struct *curr;
  10.196 +    unsigned long flags;
  10.197 +
  10.198 +    if (p->has_cpu)
  10.199 +		return;
  10.200 +
  10.201 +    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
  10.202 +    curr = schedule_data[cpu].curr;
  10.203 +    if (is_idle_task(curr)) {
  10.204 +        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
  10.205 +        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
  10.206 +#ifdef CONFIG_SMP
  10.207 +        if (cpu != smp_processor_id())
  10.208 +			smp_send_event_check_cpu(cpu);
  10.209 +#endif
  10.210 +    } else {
  10.211 +        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
  10.212 +    }
  10.213 +}
  10.214 +
  10.215 +
  10.216 +/*
  10.217 + * Pick the next domain to run
  10.218 + */
  10.219 +
  10.220 +asmlinkage void schedule(void)
  10.221 +{
  10.222 +    struct task_struct *prev, *next, *p;
  10.223 +    struct list_head *tmp;
  10.224 +    int this_cpu;
  10.225 +
  10.226 + need_resched_back:
  10.227 +    prev = current;
  10.228 +    this_cpu = prev->processor;
  10.229 +
  10.230 +    spin_lock_irq(&schedule_data[this_cpu].lock);
  10.231 +
  10.232 +    //ASSERT(!in_interrupt());
  10.233 +    ASSERT(__task_on_runqueue(prev));
  10.234 +
  10.235 +	__move_last_runqueue(prev);
  10.236 +
  10.237 +    switch ( prev->state )
  10.238 +    {
  10.239 +    case TASK_INTERRUPTIBLE:
  10.240 +        if ( signal_pending(prev) )
  10.241 +        {
  10.242 +            prev->state = TASK_RUNNING;
  10.243 +            break;
  10.244 +        }
  10.245 +    default:
  10.246 +        __del_from_runqueue(prev);
  10.247 +    case TASK_RUNNING:;
  10.248 +    }
  10.249 +    clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
  10.250 +
  10.251 +    next = NULL;
  10.252 +    list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
  10.253 +        p = list_entry(tmp, struct task_struct, run_list);
  10.254 +        next = p;
  10.255 +        break;
  10.256 +    }
  10.257 +
  10.258 +    prev->has_cpu = 0;
  10.259 +    next->has_cpu = 1;
  10.260 +
  10.261 +    schedule_data[this_cpu].prev = prev;
  10.262 +    schedule_data[this_cpu].curr = next;
  10.263 +
  10.264 +    spin_unlock_irq(&schedule_data[this_cpu].lock);
  10.265 +
  10.266 +    if ( unlikely(prev == next) )
  10.267 +    {
  10.268 +        /* We won't go through the normal tail, so do this by hand */
  10.269 +        prev->policy &= ~SCHED_YIELD;
  10.270 +        goto same_process;
  10.271 +    }
  10.272 +
  10.273 +    prepare_to_switch();
  10.274 +    switch_to(prev, next);
  10.275 +    prev = schedule_data[this_cpu].prev;
  10.276 +    
  10.277 +    prev->policy &= ~SCHED_YIELD;
  10.278 +    if ( prev->state == TASK_DYING ) release_task(prev);
  10.279 +
  10.280 + same_process:
  10.281 +
  10.282 +	update_dom_time(next->shared_info);
  10.283 +
  10.284 +    if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
  10.285 +        goto need_resched_back;
  10.286 +    return;
  10.287 +}
  10.288 +
  10.289 +static __cacheline_aligned int count[NR_CPUS];
  10.290 +static void sched_timer(unsigned long foo)
  10.291 +{
  10.292 +	int 				cpu  = smp_processor_id();
  10.293 +    struct task_struct *curr = schedule_data[cpu].curr;
  10.294 +	s_time_t			now;
  10.295 +	int 				res;
  10.296 +
  10.297 +	if (count[cpu] >= 5) {
  10.298 +		set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
  10.299 +		count[cpu] = 0;
  10.300 +	}
  10.301 +	count[cpu]++;
  10.302 +
  10.303 + again:
  10.304 +	now = NOW();
  10.305 +	s_timer[cpu].expires  = now + MILLISECS(10);
  10.306 +
  10.307 + 	TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n",
  10.308 + 			   cpu, (u32)(now>>32), (u32)now,
  10.309 + 			   (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires));
  10.310 +	res=add_ac_timer(&s_timer[cpu]);
  10.311 +	if (res==1) {
  10.312 +		goto again;
  10.313 +	}
  10.314 +}
  10.315 +/*
  10.316 + * Initialise the data structures
  10.317 + */
  10.318 +void __init scheduler_init(void)
  10.319 +{
  10.320 +    int i;
  10.321 +
  10.322 +	printk("Initialising schedulers\n");
  10.323 +
  10.324 +    for ( i = 0; i < NR_CPUS; i++ )
  10.325 +    {
  10.326 +        INIT_LIST_HEAD(&schedule_data[i].runqueue);
  10.327 +        spin_lock_init(&schedule_data[i].lock);
  10.328 +        schedule_data[i].prev = &idle0_task;
  10.329 +        schedule_data[i].curr = &idle0_task;
  10.330 +		
  10.331 +		/* a timer for each CPU  */
  10.332 +		init_ac_timer(&s_timer[i]);
  10.333 +		s_timer[i].function = &sched_timer;
  10.334 +    }
  10.335 +}
  10.336 +
  10.337 +/*
  10.338 + * Start a scheduler for each CPU
  10.339 + * This has to be done *after* the timers, e.g., APICs, have been initialised
  10.340 + */
  10.341 +void schedulers_start(void) {
  10.342 +	
  10.343 +	printk("Start schedulers\n");
  10.344 +	__cli();
  10.345 +	sched_timer(0);
  10.346 +	smp_call_function(sched_timer, (void*)0, 1, 1);
  10.347 +	__sti();
  10.348 +}
    11.1 --- a/xen-2.4.16/common/timer.c	Fri Jan 31 15:03:37 2003 +0000
    11.2 +++ b/xen-2.4.16/common/timer.c	Sat Feb 01 17:27:13 2003 +0000
    11.3 @@ -602,7 +602,6 @@ void do_timer(struct pt_regs *regs)
    11.4      p = &idle0_task;
    11.5      do {
    11.6          s = p->shared_info;
    11.7 -        s->wall_time = s->domain_time = wall;
    11.8          cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
    11.9      }
   11.10      while ( (p = p->next_task) != &idle0_task );
    12.1 --- a/xen-2.4.16/include/asm-i386/apic.h	Fri Jan 31 15:03:37 2003 +0000
    12.2 +++ b/xen-2.4.16/include/asm-i386/apic.h	Sat Feb 01 17:27:13 2003 +0000
    12.3 @@ -55,17 +55,14 @@ static inline void ack_APIC_irq(void)
    12.4  }
    12.5  
    12.6  extern int get_maxlvt(void);
    12.7 -extern void clear_local_APIC(void);
    12.8  extern void connect_bsp_APIC (void);
    12.9  extern void disconnect_bsp_APIC (void);
   12.10  extern void disable_local_APIC (void);
   12.11  extern int verify_local_APIC (void);
   12.12 -extern void cache_APIC_registers (void);
   12.13  extern void sync_Arb_IDs (void);
   12.14  extern void init_bsp_APIC (void);
   12.15  extern void setup_local_APIC (void);
   12.16  extern void init_apic_mappings (void);
   12.17 -extern void smp_local_timer_interrupt (struct pt_regs * regs);
   12.18  extern void setup_APIC_clocks (void);
   12.19  extern int APIC_init_uniprocessor (void);
   12.20  
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen-2.4.16/include/asm-i386/time.h	Sat Feb 01 17:27:13 2003 +0000
    13.3 @@ -0,0 +1,80 @@
    13.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    13.5 + ****************************************************************************
    13.6 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
    13.7 + ****************************************************************************
    13.8 + *
    13.9 + *        File: time.h
   13.10 + *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
   13.11 + *     Changes: 
   13.12 + *              
   13.13 + *        Date: Nov 2002
   13.14 + * 
   13.15 + * Environment: Xen Hypervisor
   13.16 + * Description: Architecture dependent definition of time variables
   13.17 + *
   13.18 + ****************************************************************************
   13.19 + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
   13.20 + ****************************************************************************
   13.21 + */
   13.22 +
   13.23 +#ifndef _ASM_TIME_H_
   13.24 +#define _ASM_TIME_H_
   13.25 +
   13.26 +#include <asm/types.h>
   13.27 +#include <asm/msr.h>
   13.28 +
   13.29 +/*
   13.30 + * Cycle Counter Time
   13.31 + */
   13.32 +typedef u64 cc_time_t;
   13.33 +static inline cc_time_t get_cc_time()
   13.34 +{
   13.35 +	u64 ret;
   13.36 +	rdtscll(ret);
   13.37 +	return ret;
   13.38 +}
   13.39 +
   13.40 +/*
   13.41 + * System Time
   13.42 + */
   13.43 +typedef s64      s_time_t;	   /* System time */
   13.44 +extern  u32      stime_pcc;    /* cycle counter value at last timer irq */
   13.45 +extern  s_time_t stime_now;    /* time in ns at last timer IRQ */
   13.46 +extern  u32      stime_scale;  /* scale factor for converting cc to ns */
   13.47 +
   13.48 +
   13.49 +/*
   13.50 + * This is the Nemesis implementation.
   13.51 + * The variables are all set in apic.c
   13.52 + * Every timer IRQ time_now and time_pcc is set to the current values
   13.53 + * At calibration time_scale is set
   13.54 + */
   13.55 +static s_time_t get_s_time(void)
   13.56 +{
   13.57 +    u32 	 delta, low, pcc;
   13.58 +	s_time_t now;
   13.59 +	s_time_t incr;
   13.60 +
   13.61 +	/* read two values (pcc, now) "atomically" */
   13.62 +again:
   13.63 +    pcc = stime_pcc;		
   13.64 +    now = stime_now;
   13.65 +	if (stime_pcc != pcc) goto again;
   13.66 +
   13.67 +    /* only use bottom 32bits of TSC. This should be sufficient */
   13.68 +	rdtscl(low);
   13.69 +    delta = low - pcc;
   13.70 +
   13.71 +	incr = ((s_time_t)(stime_scale) * delta) >> 10;
   13.72 +    return now + incr; 
   13.73 +}
   13.74 +
   13.75 +/* update time variables once in a while */
   13.76 +extern void update_time(void);
   13.77 +
   13.78 +/*
   13.79 + * Domain Virtual Time
   13.80 + */
   13.81 +typedef u64 dv_time_t;
   13.82 +
   13.83 +#endif /* _ASM_TIME_H_ */
    14.1 --- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h	Fri Jan 31 15:03:37 2003 +0000
    14.2 +++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h	Sat Feb 01 17:27:13 2003 +0000
    14.3 @@ -155,36 +155,33 @@ typedef struct shared_info_st {
    14.4       */
    14.5      unsigned long failsafe_address;
    14.6  
    14.7 -    /*
    14.8 -     * CPU ticks since start of day.
    14.9 -     * `wall_time' counts CPU ticks in real time.
   14.10 -     * `domain_time' counts CPU ticks during which this domain has run.
   14.11 +	/*
   14.12 +     * Time:
   14.13 +     * The following abstractions are exposed: System Time, Wall Clock 
   14.14 +     * Time, Domain Virtual Time. Domains can access Cycle counter time
   14.15 +     * directly. 
   14.16       */
   14.17 -    unsigned long ticks_per_ms; /* CPU ticks per millisecond */
   14.18 -    /*
   14.19 -     * Current wall_time can be found by rdtsc. Only possible use of
   14.20 -     * variable below is that it provides a timestamp for last update
   14.21 -     * of domain_time.
   14.22 -     */
   14.23 -    unsigned long long wall_time;
   14.24 -    unsigned long long domain_time;
   14.25  
   14.26 -    /*
   14.27 -     * Timeouts for points at which guest OS would like a callback.
   14.28 -     * This will probably be backed up by a timer heap in the guest OS.
   14.29 -     * In Linux we use timeouts to update 'jiffies'.
   14.30 +	/* System Time */
   14.31 +	long long          system_time;		/* in ns */
   14.32 +	unsigned long      st_timestamp;	/* cyclecounter at last update */
   14.33 +	unsigned long      ticks_per_ms;    /* CPU ticks per millisecond */
   14.34 +	/* Wall Clock Time */
   14.35 +	long	  tv_sec;					/* essentially a struct timeval */
   14.36 +	long	  tv_usec;
   14.37 +	long long wc_timestamp;				/* system time at last update */
   14.38 +
   14.39 +	/* Domain Virtual Time */
   14.40 +	unsigned long long domain_time;
   14.41 +	
   14.42 +	/*
   14.43 +     * Timeout values:
   14.44 +     * Allow a domain to specify a timeout value in system time and 
   14.45 +     * domain virtual time.
   14.46       */
   14.47      unsigned long long wall_timeout;
   14.48      unsigned long long domain_timeout;
   14.49  
   14.50 -    /*
   14.51 -     * Real-Time Clock. This shows time, in seconds, since 1.1.1980.
   14.52 -     * The timestamp shows the CPU 'wall time' when RTC was last read.
   14.53 -     * Thus it allows a mapping between 'real time' and 'wall time'.
   14.54 -     */
   14.55 -    unsigned long      rtc_time;
   14.56 -    unsigned long long rtc_timestamp;
   14.57 -
   14.58  } shared_info_t;
   14.59  
   14.60  /*
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xen-2.4.16/include/xeno/ac_timer.h	Sat Feb 01 17:27:13 2003 +0000
    15.3 @@ -0,0 +1,65 @@
    15.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    15.5 + ****************************************************************************
    15.6 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
    15.7 + ****************************************************************************
    15.8 + *
    15.9 + *        File: ac_timer.h
   15.10 + *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
   15.11 + *     Changes: 
   15.12 + *              
   15.13 + *        Date: Nov 2002
   15.14 + * 
   15.15 + * Environment: Xen Hypervisor
   15.16 + * Description: Accurate timer for the Hypervisor
   15.17 + * 
   15.18 + ****************************************************************************
   15.19 + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
   15.20 + ****************************************************************************
   15.21 + */
   15.22 +
   15.23 +#ifndef _AC_TIMER_H_
   15.24 +#define _AC_TIMER_H_
   15.25 +
   15.26 +#include <time.h> /* include notion of time */
   15.27 +
   15.28 +/*
   15.29 + * The Xen Hypervisor provides two types of timers:
   15.30 + *
   15.31 + * - Linux style, jiffy based timers for legacy code and coarse grain timeouts
   15.32 + *   These are defined in ./include/xeno/timer.h and implemented in
   15.33 + *   ./common/timer.c. Unlike in Linux they are executed not on a periodic
   15.34 + *   timer interrupt but "occasionally" with somewhat lesser accuracy.
   15.35 + *  
   15.36 + * - accurate timers defined in this file and implemented in
   15.37 + *   ./common/ac_timer.c. These are implemented using a programmable timer
   15.38 + *   interrupt and are thus as accurate as the hardware allows. Where possible
   15.39 + *   we use the local APIC for this purpose. However, this fact is hidden
   15.40 + *   behind a architecture independent layer.
   15.41 + *   accurate timers are programmed using system time.
   15.42 + * 
   15.43 + * The interface to accurate timers is very similar to Linux timers with the
   15.44 + * exception that the expires value is not expressed in jiffies but in ns from
   15.45 + * boot time.  Its implementation however, is entirely different.
   15.46 + */
   15.47 +
   15.48 +struct ac_timer {
   15.49 +	struct list_head timer_list;
   15.50 +	s_time_t         expires;	/* system time timeout value */
   15.51 +	unsigned long    data;
   15.52 +	void             (*function)(unsigned long);
   15.53 +};
   15.54 +
   15.55 +/* interface for "clients" */
   15.56 +extern int add_ac_timer(struct ac_timer *timer);
   15.57 +extern int rem_ac_timer(struct ac_timer *timer);
   15.58 +extern int mod_ac_timer(struct ac_timer *timer, s_time_t new_time);
   15.59 +static inline void init_ac_timer(struct ac_timer *timer)
   15.60 +{
   15.61 +	//timer->next = NULL;
   15.62 +}
   15.63 +
   15.64 +/* interface used by programmable timer, implemented hardware dependent */
   15.65 +extern int  reprogram_ac_timer(s_time_t timeout);
   15.66 +extern void do_ac_timer(void);
   15.67 +
   15.68 +#endif /* _AC_TIMER_H_ */
    16.1 --- a/xen-2.4.16/include/xeno/sched.h	Fri Jan 31 15:03:37 2003 +0000
    16.2 +++ b/xen-2.4.16/include/xeno/sched.h	Sat Feb 01 17:27:13 2003 +0000
    16.3 @@ -60,7 +60,8 @@ extern struct mm_struct init_mm;
    16.4  struct task_struct {
    16.5  
    16.6      int processor;
    16.7 -    int state, hyp_events;
    16.8 +    int state;
    16.9 +	int hyp_events;
   16.10      unsigned int domain;
   16.11  
   16.12      /* index into frame_table threading pages belonging to this
   16.13 @@ -105,11 +106,24 @@ struct task_struct {
   16.14      unsigned long flags;
   16.15  };
   16.16  
   16.17 +/*
   16.18 + * domain states 
   16.19 + * TASK_RUNNING:         Domain is runable and should be on a run queue
   16.20 + * TASK_INTERRUPTIBLE:   Domain is blocked but may be woken up by an event
   16.21 + *                       or expiring timer
   16.22 + * TASK_UNINTERRUPTIBLE: Domain is blocked but may not be woken up by an
   16.23 + *                       arbitrary event or timer.
   16.24 + * TASK_WAIT:            Domain's CPU allocation expired.
   16.25 + * TASK_STOPPED:         not really used in Xen
   16.26 + * TASK_DYING:           Domain is about to cross over to the land of the dead.
   16.27 + */
   16.28 +
   16.29  #define TASK_RUNNING            0
   16.30  #define TASK_INTERRUPTIBLE      1
   16.31  #define TASK_UNINTERRUPTIBLE    2
   16.32 -#define TASK_STOPPED            4
   16.33 -#define TASK_DYING              8
   16.34 +#define TASK_WAIT				4
   16.35 +#define TASK_DYING              16
   16.36 +/* #define TASK_STOPPED            8  not really used */
   16.37  
   16.38  #define SCHED_YIELD             0x10
   16.39  
   16.40 @@ -171,52 +185,23 @@ extern void free_irq(unsigned int, void 
   16.41  extern unsigned long wait_init_idle;
   16.42  #define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
   16.43  
   16.44 -#define set_current_state(_s) do { current->state = (_s); } while (0)
   16.45 -#define MAX_SCHEDULE_TIMEOUT LONG_MAX
   16.46 -long schedule_timeout(long timeout);
   16.47 -asmlinkage void schedule(void);
   16.48 -
   16.49 -void reschedule(struct task_struct *p);
   16.50 -
   16.51 -typedef struct schedule_data_st
   16.52 -{
   16.53 -    spinlock_t lock;
   16.54 -    struct list_head runqueue;
   16.55 -    struct task_struct *prev, *curr;
   16.56 -} __cacheline_aligned schedule_data_t;
   16.57 -extern schedule_data_t schedule_data[NR_CPUS];
   16.58 -
   16.59 -static inline void __add_to_runqueue(struct task_struct * p)
   16.60 -{
   16.61 -    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
   16.62 -}
   16.63  
   16.64  
   16.65 -static inline void __move_last_runqueue(struct task_struct * p)
   16.66 -{
   16.67 -    list_del(&p->run_list);
   16.68 -    list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
   16.69 -}
   16.70 -
   16.71 -
   16.72 -static inline void __move_first_runqueue(struct task_struct * p)
   16.73 -{
   16.74 -    list_del(&p->run_list);
   16.75 -    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
   16.76 -}
   16.77 +/*
   16.78 + * Scheduler functions (in schedule.c)
   16.79 + */
   16.80 +#define set_current_state(_s) do { current->state = (_s); } while (0)
   16.81 +#define MAX_SCHEDULE_TIMEOUT LONG_MAX
   16.82 +void scheduler_init(void);
   16.83 +void start_scheduler(void);
   16.84 +void sched_add_domain(struct task_struct *p);
   16.85 +void sched_rem_domain(struct task_struct *p);
   16.86 +int  wake_up(struct task_struct *p);
   16.87 +long schedule_timeout(long timeout);
   16.88 +long do_yield(void);
   16.89 +void reschedule(struct task_struct *p);
   16.90 +asmlinkage void schedule(void);
   16.91  
   16.92 -static inline void __del_from_runqueue(struct task_struct * p)
   16.93 -{
   16.94 -    list_del(&p->run_list);
   16.95 -    p->run_list.next = NULL;
   16.96 -}
   16.97 -
   16.98 -static inline int __task_on_runqueue(struct task_struct *p)
   16.99 -{
  16.100 -    return (p->run_list.next != NULL);
  16.101 -}
  16.102 -
  16.103 -int wake_up(struct task_struct *p);
  16.104  
  16.105  #define signal_pending(_p) ((_p)->hyp_events || \
  16.106                              (_p)->shared_info->events)
    17.1 --- a/xen-2.4.16/include/xeno/time.h	Fri Jan 31 15:03:37 2003 +0000
    17.2 +++ b/xen-2.4.16/include/xeno/time.h	Sat Feb 01 17:27:13 2003 +0000
    17.3 @@ -1,13 +1,61 @@
    17.4 -/******************************************************************************
    17.5 - * time.h
    17.6 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    17.7 + ****************************************************************************
    17.8 + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
    17.9 + ****************************************************************************
   17.10 + *
   17.11 + *        File: time.h
   17.12 + *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
   17.13 + *     Changes: 
   17.14 + *              
   17.15 + *        Date: Nov 2002
   17.16 + * 
   17.17 + * Environment: Xen Hypervisor
   17.18 + * Description: This file provides a one stop shop for all time related
   17.19 + *              issues within the hypervisor. 
   17.20 + * 
   17.21 + *              The Hypervisor provides the following notions of time:
   17.22 + *              Cycle Counter Time, System Time, Wall Clock Time, and 
   17.23 + *              Domain Virtual Time.
   17.24 + *
   17.25 + ****************************************************************************
   17.26 + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
   17.27 + ****************************************************************************
   17.28   */
   17.29  
   17.30 +
   17.31 +
   17.32  #ifndef __XENO_TIME_H__
   17.33  #define __XENO_TIME_H__
   17.34  
   17.35 +#include <asm/ptrace.h>  /* XXX Only used for do_timer which should be moved */
   17.36 +#include <asm/time.h>    /* pull in architecture specific time definition */
   17.37  #include <xeno/types.h>
   17.38 -#include <asm/ptrace.h>
   17.39 +
   17.40 +/*
   17.41 + * Cycle Counter Time (defined in asm/time.h)
   17.42 + */
   17.43 +
   17.44  
   17.45 +/*
   17.46 + * System Time
   17.47 + * 64 bit value containing the nanoseconds elapsed since boot time.
   17.48 + * This value is adjusted by frequency drift.
   17.49 + * NOW() returns the current time.
   17.50 + * The other macros are for convenience to approximate short intervals
   17.51 + * of real time into system time 
   17.52 + */
   17.53 +#define NOW()				((s_time_t)get_s_time())
   17.54 +#define SECONDS(_s)			(((s_time_t)(_s))  * 1000000000UL )
   17.55 +#define TENTHS(_ts)			(((s_time_t)(_ts)) * 100000000UL )
   17.56 +#define HUNDREDTHS(_hs)		(((s_time_t)(_hs)) * 10000000UL )
   17.57 +#define MILLISECS(_ms)		(((s_time_t)(_ms)) * 1000000UL )
   17.58 +#define MICROSECS(_us)		(((s_time_t)(_us)) * 1000UL )
   17.59 +#define Time_Max			((s_time_t) 0x7fffffffffffffffLL)
   17.60 +#define FOREVER				Time_Max
   17.61 +
   17.62 +/*
   17.63 + * Wall Clock Time
   17.64 + */
   17.65  struct timeval {
   17.66      long            tv_sec;         /* seconds */
   17.67      long            tv_usec;        /* microseconds */
   17.68 @@ -25,6 +73,13 @@ extern void get_fast_time(struct timeval
   17.69  extern void (*do_get_fast_time)(struct timeval *);
   17.70  #endif
   17.71  
   17.72 +/*
   17.73 + * Domain Virtual Time (defined in asm/time.h) 
   17.74 + */
   17.75 +/* XXX Interface for getting and setting still missing */
   17.76 +
   17.77 +
   17.78 +/* XXX move this  */
   17.79  extern void do_timer(struct pt_regs *regs);
   17.80  
   17.81  #endif /* __XENO_TIME_H__ */
    18.1 --- a/xen-2.4.16/include/xeno/timer.h	Fri Jan 31 15:03:37 2003 +0000
    18.2 +++ b/xen-2.4.16/include/xeno/timer.h	Sat Feb 01 17:27:13 2003 +0000
    18.3 @@ -12,10 +12,17 @@
    18.4   * The "data" field enables use of a common timeout function for several
    18.5   * timeouts. You can use this field to distinguish between the different
    18.6   * invocations.
    18.7 + *
    18.8 + * RN: Unlike the Linux timers, which are executed at the periodic timer
    18.9 + *     interrupt, in Xen, the timer list is only checked "occasionally", thus
   18.10 + *     its accuracy might be somewhat worse than under Linux. However, the
   18.11 + *     hypervisor should be purely event-driven and, in fact, in the current
   18.12 + *     implementation, timers are only used for watchdog purposes at a very
   18.13 + *     coarse granularity anyway. Thus this is not a problem.
   18.14   */
   18.15  struct timer_list {
   18.16  	struct list_head list;
   18.17 -	unsigned long expires;
   18.18 +	unsigned long expires;		/* jiffies */
   18.19  	unsigned long data;
   18.20  	void (*function)(unsigned long);
   18.21  };
    19.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c	Fri Jan 31 15:03:37 2003 +0000
    19.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c	Sat Feb 01 17:27:13 2003 +0000
    19.3 @@ -42,7 +42,7 @@ extern struct drive_info_struct drive_in
    19.4  EXPORT_SYMBOL(drive_info);
    19.5  #endif
    19.6  
    19.7 -extern unsigned long get_cmos_time(void);
    19.8 +//extern unsigned long get_cmos_time(void);
    19.9  
   19.10  /* platform dependent support */
   19.11  EXPORT_SYMBOL(boot_cpu_data);
   19.12 @@ -58,7 +58,7 @@ EXPORT_SYMBOL(probe_irq_mask);
   19.13  EXPORT_SYMBOL(kernel_thread);
   19.14  EXPORT_SYMBOL(pm_idle);
   19.15  EXPORT_SYMBOL(pm_power_off);
   19.16 -EXPORT_SYMBOL(get_cmos_time);
   19.17 +//EXPORT_SYMBOL(get_cmos_time);
   19.18  EXPORT_SYMBOL(apm_info);
   19.19  
   19.20  #ifdef CONFIG_DEBUG_IOVIRT
    20.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c	Fri Jan 31 15:03:37 2003 +0000
    20.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c	Sat Feb 01 17:27:13 2003 +0000
    20.3 @@ -86,7 +86,7 @@ void cpu_idle (void)
    20.4  
    20.5      while (1) {
    20.6          while (!current->need_resched)
    20.7 -            HYPERVISOR_yield();
    20.8 +            HYPERVISOR_do_sched_op(NULL);
    20.9          schedule();
   20.10          check_pgt_cache();
   20.11      }
    21.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c	Fri Jan 31 15:03:37 2003 +0000
    21.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c	Sat Feb 01 17:27:13 2003 +0000
    21.3 @@ -102,47 +102,26 @@ static inline unsigned long ticks_to_us(
    21.4      return(hi);
    21.5  }
    21.6  
    21.7 -static inline unsigned long do_gettimeoffset(void)
    21.8 +static long long get_s_time(void)
    21.9  {
   21.10 -#if 0
   21.11 -    register unsigned long eax, edx;
   21.12 -
   21.13 -    /* Read the Time Stamp Counter */
   21.14 -
   21.15 -    rdtsc(eax,edx);
   21.16 -
   21.17 -    /* .. relative to previous jiffy (32 bits is enough) */
   21.18 -    eax -= last_tsc_low;	/* tsc_low delta */
   21.19 +    u32 	  delta, low, pcc;
   21.20 +	long long now;
   21.21 +	long long incr;
   21.22  
   21.23 -    /*
   21.24 -     * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
   21.25 -     *             = (tsc_low delta) * (usecs_per_clock)
   21.26 -     *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
   21.27 -     *
   21.28 -     * Using a mull instead of a divl saves up to 31 clock cycles
   21.29 -     * in the critical path.
   21.30 -     */
   21.31 -    
   21.32 -    edx = ticks_to_us(eax);
   21.33 +	/* read two values (pcc, now) "atomically" */
   21.34 +again:
   21.35 +    pcc = HYPERVISOR_shared_info->st_timestamp;
   21.36 +    now = HYPERVISOR_shared_info->system_time;
   21.37 +	if (HYPERVISOR_shared_info->st_timestamp != pcc) goto again;
   21.38  
   21.39 -    /* our adjusted time offset in microseconds */
   21.40 -    return delay_at_last_interrupt + edx;
   21.41 -#else
   21.42 -    /*
   21.43 -     * We should keep a 'last_tsc_low' thing which incorporates 
   21.44 -     * delay_at_last_interrupt, adjusted in timer_interrupt after
   21.45 -     * do_timer_interrupt. It would look at change in xtime, and
   21.46 -     * make appropriate adjustment to a last_tsc variable.
   21.47 -     * 
   21.48 -     * We'd be affected by rounding error in ticks_per_usec, and by
   21.49 -     * processor clock drift (which should be no more than in an
   21.50 -     * external interrupt source anyhow). 
   21.51 -     * 
   21.52 -     * Perhaps a bit rough and ready, but never mind!
   21.53 -     */
   21.54 -    return 0;
   21.55 -#endif
   21.56 +    /* only use bottom 32bits of TSC. This should be sufficient */
   21.57 +	rdtscl(low);
   21.58 +    delta = low - pcc;
   21.59 +
   21.60 +	incr = ((long long)(ticks_to_us(delta)*1000));
   21.61 +    return now + incr; 
   21.62  }
   21.63 +#define NOW()				((long long)get_s_time())
   21.64  
   21.65  /*
   21.66   * This version of gettimeofday has microsecond resolution
   21.67 @@ -151,15 +130,15 @@ static inline unsigned long do_gettimeof
   21.68  void do_gettimeofday(struct timeval *tv)
   21.69  {
   21.70      unsigned long flags;
   21.71 -    unsigned long usec, sec, lost;
   21.72 +    unsigned long usec, sec;
   21.73  
   21.74      read_lock_irqsave(&xtime_lock, flags);
   21.75 -    usec = do_gettimeoffset();
   21.76 -    lost = jiffies - wall_jiffies;
   21.77 -    if ( lost != 0 ) usec += lost * (1000000 / HZ);
   21.78 -    sec = xtime.tv_sec;
   21.79 -    usec += xtime.tv_usec;
   21.80 -    read_unlock_irqrestore(&xtime_lock, flags);
   21.81 +
   21.82 +	usec  = ((unsigned long)(NOW()-HYPERVISOR_shared_info->wc_timestamp))/1000;
   21.83 +	sec   = HYPERVISOR_shared_info->tv_sec;
   21.84 +	usec += HYPERVISOR_shared_info->tv_usec;
   21.85 +
   21.86 +	read_unlock_irqrestore(&xtime_lock, flags);
   21.87  
   21.88      while ( usec >= 1000000 ) 
   21.89      {
   21.90 @@ -173,6 +152,8 @@ void do_gettimeofday(struct timeval *tv)
   21.91  
   21.92  void do_settimeofday(struct timeval *tv)
   21.93  {
   21.94 +/* XXX RN: should do something special here for dom0 */
   21.95 +#if 0
   21.96      write_lock_irq(&xtime_lock);
   21.97      /*
   21.98       * This is revolting. We need to set "xtime" correctly. However, the
   21.99 @@ -195,6 +176,7 @@ void do_settimeofday(struct timeval *tv)
  21.100      time_maxerror = NTP_PHASE_LIMIT;
  21.101      time_esterror = NTP_PHASE_LIMIT;
  21.102      write_unlock_irq(&xtime_lock);
  21.103 +#endif
  21.104  }
  21.105  
  21.106  
  21.107 @@ -235,19 +217,6 @@ static struct irqaction irq_timer = {
  21.108  };
  21.109  
  21.110  
  21.111 -unsigned long get_cmos_time(void)
  21.112 -{
  21.113 -    unsigned long secs = HYPERVISOR_shared_info->rtc_time;
  21.114 -    unsigned long diff;
  21.115 -
  21.116 -    rdtscl(diff);
  21.117 -    diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp;
  21.118 -
  21.119 -    secs += ticks_to_us(diff);
  21.120 -
  21.121 -    return(secs + ticks_to_secs(diff));
  21.122 -}
  21.123 -
  21.124  
  21.125  /* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */
  21.126  static unsigned long __init calibrate_tsc(void)
  21.127 @@ -268,7 +237,6 @@ void __init time_init(void)
  21.128      unsigned long long alarm;
  21.129  	
  21.130      fast_gettimeoffset_quotient = calibrate_tsc();
  21.131 -    do_get_fast_time = do_gettimeofday;
  21.132  
  21.133      /* report CPU clock rate in Hz.
  21.134       * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
  21.135 @@ -299,6 +267,5 @@ void __init time_init(void)
  21.136      HYPERVISOR_shared_info->domain_timeout = ~0ULL;
  21.137      clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
  21.138  
  21.139 -    xtime.tv_sec = get_cmos_time();
  21.140 -    xtime.tv_usec = 0;
  21.141 +	do_gettimeofday(&xtime);
  21.142  }
    22.1 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h	Fri Jan 31 15:03:37 2003 +0000
    22.2 +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h	Sat Feb 01 17:27:13 2003 +0000
    22.3 @@ -215,12 +215,13 @@ static inline int HYPERVISOR_fpu_taskswi
    22.4      return ret;
    22.5  }
    22.6  
    22.7 -static inline int HYPERVISOR_yield(void)
    22.8 +static inline int HYPERVISOR_do_sched_op(void *sched_op)
    22.9  {
   22.10      int ret;
   22.11      __asm__ __volatile__ (
   22.12          TRAP_INSTR
   22.13 -        : "=a" (ret) : "0" (__HYPERVISOR_yield) );
   22.14 +        : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
   22.15 +        "b" (sched_op) );
   22.16  
   22.17      return ret;
   22.18  }