ia64/xen-unstable

changeset 302:18f05ecf3fef

bitkeeper revision 1.124 (3e71f8a4QvveKwitZNAJi1H3BJpPEQ)

ac_timer.c:
rewrite of do_ac_timer/add_ac_timer + perf counters
apic.c:
added perf counter; also try to disable the APIC timer when the timeout value is zero.
irq.c:
count interrupts and cycles spent in them
sched.h:
added fields for BVT
schedule.c:
BVT without warping
keyhandler.c:
added handler for dumping run queues
moved handler for ac_timers here
.del-dom0_ops.h~f77c7a14cfa618f8:
Delete: tools/domain_builder/dom0_ops.h
author rn@wyvis.research.intel-research.net
date Fri Mar 14 15:43:32 2003 +0000 (2003-03-14)
parents 314d67fab116
children 4ace704a143f
files .rootkeys tools/domain_builder/dom0_ops.h xen/arch/i386/apic.c xen/arch/i386/irq.c xen/common/ac_timer.c xen/common/keyhandler.c xen/common/schedule.c xen/include/xeno/sched.h
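
For orientation before the diff: the BVT accounting that schedule.c gains
below reduces to a few lines of integer arithmetic. After a domain runs for
'ranfor' ns, its actual virtual time (avt) advances by
ceil(ranfor/MCU) * mcu_advance, and without warping its effective virtual
time (evt) simply equals avt. A minimal, runnable sketch of just that
arithmetic (the 100us MCU and the names are taken from the diff; the rest
is scaffolding):

    #include <stdio.h>
    #include <stdint.h>

    #define MCU 100000                 /* minimum charging unit: 100us in ns */

    /* charge a domain for 'ranfor' ns of CPU time, BVT-style, no warping */
    static uint32_t charge(uint32_t avt, unsigned long mcu_advance,
                           int32_t ranfor)
    {
        int32_t mcus = ranfor / MCU;
        if (ranfor % MCU) mcus++;      /* always round up, as schedule() does */
        return avt + mcus * mcu_advance;   /* evt == avt without warping */
    }

    int main(void)
    {
        /* a domain with mcu_advance = 10 (inverse weight) runs for 2.5ms */
        printf("avt -> %u\n", charge(0, 10, 2500000));  /* 25 MCUs * 10 = 250 */
        return 0;
    }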
line diff
     1.1 --- a/.rootkeys	Tue Mar 11 10:34:08 2003 +0000
     1.2 +++ b/.rootkeys	Fri Mar 14 15:43:32 2003 +0000
     1.3 @@ -182,7 +182,6 @@ 3e6377dbGcgnisKw16DPCaND7oGO3Q tools/bal
     1.4  3e4d00468-FN2VDeEHo96zxrMHK_mA tools/domain_builder/Makefile
     1.5  3e4d0046SPau_y0sw2WLJz8QkqNoRA tools/domain_builder/README
     1.6  3e4d0046bbdH0GsI9J_1Eb4ZQHfIiQ tools/domain_builder/dom0_defs.h
     1.7 -3e4d0046RgYCfGOw6qGz_7kYLMV2Vw tools/domain_builder/dom0_ops.h
     1.8  3e4d0046ouLij_CMN_j7-dUHZIBI_A tools/domain_builder/dom_builder.c
     1.9  3e4d0046EKs06fY0CWDEgZQcn7DYUg tools/domain_builder/dom_kill.c
    1.10  3e4d0046aPbGiRTtdWxqY5b3ytWurA tools/domain_builder/hypervisor_defs.h
     2.1 --- a/tools/domain_builder/dom0_ops.h	Tue Mar 11 10:34:08 2003 +0000
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,81 +0,0 @@
     2.4 -/******************************************************************************
     2.5 - * dom0_ops.h
     2.6 - * 
     2.7 - * Process command requests from domain-0 guest OS.
     2.8 - * 
     2.9 - * Copyright (c) 2002, K A Fraser, B Dragovic
    2.10 - */
    2.11 -
    2.12 -#define DOM0_NEWDOMAIN   0
    2.13 -#define DOM0_KILLDOMAIN  1
    2.14 -#define DOM0_GETMEMLIST  2
    2.15 -#define DOM0_STARTDOM    4
    2.16 -#define MAP_DOM_MEM      6 /* Not passed down to Xen */
    2.17 -#define DO_PGUPDATES     7 /* Not passed down to Xen */
    2.18 -#define MAX_CMD          8
    2.19 -
    2.20 -#define MAX_CMD_LEN     256
    2.21 -
    2.22 -typedef struct dom0_newdomain_st
    2.23 -{
    2.24 -    unsigned int domain;
    2.25 -    unsigned int memory_kb;
    2.26 -    unsigned int num_vifs;  // temporary
    2.27 -    unsigned long pg_head;  // return parameter
    2.28 -} dom0_newdomain_t;
    2.29 -
    2.30 -typedef struct dom0_killdomain_st
    2.31 -{
    2.32 -    unsigned int domain;
    2.33 -    int          force;
    2.34 -} dom0_killdomain_t;
    2.35 -
    2.36 -typedef struct dom0_getmemlist_st
    2.37 -{
    2.38 -    unsigned long start_pfn;
    2.39 -    unsigned long num_pfns;
    2.40 -    void *buffer;
    2.41 -} dom0_getmemlist_t;
    2.42 -
    2.43 -/* This is entirely processed by XenoLinux */
    2.44 -typedef struct dom_mem 
    2.45 -{
    2.46 -    unsigned int domain;
    2.47 -    unsigned long vaddr;
    2.48 -    unsigned long start_pfn;
    2.49 -    int tot_pages;
    2.50 -} dom_mem_t;
    2.51 -
    2.52 -/* This is entirely processed by XenoLinux */
    2.53 -typedef struct dom_pgupdate
    2.54 -{
    2.55 -    unsigned long pgt_update_arr;
    2.56 -    unsigned long num_pgt_updates;
    2.57 -} dom_pgupdate_t;
    2.58 -
    2.59 -typedef struct domain_launch
    2.60 -{
    2.61 -    unsigned int domain;
    2.62 -    unsigned long l2_pgt_addr;
    2.63 -    unsigned long virt_load_addr;
    2.64 -    unsigned long virt_shinfo_addr;
    2.65 -    unsigned long virt_startinfo_addr;
    2.66 -    unsigned int num_vifs;
    2.67 -    char cmd_line[MAX_CMD_LEN];
    2.68 -} dom_meminfo_t;
    2.69 -
    2.70 -typedef struct dom0_op_st
    2.71 -{
    2.72 -    unsigned long cmd;
    2.73 -    union
    2.74 -    {
    2.75 -        dom0_newdomain_t newdomain;
    2.76 -        dom0_killdomain_t killdomain;
    2.77 -        dom0_getmemlist_t getmemlist;
    2.78 -        dom_mem_t dommem;
    2.79 -        dom_pgupdate_t pgupdate;
    2.80 -        dom_meminfo_t meminfo;
    2.81 -    }
    2.82 -    u;
    2.83 -} dom0_op_t;
    2.84 -
     3.1 --- a/xen/arch/i386/apic.c	Tue Mar 11 10:34:08 2003 +0000
     3.2 +++ b/xen/arch/i386/apic.c	Fri Mar 14 15:43:32 2003 +0000
     3.3 @@ -659,6 +659,13 @@ int reprogram_ac_timer(s_time_t timeout)
     3.4      s_time_t	expire;
     3.5      u64			apic_tmict;
     3.6  
     3.7 +    if (timeout == 0) {
     3.8 +        /* XXX RN: not sure if this disables it or causes the
     3.9 +         * interrupt to go off immediately */
    3.10 +        apic_tmict = 0;
    3.11 +        goto reprogram;
    3.12 +    }
    3.13 +
    3.14      now = NOW();
    3.15      expire = timeout - now;	/* value from now */
    3.16  
    3.17 @@ -680,6 +687,7 @@ int reprogram_ac_timer(s_time_t timeout)
    3.18          return 0;
    3.19      }
    3.20  
    3.21 + reprogram:
    3.22      /* program timer */
    3.23      apic_write(APIC_TMICT, (unsigned long)apic_tmict);
    3.24  
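
On the XXX question above: the IA-32 manuals state that writing 0 to the
initial-count register stops the local APIC timer, in both one-shot and
periodic mode, so the timeout == 0 path should act as a disable rather than
fire an immediate interrupt. Read that way, the shortcut amounts to this
(a sketch, not part of the changeset; apic_write/APIC_TMICT as in the file
above):

    /* Sketch: disable the local APIC timer. Per the IA-32 SDM, a write of
     * 0 to the initial-count register (TMICT) stops the countdown. */
    static inline void disable_apic_timer(void)
    {
        apic_write(APIC_TMICT, 0);
    }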
     4.1 --- a/xen/arch/i386/irq.c	Tue Mar 11 10:34:08 2003 +0000
     4.2 +++ b/xen/arch/i386/irq.c	Fri Mar 14 15:43:32 2003 +0000
     4.3 @@ -36,6 +36,7 @@
     4.4  #include <asm/pgalloc.h>
     4.5  #include <xeno/delay.h>
     4.6  
     4.7 +#include <xeno/perfc.h>
     4.8  
     4.9  /*
    4.10   * Linux has a controller-independent x86 interrupt architecture.
    4.11 @@ -469,6 +470,11 @@ asmlinkage unsigned int do_IRQ(struct pt
    4.12      struct irqaction * action;
    4.13      unsigned int status;
    4.14  
    4.15 +    u32     cc_start, cc_end;
    4.16 +
    4.17 +    perfc_incra(irqs, cpu);
    4.18 +    rdtscl(cc_start);
    4.19 +
    4.20      spin_lock(&desc->lock);
    4.21      desc->handler->ack(irq);
    4.22      /*
    4.23 @@ -530,6 +536,9 @@ asmlinkage unsigned int do_IRQ(struct pt
    4.24      if (softirq_pending(cpu))
    4.25          do_softirq();
    4.26  
    4.27 +    rdtscl(cc_end);
    4.28 +    perfc_adda(irq_time, cpu, cc_end - cc_start);
    4.29 +
    4.30      return 1;
    4.31  }
    4.32  
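
The accounting added to do_IRQ reads only the low 32 bits of the TSC; since
the delta is computed in unsigned 32-bit arithmetic, it stays correct across
a single wrap of the low word. The same pattern in user space (a runnable
illustration, with __rdtsc from <x86intrin.h> standing in for the kernel's
rdtscl):

    #include <stdio.h>
    #include <stdint.h>
    #include <x86intrin.h>             /* __rdtsc(), GCC/Clang on x86 */

    int main(void)
    {
        uint32_t cc_start = (uint32_t)__rdtsc();   /* low word, like rdtscl */

        volatile unsigned long sink = 0;           /* work being measured */
        for (int i = 0; i < 100000; i++)
            sink += i;

        uint32_t cc_end = (uint32_t)__rdtsc();
        /* unsigned subtraction is wrap-safe across one rollover */
        printf("cycles: %u\n", cc_end - cc_start);
        return 0;
    }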
     5.1 --- a/xen/common/ac_timer.c	Tue Mar 11 10:34:08 2003 +0000
     5.2 +++ b/xen/common/ac_timer.c	Fri Mar 14 15:43:32 2003 +0000
     5.3 @@ -23,9 +23,9 @@
     5.4  #include <xeno/errno.h>
     5.5  #include <xeno/sched.h>
     5.6  #include <xeno/lib.h>
     5.7 -#include <xeno/config.h>
     5.8  #include <xeno/smp.h>
     5.9 -#include <xeno/init.h>
    5.10 +
    5.11 +#include <xeno/perfc.h>
    5.12  
    5.13  #include <xeno/time.h>
    5.14  #include <xeno/ac_timer.h>
    5.15 @@ -34,20 +34,16 @@
    5.16  #include <asm/system.h>
    5.17  #include <asm/desc.h>
    5.18  
    5.19 -
    5.20 -#undef AC_TIMER_TRACE
    5.21 -#undef AC_TIMER_STATS
    5.22 -
    5.23  #ifdef AC_TIMER_TRACE
    5.24  #define TRC(_x) _x
    5.25  #else
    5.26  #define TRC(_x)
    5.27  #endif
    5.28  
    5.29 -/*
    5.30 +/*****************************************************************************
    5.31   * We pull handlers off the timer list this far in future,
    5.32   * rather than reprogramming the time hardware.
    5.33 - */
    5.34 + *****************************************************************************/
    5.35  #define TIMER_SLOP (50*1000) /* ns */
    5.36  
    5.37  /* A timer list per CPU */
    5.38 @@ -55,47 +51,35 @@ typedef struct ac_timers_st
    5.39  {
    5.40      spinlock_t lock;
    5.41      struct list_head timers;
    5.42 -    struct ac_timer *prev, *curr;
    5.43 +    s_time_t    max_diff;
    5.44  } __cacheline_aligned ac_timers_t;
    5.45  static ac_timers_t ac_timers[NR_CPUS];
    5.46  
    5.47 -#ifdef AC_TIMER_STATS
    5.48 -#define BUCKETS		1000
    5.49 -#define MAX_STATS
    5.50 -typedef struct act_stats_st
    5.51 -{
    5.52 -    u32 count;
    5.53 -    u32 times[2*(BUCKETS)];
    5.54 -} __cacheline_aligned act_stats_t;
    5.55 -static act_stats_t act_stats[NR_CPUS];
    5.56 -
    5.57 -#endif
    5.58 -
    5.59  /* local prototypes */
    5.60  static int  detach_ac_timer(struct ac_timer *timer);
    5.61 -/*static void ac_timer_debug(unsigned long);*/
    5.62 +
    5.63  
    5.64 -/*
    5.65 +/*****************************************************************************
    5.66   * add a timer.
    5.67   * return value:
    5.68   *  0: success
    5.69  *  1: failure, timer in the past or timeout value too small
    5.70   * -1: failure, timer uninitialised
    5.71   * fail
    5.72 - */
    5.73 + *****************************************************************************/
    5.74  int add_ac_timer(struct ac_timer *timer)
    5.75  {
    5.76 -    int 			 cpu = smp_processor_id();
    5.77 -    unsigned long 	 flags;
    5.78 -    s_time_t		 now;
    5.79 +    int              cpu = smp_processor_id();
    5.80 +    unsigned long    flags;
    5.81 +    s_time_t         now;
    5.82  
    5.83      /* make sure timeout value is in the future */
    5.84 -	
    5.85 +    
    5.86      now = NOW();
    5.87 -    if (timer->expires <= now) {	
    5.88 +    if (timer->expires <= now) {    
    5.89          TRC(printk("ACT[%02d] add_ac_timer:now=0x%08X%08X>expire=0x%08X%08X\n",
    5.90 -				   cpu, (u32)(now>>32), (u32)now,
    5.91 -				   (u32)(timer->expires>>32), (u32)timer->expires));
    5.92 +                   cpu, (u32)(now>>32), (u32)now,
    5.93 +                   (u32)(timer->expires>>32), (u32)timer->expires));
    5.94          return 1;
    5.95      }
    5.96      spin_lock_irqsave(&ac_timers[cpu].lock, flags);
    5.97 @@ -104,79 +88,89 @@ int add_ac_timer(struct ac_timer *timer)
    5.98       * reprogram the timer
    5.99       */
   5.100      if (list_empty(&ac_timers[cpu].timers)) {
   5.101 -        /* Reprogramm and add to head of list */
   5.102          if (!reprogram_ac_timer(timer->expires)) {
   5.103 +            printk("ACT[%02d] add at head failed\n", cpu);
   5.104              spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.105              return 1; /* failed */
   5.106          }
   5.107          list_add(&timer->timer_list, &ac_timers[cpu].timers);
   5.108      } else {
   5.109          struct list_head *pos;
   5.110 -        struct ac_timer	 *t;
   5.111 +        struct ac_timer  *t;
   5.112  
   5.113 -		list_for_each(pos, &ac_timers[cpu].timers) {
   5.114 -			t = list_entry(pos, struct ac_timer, timer_list);
   5.115 -			if (t->expires > timer->expires)
   5.116 +        list_for_each(pos, &ac_timers[cpu].timers) {
   5.117 +            t = list_entry(pos, struct ac_timer, timer_list);
   5.118 +            if (t->expires > timer->expires)
   5.119                  break;
   5.120 -		}
   5.121 -		list_add (&(timer->timer_list), pos->prev);
   5.122 +        }
   5.123 +        list_add (&(timer->timer_list), pos->prev);
   5.124  
   5.125 -		if (timer->timer_list.prev == &ac_timers[cpu].timers) {
   5.126 -			/* added at head */
   5.127 +        if (timer->timer_list.prev == &ac_timers[cpu].timers) {
   5.128 +            /* added at head */
   5.129              if (!reprogram_ac_timer(timer->expires)) {
   5.130 -				detach_ac_timer(timer);
   5.131 +                printk("ACT[%02d] add at head failed\n", cpu);
   5.132 +                detach_ac_timer(timer);
   5.133                  spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.134                  return 1; /* failed */
   5.135              }
   5.136 -		}
   5.137 +        }
   5.138      }
   5.139      spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.140      return 0;
   5.141  }
   5.142  
   5.143 -/*
   5.144 - * remove a timer
   5.145 +/*****************************************************************************
   5.146 + * detach a timer (no locking)
   5.147   * return values:
   5.148   *  0: success
   5.149   * -1: bogus timer
   5.150 - */
   5.151 + *****************************************************************************/
   5.152  static int detach_ac_timer(struct ac_timer *timer)
   5.153  {  
   5.154 -    TRC(int 			 cpu = smp_processor_id());
   5.155      TRC(printk("ACT  [%02d] detach(): \n", cpu));
   5.156      list_del(&timer->timer_list);
   5.157      timer->timer_list.next = NULL;
   5.158      return 0;
   5.159  }
   5.160  
   5.161 -/*
   5.162 +/*****************************************************************************
   5.163   * remove a timer
   5.164   * return values:
   5.165   *  0: success
   5.166   * -1: bogus timer
   5.167 - */
   5.168 + *****************************************************************************/
   5.169  int rem_ac_timer(struct ac_timer *timer)
   5.170  {
   5.171 -    int 		  cpu = smp_processor_id();
   5.172 +    int           cpu = smp_processor_id();
   5.173      int           res = 0;
   5.174      unsigned long flags;
   5.175  
   5.176      TRC(printk("ACT  [%02d] remove(): timo=%lld \n", cpu, timer->expires));
   5.177 +    spin_lock_irqsave(&ac_timers[cpu].lock, flags);
   5.178 +    if (timer->timer_list.next) {
   5.179 +        res = detach_ac_timer(timer);
   5.180  
   5.181 -    spin_lock_irqsave(&ac_timers[cpu].lock, flags);
   5.182 -	if (!timer->timer_list.next == NULL)
   5.183 -		res = detach_ac_timer(timer);	
   5.184 +        if (timer->timer_list.prev == &ac_timers[cpu].timers) {
   5.185 +            /* just removed the head */
   5.186 +            if (list_empty(&ac_timers[cpu].timers)) {
   5.187 +                reprogram_ac_timer((s_time_t) 0);
   5.188 +            }
   5.189 +            /* XXX should actually reprogram APIC to new head */
   5.190 +        }
   5.191 +    } else
   5.192 +        res = -1;
   5.193 +
   5.194      spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.195  
   5.196      return res;
   5.197  }
   5.198  
   5.199 -/*
   5.200 +/*****************************************************************************
   5.201   * modify a timer, i.e., set a new timeout value
   5.202   * return value:
   5.203  *  0: success
   5.204   * -1: error
   5.205 - */
   5.206 + *****************************************************************************/
   5.207  int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
   5.208  {
   5.209      if (rem_ac_timer(timer) != 0)
   5.210 @@ -187,69 +181,59 @@ int mod_ac_timer(struct ac_timer *timer,
   5.211      return 0;
   5.212  }
   5.213  
   5.214 -/*
   5.215 +/*****************************************************************************
   5.216   * do_ac_timer
   5.217   * deal with timeouts and run the handlers
   5.218 - */
   5.219 + *****************************************************************************/
   5.220  void do_ac_timer(void)
   5.221  {
   5.222 -    int 			 cpu = smp_processor_id();
   5.223 -    unsigned long 	 flags;
   5.224 -    struct ac_timer	 *t;
   5.225 +    int              cpu = smp_processor_id();
   5.226 +    unsigned long    flags;
   5.227 +    struct ac_timer  *t;
   5.228 +    s_time_t diff, now = NOW();
   5.229 +    long max;
   5.230  
   5.231      spin_lock_irqsave(&ac_timers[cpu].lock, flags);
   5.232  
   5.233   do_timer_again:
   5.234  
   5.235      TRC(printk("ACT  [%02d] do(): now=%lld\n", cpu, NOW()));
   5.236 -		
   5.237 -	/* Sanity: is the timer list empty? */
   5.238 -    if ( list_empty(&ac_timers[cpu].timers) )
   5.239 -        printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu);
   5.240 -
   5.241 -#ifdef AC_TIMER_STATS
   5.242 -    {
   5.243 -        s32	diff;
   5.244 -        u32 i;
   5.245 -        diff = ((s32)(NOW() - t->expires)) / 1000; /* delta in us */
   5.246 -        if (diff < -BUCKETS)
   5.247 -            diff = -BUCKETS;
   5.248 -        else if (diff > BUCKETS)
   5.249 -            diff = BUCKETS;
   5.250 -        act_stats[cpu].times[diff+BUCKETS]++;
   5.251 -        act_stats[cpu].count++;
   5.252 -
   5.253 -        if (act_stats[cpu].count >= 5000) {
   5.254 -            printk("ACT Stats\n");
   5.255 -			for (i=0; i < 2*BUCKETS; i++) {
   5.256 -				if (act_stats[cpu].times[i] != 0)
   5.257 -                    printk("ACT [%02d]: %3dus: %5d\n",
   5.258 -                           cpu,i-BUCKETS, act_stats[cpu].times[i]);
   5.259 -                act_stats[cpu].times[i]=0;
   5.260 -            }
   5.261 -            act_stats[cpu].count = 0;
   5.262 -            printk("\n");
   5.263 -        }
   5.264 +        
   5.265 +    /* Sanity: is the timer list empty? */
   5.266 +    if ( list_empty(&ac_timers[cpu].timers) ) {
   5.267 +        /*
   5.268 +         * XXX RN: This shouldn't happen, but does! Two possibilities:
   5.269 +         * - Race condition between removing and resetting the APIC
   5.270 +         * - setting an APIC timeout value of 0 causes an immediate
   5.271 +         *   timer interrupt to fire.
   5.272 +         * Neither of these should be critical!
   5.273 +         */
   5.274 +        spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.275 +        return;
   5.276      }
   5.277 -#endif
   5.278  
   5.279      /* Handle all timeouts in the near future. */
   5.280      while ( !list_empty(&ac_timers[cpu].timers) )
   5.281      {
   5.282 -        t = list_entry(ac_timers[cpu].timers.next, 
   5.283 -                       struct ac_timer, timer_list);
   5.284 +        t = list_entry(ac_timers[cpu].timers.next,struct ac_timer, timer_list);
   5.285          if ( t->expires > (NOW() + TIMER_SLOP) ) break;
   5.286 +
   5.287 +        /* do some stats */
   5.288 +        diff = (now - t->expires);
   5.289 +        if (diff > 0x7fffffff) diff = 0x7fffffff; /* THIS IS BAD! */
   5.290 +        max = perfc_valuea(ac_timer_max, cpu);
   5.291 +        if (diff > max) perfc_seta(ac_timer_max, cpu, diff);
   5.292 +
   5.293          detach_ac_timer(t);
   5.294          spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.295          if ( t->function != NULL ) t->function(t->data);
   5.296          spin_lock_irqsave(&ac_timers[cpu].lock, flags);
   5.297      }
   5.298 -		
   5.299 +        
   5.300      /* If list not empty then reprogram timer to new head of list */
   5.301      if ( !list_empty(&ac_timers[cpu].timers) )
   5.302      {
   5.303 -        t = list_entry(ac_timers[cpu].timers.next, 
   5.304 -                       struct ac_timer, timer_list);
   5.305 +        t = list_entry(ac_timers[cpu].timers.next,struct ac_timer, timer_list);
   5.306          if ( t->expires > 0 )
   5.307          {
   5.308              TRC(printk("ACT  [%02d] do(): reprog timo=%lld\n",cpu,t->expires));
   5.309 @@ -259,21 +243,23 @@ void do_ac_timer(void)
   5.310                  goto do_timer_again;
   5.311              }
   5.312          }
   5.313 +    } else {
   5.314 +        reprogram_ac_timer((s_time_t) 0);
   5.315      }
   5.316  
   5.317      spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
   5.318      TRC(printk("ACT  [%02d] do(): end\n", cpu));
   5.319  }
   5.320  
   5.321 -/*
   5.322 +/*****************************************************************************
   5.323   * debug dump_queue
   5.324   * arguments: queue head, name of queue
   5.325 - */
   5.326 + *****************************************************************************/
   5.327  static void dump_tqueue(struct list_head *queue, char *name)
   5.328  {
   5.329      struct list_head *list;
   5.330      int loop = 0;
   5.331 -    struct ac_timer	 *t;
   5.332 +    struct ac_timer  *t;
   5.333  
   5.334      printk ("QUEUE %s %lx   n: %lx, p: %lx\n", name,  (unsigned long)queue,
   5.335              (unsigned long) queue->next, (unsigned long) queue->prev);
   5.336 @@ -288,19 +274,21 @@ static void dump_tqueue(struct list_head
   5.337      return; 
   5.338  }
   5.339  
   5.340 -
   5.341 -static void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs)
   5.342 +void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs)
   5.343  {
   5.344      u_long   flags; 
   5.345      s_time_t now = NOW();
   5.346 +    int i;
   5.347  
   5.348 -    printk("Dumping ac_timer queues for cpu 0: NOW=0x%08X%08X\n",
   5.349 +    printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
   5.350             (u32)(now>>32), (u32)now); 
   5.351 -	
   5.352 -    spin_lock_irqsave(&ac_timers[0].lock, flags);
   5.353 -    dump_tqueue(&ac_timers[0].timers, "ac_time"); 
   5.354 -    spin_unlock_irqrestore(&ac_timers[0].lock, flags);
   5.355 -    printk("\n");
   5.356 +    for (i = 0; i < smp_num_cpus; i++) {
   5.357 +        printk("CPU[%02d] ", i);
   5.358 +        spin_lock_irqsave(&ac_timers[i].lock, flags);
   5.359 +        dump_tqueue(&ac_timers[i].timers, "ac_time"); 
   5.360 +        spin_unlock_irqrestore(&ac_timers[i].lock, flags);
   5.361 +        printk("\n");
   5.362 +    }
   5.363      return; 
   5.364  }
   5.365  
   5.366 @@ -316,6 +304,51 @@ void __init ac_timer_init(void)
   5.367          INIT_LIST_HEAD(&ac_timers[i].timers);
   5.368          spin_lock_init(&ac_timers[i].lock);
   5.369      }
   5.370 +}
   5.371  
   5.372 -    add_key_handler('a', dump_timerq, "dump ac_timer queues");
   5.373 -}
   5.374 +/*****************************************************************************
   5.375 + * GRAVEYARD
   5.376 + *****************************************************************************/
   5.377 +
   5.378 +#if 0
   5.379 +
   5.380 +#ifdef AC_TIMER_STATS
   5.381 +#define BUCKETS     1000
   5.382 +#define MAX_STATS
   5.383 +typedef struct act_stats_st
   5.384 +{
   5.385 +    u32 count;
   5.386 +    u32 times[2*(BUCKETS)];
   5.387 +} __cacheline_aligned act_stats_t;
   5.388 +static act_stats_t act_stats[NR_CPUS];
   5.389 +
   5.390 +#endif
   5.391 +
   5.392 +#ifdef AC_TIMER_STATS
   5.393 +    {
   5.394 +        XXX this is at the wrong place
   5.395 +        s32 diff;
   5.396 +        u32 i;
   5.397 +        diff = ((s32)(NOW() - t->expires)) / 1000; /* delta in us */
   5.398 +        if (diff < -BUCKETS)
   5.399 +            diff = -BUCKETS;
   5.400 +        else if (diff > BUCKETS)
   5.401 +            diff = BUCKETS;
   5.402 +        act_stats[cpu].times[diff+BUCKETS]++;
   5.403 +        act_stats[cpu].count++;
   5.404 +
   5.405 +        if (act_stats[cpu].count >= 5000) {
   5.406 +            printk("ACT Stats\n");
   5.407 +            for (i=0; i < 2*BUCKETS; i++) {
   5.408 +                if (act_stats[cpu].times[i] != 0)
   5.409 +                    printk("ACT [%02d]: %3dus: %5d\n",
   5.410 +                           cpu,i-BUCKETS, act_stats[cpu].times[i]);
   5.411 +                act_stats[cpu].times[i]=0;
   5.412 +            }
   5.413 +            act_stats[cpu].count = 0;
   5.414 +            printk("\n");
   5.415 +        }
   5.416 +    }
   5.417 +#endif
   5.418 +
   5.419 +#endif /* 0 */
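
add_ac_timer above follows a common pattern: keep the per-CPU list sorted by
expiry and touch the hardware only when the new entry becomes the head. The
pattern in isolation (a runnable sketch with a bare singly linked list; the
names are hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    typedef int64_t s_time_t;

    struct timer {
        s_time_t expires;
        struct timer *next;
    };

    /* insert sorted by expiry; returns 1 if 't' became the new head,
     * i.e. the caller must reprogram the hardware (cf. reprogram_ac_timer) */
    static int timer_insert(struct timer **head, struct timer *t)
    {
        struct timer **pp = head;
        while (*pp && (*pp)->expires <= t->expires)
            pp = &(*pp)->next;
        t->next = *pp;
        *pp = t;
        return pp == head;
    }

    int main(void)
    {
        struct timer a = { 300, 0 }, b = { 100, 0 }, c = { 200, 0 };
        struct timer *head = 0;
        int ra = timer_insert(&head, &a);   /* 1: head of empty list */
        int rb = timer_insert(&head, &b);   /* 1: earlier expiry, new head */
        int rc = timer_insert(&head, &c);   /* 0: lands in the middle */
        printf("%d %d %d\n", ra, rb, rc);
        return 0;
    }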
     6.1 --- a/xen/common/keyhandler.c	Tue Mar 11 10:34:08 2003 +0000
     6.2 +++ b/xen/common/keyhandler.c	Fri Mar 14 15:43:32 2003 +0000
     6.3 @@ -19,18 +19,18 @@ void add_key_handler(u_char key, key_han
     6.4      char *str; 
     6.5  
     6.6      if(key_table[key].handler != NULL) 
     6.7 -	printk("Warning: overwriting handler for key 0x%x\n", key); 
     6.8 +    printk("Warning: overwriting handler for key 0x%x\n", key); 
     6.9  
    6.10      key_table[key].handler = handler; 
    6.11  
    6.12      str = key_table[key].desc; 
    6.13      for(i = 0; i < STR_MAX; i++) {
    6.14 -	if(*desc) 
    6.15 -	    *str++ = *desc++; 
    6.16 -	else break; 
    6.17 +    if(*desc) 
    6.18 +        *str++ = *desc++; 
    6.19 +    else break; 
    6.20      }
    6.21      if (i == STR_MAX) 
    6.22 -	key_table[key].desc[STR_MAX-1] = '\0'; 
    6.23 +    key_table[key].desc[STR_MAX-1] = '\0'; 
    6.24  
    6.25      return; 
    6.26  }
    6.27 @@ -47,10 +47,10 @@ void show_handlers(u_char key, void *dev
    6.28  
    6.29      printk("'%c' pressed -> showing installed handlers\n", key); 
    6.30      for(i=0; i < KEY_MAX; i++) 
    6.31 -	if(key_table[i].handler) 
    6.32 -	    printk(" key '%c' (ascii '%02x') => %s\n", 
    6.33 -			(i<33 || i>126)?(' '):(i),i,
    6.34 -			key_table[i].desc);
    6.35 +    if(key_table[i].handler) 
    6.36 +        printk(" key '%c' (ascii '%02x') => %s\n", 
    6.37 +            (i<33 || i>126)?(' '):(i),i,
    6.38 +            key_table[i].desc);
    6.39      return; 
    6.40  }
    6.41  
    6.42 @@ -94,36 +94,42 @@ void do_task_queues(u_char key, void *de
    6.43      p = &idle0_task;
    6.44      do {
    6.45          printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
    6.46 -	       "hyp_events = %08x\n", 
    6.47 -	       p->domain, p->processor, p->has_cpu ? 'T':'F', 
    6.48 -	       task_states[p->state], p->hyp_events); 
    6.49 -	s = p->shared_info; 
    6.50 -	if(!is_idle_task(p)) {
    6.51 -	    printk("Guest: events = %08lx, event_enable = %08lx\n", 
    6.52 -		   s->events, s->events_enable); 
    6.53 -	    printk("Notifying guest...\n"); 
    6.54 -	    set_bit(_EVENT_DEBUG, &s->events); 
    6.55 -	}
    6.56 +           "hyp_events = %08x\n", 
    6.57 +           p->domain, p->processor, p->has_cpu ? 'T':'F', 
    6.58 +           task_states[p->state], p->hyp_events); 
    6.59 +    s = p->shared_info; 
    6.60 +    if(!is_idle_task(p)) {
    6.61 +        printk("Guest: events = %08lx, event_enable = %08lx\n", 
    6.62 +           s->events, s->events_enable); 
    6.63 +        printk("Notifying guest...\n"); 
    6.64 +        set_bit(_EVENT_DEBUG, &s->events); 
    6.65 +    }
    6.66      } while ( (p = p->next_task) != &idle0_task );
    6.67  
    6.68      read_unlock_irqrestore(&tasklist_lock, flags); 
    6.69  }
    6.70  
    6.71  
    6.72 +extern void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs);
    6.73 +extern void dump_runq(u_char key, void *dev_id, struct pt_regs *regs);
    6.74 +
    6.75 +
    6.76  void initialize_keytable() 
    6.77  {
    6.78      int i; 
    6.79  
    6.80      /* first initialize key handler table */
    6.81      for(i = 0; i < KEY_MAX; i++) 
    6.82 -	key_table[i].handler = (key_handler *)NULL; 
    6.83 -	
    6.84 +    key_table[i].handler = (key_handler *)NULL; 
    6.85 +    
    6.86      /* setup own handlers */
    6.87 +    add_key_handler('a', dump_timerq,    "dump ac_timer queues");
    6.88      add_key_handler('d', dump_registers, "dump registers"); 
    6.89 -    add_key_handler('h', show_handlers, "show this message");
    6.90 +    add_key_handler('h', show_handlers,  "show this message");
    6.91      add_key_handler('p', perfc_printall, "print performance counters"); 
    6.92      add_key_handler('q', do_task_queues, "dump task queues + guest state");
    6.93 -    add_key_handler('R', halt_machine, "reboot machine ungracefully"); 
    6.94 +    add_key_handler('r', dump_runq,      "dump run queue"); 
    6.95 +    add_key_handler('R', halt_machine,   "reboot machine ungracefully"); 
    6.96      
    6.97      return; 
    6.98  }
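
The keyhandler changes hang the new dump functions off a simple key-indexed
dispatch table. Stripped of the Xen plumbing, the registration/dispatch
pattern looks like this (a runnable sketch; the stub handler is
hypothetical):

    #include <stdio.h>

    #define KEY_MAX 256

    typedef void key_handler(unsigned char key);
    static key_handler *key_table[KEY_MAX];

    static void add_key_handler(unsigned char key, key_handler *h)
    {
        if (key_table[key])
            printf("Warning: overwriting handler for key 0x%x\n", key);
        key_table[key] = h;
    }

    static void dump_runq_stub(unsigned char key)
    {
        printf("'%c' pressed -> would dump run queues\n", key);
    }

    int main(void)
    {
        add_key_handler('r', dump_runq_stub);
        if (key_table['r'])
            key_table['r']('r');           /* dispatch on keypress */
        return 0;
    }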
     7.1 --- a/xen/common/schedule.c	Tue Mar 11 10:34:08 2003 +0000
     7.2 +++ b/xen/common/schedule.c	Fri Mar 14 15:43:32 2003 +0000
     7.3 @@ -40,8 +40,8 @@
     7.4  #endif
     7.5  
     7.6  
     7.7 -#define MCU			(s32)MICROSECS(100)		/* Minimum unit */
     7.8 -#define CTX_ALLOW	(s32)MILLISECS(10)		/* context switch allowance */
     7.9 +#define MCU         (s32)MICROSECS(100)     /* Minimum unit */
    7.10 +static s32 ctx_allow=(s32)MILLISECS(10);    /* context switch allowance */
    7.11  
    7.12  /*****************************************************************************
    7.13   * per CPU data for the scheduler.
    7.14 @@ -50,15 +50,15 @@ typedef struct schedule_data_st
    7.15  {
    7.16      spinlock_t          lock;           /* lock for protecting this */
    7.17      struct list_head    runqueue;       /* runqueue */
    7.18 -    struct task_struct *prev, *curr;	/* dito */
    7.19 -
    7.20 -	long				svt;			/* system virtual time. per CPU??? */
    7.21 -	struct ac_timer     s_timer;		/* scheduling timer  */
    7.22 +    struct task_struct *prev, *curr;    /* previous and current task */
    7.23 +    struct task_struct *idle;           /* idle task for this cpu */
    7.24 +    u32                 svt;            /* system virtual time. per CPU??? */
    7.25 +    struct ac_timer     s_timer;        /* scheduling timer  */
    7.26  
    7.27  } __cacheline_aligned schedule_data_t;
    7.28  schedule_data_t schedule_data[NR_CPUS];
    7.29  
    7.30 -struct ac_timer     v_timer;		/* scheduling timer  */
    7.31 +struct ac_timer     v_timer;        /* scheduling timer  */
    7.32  static void virt_timer(unsigned long foo);
    7.33  
    7.34  
    7.35 @@ -68,7 +68,7 @@ static void virt_timer(unsigned long foo
    7.36  /* add a task to the head of the runqueue */
    7.37  static inline void __add_to_runqueue_head(struct task_struct * p)
    7.38  {
    7.39 -	
    7.40 +    
    7.41      list_add(&p->run_list, &schedule_data[p->processor].runqueue);
    7.42  }
    7.43  /* add a task to the tail of the runqueue */
    7.44 @@ -97,11 +97,19 @@ static inline int __task_on_runqueue(str
    7.45  ******************************************************************************/
    7.46  void sched_add_domain(struct task_struct *p) 
    7.47  {
    7.48 -    p->state    = TASK_UNINTERRUPTIBLE;
    7.49 -	/* set avt end evt to system virtual time */
    7.50 -	p->avt		= schedule_data[p->processor].svt;
    7.51 -	p->evt		= schedule_data[p->processor].svt;
    7.52 -	/* RN: XXX BVT fill in other bits */
    7.53 +    p->state       = TASK_UNINTERRUPTIBLE;
    7.54 +    p->mcu_advance = 10;
    7.55 +
    7.56 +    if (p->domain == IDLE_DOMAIN_ID) {
    7.57 +        p->avt = 0xffffffff;
    7.58 +        p->evt = 0xffffffff;
    7.59 +        schedule_data[p->processor].idle = p;
    7.60 +    } else {
    7.61 +        /* set avt and evt to system virtual time */
    7.62 +        p->avt         = schedule_data[p->processor].svt;
    7.63 +        p->evt         = schedule_data[p->processor].svt;
    7.64 +        /* RN: XXX BVT fill in other bits */
    7.65 +    }
    7.66  }
    7.67  
    7.68  void sched_rem_domain(struct task_struct *p) 
    7.69 @@ -117,16 +125,20 @@ int wake_up(struct task_struct *p)
    7.70  {
    7.71      unsigned long flags;
    7.72      int ret = 0;
    7.73 +
    7.74      spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
    7.75 +
    7.76      if ( __task_on_runqueue(p) ) goto out;
    7.77 -    p->state = TASK_RUNNING;
    7.78  
    7.79 -	/* set the BVT parameters */
    7.80 -	if (p->avt < schedule_data[p->processor].svt)
    7.81 -		p->avt = schedule_data[p->processor].svt;
    7.82 -	p->evt = p->avt; /* RN: XXX BVT deal with warping here */
    7.83 -	
    7.84 +    p->state = TASK_RUNNING;
    7.85      __add_to_runqueue_head(p);
    7.86 +
    7.87 +    /* set the BVT parameters */
    7.88 +    if (p->avt < schedule_data[p->processor].svt)
    7.89 +        p->avt = schedule_data[p->processor].svt;
    7.90 +
    7.91 +    p->evt = p->avt; /* RN: XXX BVT deal with warping here */
    7.92 +
    7.93      ret = 1;
    7.94  
    7.95   out:
    7.96 @@ -134,30 +146,56 @@ int wake_up(struct task_struct *p)
    7.97      return ret;
    7.98  }
    7.99  
   7.100 -/* RN: XXX turn this into do_halt() */
   7.101  /****************************************************************************
   7.102   * Domain requested scheduling operations
   7.103   ****************************************************************************/
   7.104  long do_sched_op(void)
   7.105  {
   7.106 +    /* XXX implement properly */
   7.107      current->state = TASK_INTERRUPTIBLE;
   7.108      schedule();
   7.109      return 0;
   7.110  }
   7.111  
   7.112  /****************************************************************************
   7.113 + * Control the scheduler
   7.114 + ****************************************************************************/
   7.115 +long sched_bvtctl(unsigned long c_allow)
   7.116 +{
   7.117 +    printk("sched: bvtctl %lu\n", c_allow);
   7.118 +    ctx_allow = c_allow;
   7.119 +    return 0;
   7.120 +}
   7.121 +
   7.122 +/****************************************************************************
   7.123   * Adjust scheduling parameter for a given domain
   7.124   ****************************************************************************/
   7.125  long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, 
   7.126 -				 unsigned long warpl, unsigned long warpu)
   7.127 +                 unsigned long warpl, unsigned long warpu)
   7.128  {
   7.129 -	printk("sched: adjdom %02d %lu %lu %lu %lu\n",
   7.130 -		   dom, mcu_adv, warp, warpl, warpu);
   7.131 -	return 0;
   7.132 +    struct task_struct *p;
   7.133 +
   7.134 +    printk("sched: adjdom %02d %lu %lu %lu %lu\n",
   7.135 +           dom, mcu_adv, warp, warpl, warpu);
   7.136 +
   7.137 +    p = find_domain_by_id(dom);
   7.138 +    if ( p == NULL ) return -ESRCH;
   7.139 +
   7.140 +    spin_lock_irq(&schedule_data[p->processor].lock);   
   7.141 +
   7.142 +    p->mcu_advance = mcu_adv;
   7.143 +
   7.144 +    spin_unlock_irq(&schedule_data[p->processor].lock); 
   7.145 +
   7.146 +    return 0;
   7.147  }
   7.148  
   7.149  /****************************************************************************
   7.150   * cause a run through the scheduler when appropriate
   7.151 + * Appropriate is:
   7.152 + * - the current task is the idle task
   7.153 + * - the new process's evt is lower than the current one's
   7.154 + * - the current task has already run for its context switch allowance
   7.155   ****************************************************************************/
   7.156  void reschedule(struct task_struct *p)
   7.157  {
   7.158 @@ -166,16 +204,20 @@ void reschedule(struct task_struct *p)
   7.159      unsigned long flags;
   7.160  
   7.161      if (p->has_cpu)
   7.162 -		return;
   7.163 +        return;
   7.164  
   7.165      spin_lock_irqsave(&schedule_data[cpu].lock, flags);
   7.166      curr = schedule_data[cpu].curr;
   7.167 -    if (is_idle_task(curr)) {
   7.168 +
   7.169 +    if ( is_idle_task(curr) ||
   7.170 +         (p->evt < curr->evt) ||
   7.171 +         (curr->lastschd + ctx_allow >= NOW()) ) {
   7.172 +        /* reschedule */
   7.173          set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.174          spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   7.175  #ifdef CONFIG_SMP
   7.176          if (cpu != smp_processor_id())
   7.177 -			smp_send_event_check_cpu(cpu);
   7.178 +            smp_send_event_check_cpu(cpu);
   7.179  #endif
   7.180      } else {
   7.181          spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   7.182 @@ -194,27 +236,26 @@ asmlinkage void schedule(void)
   7.183  {
   7.184      struct task_struct *prev, *next, *next_prime, *p;
   7.185      struct list_head   *tmp;
   7.186 -    int 				this_cpu;
   7.187 -	s_time_t			now;
   7.188 -	s32					r_time;		/* time for new dom to run */
   7.189 -	s32					ranfor;	    /* assume we never run longer than 2.1s! */
   7.190 -	s32					mcus;
   7.191 -	u32					next_evt, next_prime_evt;
   7.192 +    int                 this_cpu;
   7.193 +    s_time_t            now;
   7.194 +    s32                 r_time;     /* time for new dom to run */
   7.195 +    s32                 ranfor;     /* assume we never run longer than 2.1s! */
   7.196 +    s32                 mcus;
   7.197 +    u32                 next_evt, next_prime_evt, min_avt;
   7.198  
   7.199 -	perfc_incrc(sched_run1);
   7.200 +    perfc_incrc(sched_run1);
   7.201   need_resched_back:
   7.202 -	perfc_incrc(sched_run2);
   7.203 +    perfc_incrc(sched_run2);
   7.204  
   7.205 -	now = NOW();
   7.206 -
   7.207 -	/* remove timer  */
   7.208 -	rem_ac_timer(&schedule_data[smp_processor_id()].s_timer);
   7.209 -
   7.210 +    now = NOW();
   7.211      next = NULL;
   7.212      prev = current;
   7.213      this_cpu = prev->processor;
   7.214  
   7.215 -	/*
   7.216 +    /* remove timer  */
   7.217 +    rem_ac_timer(&schedule_data[this_cpu].s_timer);
   7.218 +
   7.219 +    /*
   7.220       * deschedule the current domain
   7.221       */
   7.222  
   7.223 @@ -223,95 +264,115 @@ asmlinkage void schedule(void)
   7.224      ASSERT(!in_interrupt());
   7.225      ASSERT(__task_on_runqueue(prev));
   7.226  
   7.227 -	if (is_idle_task(prev)) 
   7.228 -		goto deschedule_done;
   7.229 +    if (is_idle_task(prev)) 
   7.230 +        goto deschedule_done;
   7.231  
   7.232 -	/* do some accounting */
   7.233 -	ranfor = (s32)(now - prev->lastschd);
   7.234 +    /* do some accounting */
   7.235 +    ranfor = (s32)(now - prev->lastschd);
   7.236      ASSERT((ranfor>0));
   7.237 -	prev->cpu_time += ranfor;
   7.238 -	
   7.239 -	/* calculate mcu and update avt */
   7.240 -	mcus = ranfor/MCU;
   7.241 -	if (ranfor % MCU) mcus ++;	/* always round up */
   7.242 -	prev->avt += mcus * prev->mcu_advance;
   7.243 -	prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */
   7.244 +    prev->cpu_time += ranfor;
   7.245 +    
   7.246 +    /* calculate mcu and update avt */
   7.247 +    mcus = ranfor/MCU;
   7.248 +    if (ranfor % MCU) mcus ++;  /* always round up */
   7.249 +    prev->avt += mcus * prev->mcu_advance;
   7.250 +    prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */
   7.251  
   7.252 -	/* dequeue */
   7.253 -	__del_from_runqueue(prev);
   7.254 -	switch (prev->state) {
   7.255 -	case TASK_INTERRUPTIBLE:
   7.256 -		if (signal_pending(prev)) {
   7.257 -			prev->state = TASK_RUNNING; /* but has events pending */
   7.258 -			break;
   7.259 -		}
   7.260 -	case TASK_UNINTERRUPTIBLE:
   7.261 -	case TASK_WAIT:
   7.262 -	case TASK_DYING:
   7.263 -	default:
   7.264 -		/* done if not running. Else, continue */
   7.265 -		goto deschedule_done;
   7.266 -	case TASK_RUNNING:;
   7.267 -	}
   7.268 +    /* dequeue */
   7.269 +    __del_from_runqueue(prev);
   7.270 +    switch (prev->state) {
   7.271 +    case TASK_INTERRUPTIBLE:
   7.272 +        if (signal_pending(prev)) {
   7.273 +            prev->state = TASK_RUNNING; /* but has events pending */
   7.274 +            break;
   7.275 +        }
   7.276 +    case TASK_UNINTERRUPTIBLE:
   7.277 +    case TASK_WAIT:
   7.278 +    case TASK_DYING:
   7.279 +    default:
   7.280 +        /* done if not running. Else, continue */
   7.281 +        goto deschedule_done;
   7.282 +    case TASK_RUNNING:;
   7.283 +    }
   7.284  
   7.285 -	/* requeue */
   7.286 -	__add_to_runqueue_tail(prev);
   7.287 -	
   7.288 +    /* requeue */
   7.289 +    __add_to_runqueue_tail(prev);
   7.290 +    
   7.291  
   7.292   deschedule_done:
   7.293      clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
   7.294  
   7.295 -	/*
   7.296 +    /*
   7.297       * Pick a new domain
   7.298       */
   7.299  
   7.300 -	/* we should at least have the idle task */
   7.301 -	ASSERT(!list_empty(&schedule_data[smp_processor_id()].runqueue));
   7.302 +    /* we should at least have the idle task */
   7.303 +    ASSERT(!list_empty(&schedule_data[this_cpu].runqueue));
   7.304  
   7.305 -	/*
   7.306 +    /*
   7.307       * scan through the run queue and pick the task with the lowest evt
   7.308       * *and* the task the second lowest evt.
   7.309 -	 * this code is O(n) but we expect n to be small.
   7.310 +     * this code is O(n) but we expect n to be small.
   7.311       */
   7.312 -	next       = NULL;
   7.313 -	next_prime = NULL;
   7.314 +    next       = schedule_data[this_cpu].idle;
   7.315 +    next_prime = NULL;
   7.316  
   7.317 -	next_evt       = 0xffffffff;
   7.318 -	next_prime_evt = 0xffffffff;
   7.319 +    next_evt       = 0xffffffff;
   7.320 +    next_prime_evt = 0xffffffff;
   7.321 +    min_avt        = 0xffffffff;    /* to calculate svt */
   7.322 +
   7.323  
   7.324 -	list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
   7.325 -		p = list_entry(tmp, struct task_struct, run_list);
   7.326 -		if (p->evt < next_evt) {
   7.327 -			next_prime     = next;
   7.328 -			next_prime_evt = next_evt;
   7.329 -			next = p;
   7.330 -			next_evt = p->evt;
   7.331 -		}
   7.332 -	}
   7.333 -	ASSERT(next != NULL);	/* we should have at least the idle task */
   7.334 +    list_for_each(tmp, &schedule_data[this_cpu].runqueue) {
   7.335 +        p = list_entry(tmp, struct task_struct, run_list);
   7.336 +        if (p->evt < next_evt) {
   7.337 +            next_prime     = next;
   7.338 +            next_prime_evt = next_evt;
   7.339 +            next = p;
   7.340 +            next_evt = p->evt;
   7.341 +        } else if (next_prime_evt == 0xffffffff) {
   7.342 +            next_prime_evt = p->evt;
   7.343 +            next_prime     = p;
   7.344 +        } else if (p->evt < next_prime_evt) {
   7.345 +            next_prime_evt = p->evt;
   7.346 +            next_prime     = p;
   7.347 +        }
   7.348 +        /* determine system virtual time */
   7.349 +        if (p->avt < min_avt)
   7.350 +            min_avt = p->avt;
   7.351 +    }
   7.352 +    ASSERT(next != NULL);   /* we should have at least the idle task */
   7.353  
   7.354 -	if (next == NULL || is_idle_task(next)) {
   7.355 -		next = &idle0_task;	/* to be sure */
   7.356 -		r_time = CTX_ALLOW;
   7.357 -		goto sched_done;
   7.358 -	}
   7.359 +    /* update system virtual time  */
   7.360 +    if (min_avt != 0xffffffff) schedule_data[this_cpu].svt = min_avt;
   7.361 +
   7.362 +    if (is_idle_task(next)) {
   7.363 +        r_time = ctx_allow;
   7.364 +        goto sched_done;
   7.365 +    }
   7.366  
   7.367 -	if (next_prime == NULL || is_idle_task(next_prime)) {
   7.368 -		/* we have only one runable task besides the idle task */
   7.369 -		r_time = CTX_ALLOW;		/* RN: XXX should be much larger */
   7.370 -		goto sched_done;
   7.371 -	}
   7.372 +    if (next_prime == NULL || is_idle_task(next_prime)) {
   7.373 +        /* we have only one runnable task besides the idle task */
   7.374 +        r_time = 10 * ctx_allow;     /* RN: random constant */
   7.375 +        goto sched_done;
   7.376 +    }
   7.377  
   7.378 -	/*
   7.379 +    /*
   7.380      * if we are here we have two runnable tasks.
   7.381 -	 * work out how long 'next' can run till its evt is greater than
   7.382 +     * work out how long 'next' can run till its evt is greater than
   7.383       * 'next_prime's evt. Taking context switch allowance into account.
   7.384       */
   7.385 -	r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + CTX_ALLOW;
   7.386 +    ASSERT(next_prime->evt > next->evt);
   7.387 +    r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + ctx_allow;
   7.388  
   7.389   sched_done:
   7.390 -	ASSERT(r_time != 0);
   7.391 -	ASSERT(r_time > 0);
   7.392 +    ASSERT(r_time != 0);
   7.393 +    ASSERT(r_time > ctx_allow);
   7.394 +
   7.395 +    if ( (r_time==0) || (r_time < ctx_allow)) {
   7.396 +        printk("[%02d]: %lx\n", this_cpu, r_time);
   7.397 +        dump_rqueue(&schedule_data[this_cpu].runqueue, "foo");
   7.398 +    }
   7.399 +
   7.400  
   7.401      prev->has_cpu = 0;
   7.402      next->has_cpu = 1;
   7.403 @@ -319,16 +380,16 @@ asmlinkage void schedule(void)
   7.404      schedule_data[this_cpu].prev = prev;
   7.405      schedule_data[this_cpu].curr = next;
   7.406  
   7.407 -	next->lastschd = now;
   7.408 +    next->lastschd = now;
   7.409  
   7.410 -	/* reprogramm the timer */
   7.411 +    /* reprogram the timer */
   7.412   timer_redo:
   7.413 -	schedule_data[this_cpu].s_timer.expires  = now + r_time;
   7.414 -	if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
   7.415 -		printk("SCHED: Shit this shouldn't happen\n");
   7.416 -		now = NOW();
   7.417 -		goto timer_redo;
   7.418 -	}
   7.419 +    schedule_data[this_cpu].s_timer.expires  = now + r_time;
   7.420 +    if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
   7.421 +        printk("SCHED[%02d]: Shit this shouldn't happen\n", this_cpu);
   7.422 +        now = NOW();
   7.423 +        goto timer_redo;
   7.424 +    }
   7.425  
   7.426      spin_unlock_irq(&schedule_data[this_cpu].lock);
   7.427  
   7.428 @@ -339,6 +400,8 @@ asmlinkage void schedule(void)
   7.429          goto same_process;
   7.430      }
   7.431  
   7.432 +    perfc_incrc(sched_ctx);
   7.433 +
   7.434      prepare_to_switch();
   7.435      switch_to(prev, next);
   7.436      prev = schedule_data[this_cpu].prev;
   7.437 @@ -347,12 +410,12 @@ asmlinkage void schedule(void)
   7.438      if ( prev->state == TASK_DYING ) release_task(prev);
   7.439  
   7.440   same_process:
   7.441 -	/* update the domains notion of time  */
   7.442 +    /* update the domains notion of time  */
   7.443      update_dom_time(current->shared_info);
   7.444  
   7.445      if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) {
   7.446          goto need_resched_back;
   7.447 -	}
   7.448 +    }
   7.449      return;
   7.450  }
   7.451  
   7.452 @@ -361,11 +424,11 @@ asmlinkage void schedule(void)
   7.453   */
   7.454  static void sched_timer(unsigned long foo)
   7.455  {
   7.456 -    int 				cpu  = smp_processor_id();
   7.457 +    int                 cpu  = smp_processor_id();
   7.458      struct task_struct *curr = schedule_data[cpu].curr;
   7.459 -	/* cause a reschedule */
   7.460 -	set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.461 -	perfc_incrc(sched_irq);
   7.462 +    /* cause a reschedule */
   7.463 +    set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.464 +    perfc_incrc(sched_irq);
   7.465  }
   7.466  
   7.467  /*
   7.468 @@ -373,23 +436,23 @@ static void sched_timer(unsigned long fo
   7.469   */
   7.470  static void virt_timer(unsigned long foo)
   7.471  {
   7.472 -	unsigned long cpu_mask = 0;
   7.473 -	struct task_struct *p;
   7.474 -	s_time_t now;
   7.475 -	int res;
   7.476 +    unsigned long cpu_mask = 0;
   7.477 +    struct task_struct *p;
   7.478 +    s_time_t now;
   7.479 +    int res;
   7.480  
   7.481 -	/* send virtual timer interrupt */
   7.482 -	read_lock(&tasklist_lock);
   7.483 -	p = &idle0_task;
   7.484 -	do {
   7.485 -		if ( is_idle_task(p) ) continue;
   7.486 -		cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
   7.487 -	}
   7.488 -	while ( (p = p->next_task) != &idle0_task );
   7.489 -	read_unlock(&tasklist_lock);
   7.490 -	guest_event_notify(cpu_mask);
   7.491 +    /* send virtual timer interrupt */
   7.492 +    read_lock(&tasklist_lock);
   7.493 +    p = &idle0_task;
   7.494 +    do {
   7.495 +        if ( is_idle_task(p) ) continue;
   7.496 +        cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
   7.497 +    }
   7.498 +    while ( (p = p->next_task) != &idle0_task );
   7.499 +    read_unlock(&tasklist_lock);
   7.500 +    guest_event_notify(cpu_mask);
   7.501  
   7.502 -	again:
   7.503 +    again:
   7.504      now = NOW();
   7.505      v_timer.expires  = now + MILLISECS(10);
   7.506      res=add_ac_timer(&v_timer);
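
virt_timer re-arms itself each tick: it targets NOW() + 10ms and, if
add_ac_timer reports that the deadline already passed, recomputes NOW() and
retries. The retry loop in isolation (a runnable sketch; the stand-in
add_timer fails exactly when the deadline is in the past, like
add_ac_timer):

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    typedef int64_t s_time_t;
    #define MILLISECS(x) ((s_time_t)(x) * 1000000)

    static s_time_t NOW(void)              /* monotonic time in ns */
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (s_time_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
    }

    static int add_timer(s_time_t expires) /* fails if already in the past */
    {
        return expires <= NOW();
    }

    int main(void)
    {
        s_time_t now, expires;
    again:
        now = NOW();
        expires = now + MILLISECS(10);
        if (add_timer(expires))
            goto again;                    /* raced past the deadline */
        printf("re-armed for +10ms\n");
        return 0;
    }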
   7.507 @@ -412,14 +475,15 @@ void __init scheduler_init(void)
   7.508          spin_lock_init(&schedule_data[i].lock);
   7.509          schedule_data[i].prev = &idle0_task;
   7.510          schedule_data[i].curr = &idle0_task;
   7.511 -		
   7.512 +        
   7.513          /* a timer for each CPU  */
   7.514          init_ac_timer(&schedule_data[i].s_timer);
   7.515          schedule_data[i].s_timer.function = &sched_timer;
   7.516  
   7.517      }
   7.518 -	init_ac_timer(&v_timer);
   7.519 -	v_timer.function = &virt_timer;
   7.520 +    schedule_data[0].idle = &idle0_task; /* idle on CPU 0 is special */
   7.521 +    init_ac_timer(&v_timer);
   7.522 +    v_timer.function = &virt_timer;
   7.523  }
   7.524  
   7.525  /*
   7.526 @@ -427,46 +491,14 @@ void __init scheduler_init(void)
   7.527   * This has to be done *after* the timers, e.g., APICs, have been initialised
   7.528   */
   7.529  void schedulers_start(void) 
   7.530 -{	
   7.531 +{   
   7.532      printk("Start schedulers\n");
   7.533      __cli();
   7.534      sched_timer(0);
   7.535 -	virt_timer(0);
   7.536 +    virt_timer(0);
   7.537      smp_call_function((void *)sched_timer, NULL, 1, 1);
   7.538      __sti();
   7.539 -
   7.540 -	//add_key_handler('r', dump_run_queues, "dump run queues")
   7.541  }
   7.542 -#if 0
   7.543 -/****************************************************************************
   7.544 - * Debugging functions
   7.545 - ****************************************************************************/
   7.546 -static void dump_run_queues(u_char key, void *dev_id, struct pt_regs *regs) 
   7.547 -{
   7.548 -    u_long flags; 
   7.549 -    struct task_struct *p; 
   7.550 -    shared_info_t *s; 
   7.551 -
   7.552 -    printk("'%c' pressed -> dumping run queues\n", key); 
   7.553 -    read_lock_irqsave(&tasklist_lock, flags); 
   7.554 -    p = &idle0_task;
   7.555 -    do {
   7.556 -        printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
   7.557 -	       "hyp_events = %08x\n", 
   7.558 -	       p->domain, p->processor, p->has_cpu ? 'T':'F', 
   7.559 -	       task_states[p->state], p->hyp_events); 
   7.560 -	s = p->shared_info; 
   7.561 -	if(!is_idle_task(p)) {
   7.562 -	    printk("Guest: events = %08lx, event_enable = %08lx\n", 
   7.563 -		   s->events, s->events_enable); 
   7.564 -	    printk("Notifying guest...\n"); 
   7.565 -	    set_bit(_EVENT_DEBUG, &s->events); 
   7.566 -	}
   7.567 -    } while ( (p = p->next_task) != &idle0_task );
   7.568 -
   7.569 -    read_unlock_irqrestore(&tasklist_lock, flags); 
   7.570 -}
   7.571 -#endif
   7.572  
   7.573  
   7.574  /****************************************************************************
   7.575 @@ -533,3 +565,47 @@ long schedule_timeout(long timeout)
   7.576   out:
   7.577      return timeout < 0 ? 0 : timeout;
   7.578  }
   7.579 +
   7.580 +/****************************************************************************
   7.581 + * debug function
   7.582 + ****************************************************************************/
   7.583 +
   7.584 +static void dump_rqueue(struct list_head *queue, char *name)
   7.585 +{
   7.586 +    struct list_head *list;
   7.587 +    int loop = 0;
   7.588 +    struct task_struct  *p;
   7.589 +
   7.590 +    printk ("QUEUE %s %lx   n: %lx, p: %lx\n", name,  (unsigned long)queue,
   7.591 +            (unsigned long) queue->next, (unsigned long) queue->prev);
   7.592 +    list_for_each (list, queue) {
   7.593 +        p = list_entry(list, struct task_struct, run_list);
   7.594 +        printk("%3d: %3d has=%c mcua=0x%04X ev=0x%08X av=0x%08X c=0x%X%08X\n",
   7.595 +               loop++, p->domain,
   7.596 +               p->has_cpu ? 'T':'F',
   7.597 +               p->mcu_advance, p->evt, p->avt,
   7.598 +               (u32)(p->cpu_time>>32), (u32)p->cpu_time);
   7.599 +        printk("         l: %lx n: %lx  p: %lx\n",
   7.600 +               (unsigned long)list, (unsigned long)list->next,
   7.601 +               (unsigned long)list->prev);
   7.602 +    }
   7.603 +    return; 
   7.604 +}
   7.605 +
   7.606 +void dump_runq(u_char key, void *dev_id, struct pt_regs *regs)
   7.607 +{
   7.608 +    u_long   flags; 
   7.609 +    s_time_t now = NOW();
   7.610 +    int i;
   7.611 +
   7.612 +    printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns NOW=0x%08X%08X\n",
   7.613 +           (u32)MCU, (u32)ctx_allow, (u32)(now>>32), (u32)now); 
   7.614 +    for (i = 0; i < smp_num_cpus; i++) {
   7.615 +        spin_lock_irqsave(&schedule_data[i].lock, flags);
   7.616 +        printk("CPU[%02d] svt=0x%08X ", i, (s32)schedule_data[i].svt);
   7.617 +        dump_rqueue(&schedule_data[i].runqueue, "rq"); 
   7.618 +        spin_unlock_irqrestore(&schedule_data[i].lock, flags);
   7.619 +    }
   7.620 +    return; 
   7.621 +}
   7.622 +
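
The selection loop in schedule() is a single O(n) pass that tracks both the
lowest and the second-lowest evt on the runqueue (n is expected to be
small). The same two-minimum scan in isolation (a runnable sketch):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* evt values of the runnable domains; we want the lowest two */
        uint32_t evt[] = { 420, 100, 310, 250 };
        uint32_t next_evt = 0xffffffff, next_prime_evt = 0xffffffff;

        for (int i = 0; i < 4; i++) {
            if (evt[i] < next_evt) {
                next_prime_evt = next_evt;  /* old minimum becomes runner-up */
                next_evt = evt[i];
            } else if (evt[i] < next_prime_evt) {
                next_prime_evt = evt[i];
            }
        }
        printf("next=%u next'=%u\n", next_evt, next_prime_evt); /* 100 250 */
        return 0;
    }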
     8.1 --- a/xen/include/xeno/sched.h	Tue Mar 11 10:34:08 2003 +0000
     8.2 +++ b/xen/include/xeno/sched.h	Fri Mar 14 15:43:32 2003 +0000
     8.3 @@ -63,20 +63,20 @@ extern struct mm_struct init_mm;
     8.4  
     8.5  struct task_struct {
     8.6  
     8.7 -	/*
     8.8 +    /*
     8.9       * DO NOT CHANGE THE ORDER OF THE FOLLOWING.
    8.10      * Their offsets are hardcoded in entry.S
    8.11       */
    8.12  
    8.13      int processor;               /* 00: current processor */
    8.14 -    int state;	                 /* 04: current run state */
    8.15 -	int hyp_events;              /* 08: pending events */
    8.16 +    int state;                   /* 04: current run state */
    8.17 +    int hyp_events;              /* 08: pending events */
    8.18      unsigned int domain;         /* 12: domain id */
    8.19  
    8.20      /* An unsafe pointer into a shared data area. */
    8.21      shared_info_t *shared_info;  /* 16: shared data area */
    8.22  
    8.23 -	/*
    8.24 +    /*
    8.25       * From here on things can be added and shuffled without special attention
    8.26       */
    8.27      
    8.28 @@ -84,25 +84,25 @@ struct task_struct {
    8.29      unsigned int tot_pages;     /* number of pages currently possessed */
    8.30      unsigned int max_pages;     /* max number of pages that can be possessed */
    8.31  
    8.32 -	/* scheduling */
    8.33 -    struct list_head run_list;		/* the run list  */
    8.34 -    int 			 has_cpu;
    8.35 -	int 			 policy;
    8.36 -	int 			 counter;
    8.37 +    /* scheduling */
    8.38 +    struct list_head run_list;      /* the run list  */
    8.39 +    int              has_cpu;
    8.40 +    int              policy;
    8.41 +    int              counter;
    8.42      
    8.43 -	struct ac_timer	blt;	        /* blocked timeout */
    8.44 +    struct ac_timer blt;            /* blocked timeout */
    8.45  
    8.46 -	s_time_t lastschd;		        /* time this domain was last scheduled */
    8.47 -	s_time_t cpu_time;		        /* total CPU time received till now */
    8.48 +    s_time_t lastschd;              /* time this domain was last scheduled */
    8.49 +    s_time_t cpu_time;              /* total CPU time received till now */
    8.50  
    8.51 -	long mcu_advance;		        /* inverse of weight */
    8.52 -	u32  avt;			            /* actual virtual time */
    8.53 -	u32  evt;			            /* effective virtual time */
    8.54 -	long warp;			            /* virtual time warp */
    8.55 -	long warpl;			            /* warp limit */
    8.56 -	long warpu;			            /* unwarp time requirement */
    8.57 -	long warped;		            /* time it ran warped last time */
    8.58 -	long uwarped;		            /* time it ran unwarped last time */
    8.59 +    unsigned long mcu_advance;      /* inverse of weight */
    8.60 +    s32  avt;                       /* actual virtual time */
    8.61 +    s32  evt;                       /* effective virtual time */
    8.62 +    long warp;                      /* virtual time warp */
    8.63 +    long warpl;                     /* warp limit */
    8.64 +    long warpu;                     /* unwarp time requirement */
    8.65 +    long warped;                    /* time it ran warped last time */
    8.66 +    long uwarped;                   /* time it ran unwarped last time */
    8.67  
    8.68  
    8.69      /* Network I/O */
    8.70 @@ -119,7 +119,7 @@ struct task_struct {
    8.71      segment_t *segment_list[XEN_MAX_SEGMENTS];                        /* vhd */
    8.72      int segment_count;
    8.73  
    8.74 -	/* VM */
    8.75 +    /* VM */
    8.76      struct mm_struct mm;
    8.77      /* We need this lock to check page types and frob reference counts. */
    8.78      spinlock_t page_lock;
    8.79 @@ -158,7 +158,7 @@ struct task_struct {
    8.80  #define TASK_RUNNING            0
    8.81  #define TASK_INTERRUPTIBLE      1
    8.82  #define TASK_UNINTERRUPTIBLE    2
    8.83 -#define TASK_WAIT				4
    8.84 +#define TASK_WAIT               4
    8.85  #define TASK_DYING              16
    8.86  /* #define TASK_STOPPED            8  not really used */
    8.87  
    8.88 @@ -172,8 +172,8 @@ struct task_struct {
    8.89      domain:      IDLE_DOMAIN_ID, \
    8.90      state:       TASK_RUNNING,   \
    8.91      has_cpu:     0,              \
    8.92 -    evt:         0x7fffffff,     \
    8.93 -    avt:         0x7fffffff,     \
    8.94 +    evt:         0xffffffff,     \
    8.95 +    avt:         0xffffffff,     \
    8.96      mm:          IDLE0_MM,       \
    8.97      addr_limit:  KERNEL_DS,      \
    8.98      active_mm:   &idle0_task.mm, \
    8.99 @@ -186,7 +186,7 @@ struct task_struct {
   8.100  #define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID)
   8.101  
   8.102  #ifndef IDLE0_TASK_SIZE
   8.103 -#define IDLE0_TASK_SIZE	2048*sizeof(long)
   8.104 +#define IDLE0_TASK_SIZE 2048*sizeof(long)
   8.105  #endif
   8.106  
   8.107  union task_union {
   8.108 @@ -235,8 +235,9 @@ void scheduler_init(void);
   8.109  void schedulers_start(void);
   8.110  void sched_add_domain(struct task_struct *p);
   8.111  void sched_rem_domain(struct task_struct *p);
   8.112 +long sched_bvtctl(unsigned long ctx_allow);
   8.113  long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, 
   8.114 -				  unsigned long warpl, unsigned long warpu);
   8.115 +                  unsigned long warpl, unsigned long warpu);
   8.116  int  wake_up(struct task_struct *p);
   8.117  long schedule_timeout(long timeout);
   8.118  long do_yield(void);
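
A note on the idle task's sentinel moving from 0x7fffffff to 0xffffffff:
the evt scan in schedule() initialises its locals to 0xffffffff and the
comparisons are effectively unsigned 32-bit, so 0xffffffff is the one value
no real domain's evt can exceed; the old signed-maximum sentinel could lose
that comparison and let the idle task win. A runnable check:

    #include <stdio.h>

    int main(void)
    {
        unsigned int dom = 0x80000000u;        /* a long-running domain's evt */
        printf("%d\n", 0x7fffffffu < dom);     /* 1: old sentinel sorts first */
        printf("%d\n", 0xffffffffu < dom);     /* 0: new sentinel sorts last */
        return 0;
    }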