ia64/xen-unstable

changeset 278:73453fd4320b

bitkeeper revision 1.105.2.1 (3e6dbacbRqvcRHd-b9HOUbf35si4iw)

sched_ops.c:
new file
dom0_ops.h:
added new commands
Makefile:
added new file
sched.h:
prototype for adjusting scheduling parameters
perfc_defn.h:
new performance counter
perfc.h:
per CPU performance counters
dom0_ops.h:
two more operations for scheduling
schedule.c:
bug fix and skeleton for adjusting scheduling parameters
perfc.c:
added per CPU performance counters
dom0_ops.c:
Added call for adjusting scheduling parameters
apic.c:
added performance counter for APIC timer interrupts
sched.h:
intermediate checkin for new scheduler
schedule.c:
intermediate checkin for working timer
ac_timer.c:
simplified add_ac_timer.
author rn@wyvis.research.intel-research.net
date Tue Mar 11 10:30:35 2003 +0000 (2003-03-11)
parents 9c04a98c7932
children 314d67fab116
files .rootkeys BitKeeper/etc/logging_ok xen/arch/i386/apic.c xen/common/ac_timer.c xen/common/dom0_ops.c xen/common/perfc.c xen/common/schedule.c xen/include/xeno/dom0_ops.h xen/include/xeno/perfc.h xen/include/xeno/perfc_defn.h xen/include/xeno/sched.h xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c
line diff
     1.1 --- a/.rootkeys	Mon Mar 03 15:43:22 2003 +0000
     1.2 +++ b/.rootkeys	Tue Mar 11 10:30:35 2003 +0000
     1.3 @@ -481,6 +481,7 @@ 3e5a4e65Cc7io-vynYob10SlqXTjAQ xenolinux
     1.4  3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c
     1.5  3e5a4e65uXAx05p6B1-HU2tijuw8qA xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c
     1.6  3e5a4e65EOOLlPwXnhSuX-iVdWLmnA xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h
     1.7 +3e6dba59C8o0kBks7UZ4IW_FY853Aw xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c
     1.8  3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c
     1.9  3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile
    1.10  3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
     2.1 --- a/BitKeeper/etc/logging_ok	Mon Mar 03 15:43:22 2003 +0000
     2.2 +++ b/BitKeeper/etc/logging_ok	Tue Mar 11 10:30:35 2003 +0000
     2.3 @@ -8,5 +8,6 @@ kaf24@plym.cl.cam.ac.uk
     2.4  kaf24@striker.cl.cam.ac.uk
     2.5  lynx@idefix.cl.cam.ac.uk
     2.6  rn@wyvis.camb.intel-research.net
     2.7 +rn@wyvis.research.intel-research.net
     2.8  smh22@boulderdash.cl.cam.ac.uk
     2.9  smh22@uridium.cl.cam.ac.uk
     3.1 --- a/xen/arch/i386/apic.c	Mon Mar 03 15:43:22 2003 +0000
     3.2 +++ b/xen/arch/i386/apic.c	Tue Mar 11 10:30:35 2003 +0000
     3.3 @@ -48,6 +48,8 @@
     3.4  
     3.5  #include <xeno/ac_timer.h>
     3.6  
     3.7 +#include <xeno/perfc.h>
     3.8 +
     3.9  #undef APIC_TIME_TRACE
    3.10  #ifdef APIC_TIME_TRACE
    3.11  #define TRC(_x) _x
    3.12 @@ -748,6 +750,7 @@ void smp_apic_timer_interrupt(struct pt_
    3.13  
    3.14  	/* call the local handler */
    3.15      irq_enter(cpu, 0);
    3.16 +	perfc_incrc(apic_timer);
    3.17      smp_local_timer_interrupt(regs);
    3.18      irq_exit(cpu, 0);
    3.19  
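
The counter bump added above costs a single increment in the interrupt path and needs no locking, since each CPU only ever touches its own array slot. Per the perfc.h macros introduced later in this changeset, the new line expands to roughly:

    /* expansion of perfc_incrc(apic_timer), per the perfc.h hunk below */
    perfcounters.apic_timer[smp_processor_id()]++;
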
     4.1 --- a/xen/common/ac_timer.c	Mon Mar 03 15:43:22 2003 +0000
     4.2 +++ b/xen/common/ac_timer.c	Tue Mar 11 10:30:35 2003 +0000
     4.3 @@ -90,13 +90,12 @@ int add_ac_timer(struct ac_timer *timer)
     4.4      s_time_t		 now;
     4.5  
     4.6      /* make sure timeout value is in the future */
     4.7 +	
     4.8      now = NOW();
     4.9 -    TRC(printk("ACT  [%02d] add(): now=%lld timo=%lld\n",
    4.10 -               cpu, now, timer->expires));
    4.11      if (timer->expires <= now) {	
    4.12 -        printk("ACT[%02d] add_ac_timer: now=0x%08X%08X > expire=0x%08X%08X\n",
    4.13 -               cpu, (u32)(now>>32), (u32)now,
    4.14 -               (u32)(timer->expires>>32), (u32)timer->expires);
    4.15 +        TRC(printk("ACT[%02d] add_ac_timer:now=0x%08X%08X>expire=0x%08X%08X\n",
    4.16 +				   cpu, (u32)(now>>32), (u32)now,
    4.17 +				   (u32)(timer->expires>>32), (u32)timer->expires));
    4.18          return 1;
    4.19      }
    4.20      spin_lock_irqsave(&ac_timers[cpu].lock, flags);
    4.21 @@ -107,43 +106,29 @@ int add_ac_timer(struct ac_timer *timer)
    4.22      if (list_empty(&ac_timers[cpu].timers)) {
    4.23          /* Reprogramm and add to head of list */
    4.24          if (!reprogram_ac_timer(timer->expires)) {
    4.25 -            /* failed */
    4.26 -            printk("ACT  [%02d] add(): add at head failed\n", cpu);
    4.27              spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
    4.28 -            return 1;
    4.29 +            return 1; /* failed */
    4.30          }
    4.31          list_add(&timer->timer_list, &ac_timers[cpu].timers);
    4.32 -        TRC(printk("ACT  [%02d] add(0x%08X%08X): added at head\n", cpu,
    4.33 -                   (u32)(timer->expires>>32), (u32)timer->expires));
    4.34      } else {
    4.35          struct list_head *pos;
    4.36          struct ac_timer	 *t;
    4.37 -        for (pos = ac_timers[cpu].timers.next;
    4.38 -             pos != &ac_timers[cpu].timers;
    4.39 -             pos = pos->next) {
    4.40 -            t = list_entry(pos, struct ac_timer, timer_list);
    4.41 -            if (t->expires > timer->expires)
    4.42 +
    4.43 +		list_for_each(pos, &ac_timers[cpu].timers) {
    4.44 +			t = list_entry(pos, struct ac_timer, timer_list);
    4.45 +			if (t->expires > timer->expires)
    4.46                  break;
    4.47 -        }
    4.48 +		}
    4.49 +		list_add (&(timer->timer_list), pos->prev);
    4.50  
    4.51 -        if (pos->prev == &ac_timers[cpu].timers) {
    4.52 -            /* added to head, reprogramm timer */
    4.53 +		if (timer->timer_list.prev == &ac_timers[cpu].timers) {
    4.54 +			/* added at head */
    4.55              if (!reprogram_ac_timer(timer->expires)) {
    4.56 -                /* failed */
    4.57 -                TRC(printk("ACT  [%02d] add(): add at head failed\n", cpu));
    4.58 +				detach_ac_timer(timer);
    4.59                  spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
    4.60 -                return 1;
    4.61 +                return 1; /* failed */
    4.62              }
    4.63 -            list_add (&(timer->timer_list), pos->prev);
    4.64 -            TRC(printk("ACT  [%02d] add(0x%08X%08X): added at head\n", cpu,
    4.65 -                       (u32)(timer->expires>>32), (u32)timer->expires));
    4.66 -        } else {
    4.67 -            list_add (&(timer->timer_list), pos->prev);
    4.68 -            TRC(printk("ACT  [%02d] add(0x%08X%08X): add < exp=0x%08X%08X\n",
    4.69 -                       cpu,
    4.70 -                       (u32)(timer->expires>>32), (u32)timer->expires,
    4.71 -                       (u32)(t->expires>>32), (u32)t->expires));
    4.72 -        }
    4.73 +		}
    4.74      }
    4.75      spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
    4.76      return 0;
    4.77 @@ -173,13 +158,14 @@ static int detach_ac_timer(struct ac_tim
    4.78  int rem_ac_timer(struct ac_timer *timer)
    4.79  {
    4.80      int 		  cpu = smp_processor_id();
    4.81 -    int           res;
    4.82 +    int           res = 0;
    4.83      unsigned long flags;
    4.84  
    4.85      TRC(printk("ACT  [%02d] remove(): timo=%lld \n", cpu, timer->expires));
    4.86  
    4.87      spin_lock_irqsave(&ac_timers[cpu].lock, flags);
    4.88 -    res = detach_ac_timer(timer);	
     4.89 +	if (timer->timer_list.next != NULL)
    4.90 +		res = detach_ac_timer(timer);	
    4.91      spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
    4.92  
    4.93      return res;
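
The simplified add_ac_timer() leans on a property of list_for_each: if the loop breaks at the first timer with a later expiry, list_add(&timer->timer_list, pos->prev) inserts just before it; if the loop runs to completion, pos is back at the list head and the same call appends at the tail. A minimal user-space sketch of this sorted-insert idiom, with the kernel list primitives re-implemented locally so it runs standalone:

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    #define LIST_HEAD_INIT(name) { &(name), &(name) }
    #define list_entry(ptr, type, member) \
        ((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))
    #define list_for_each(pos, head) \
        for (pos = (head)->next; pos != (head); pos = pos->next)

    static void list_add(struct list_head *new, struct list_head *head)
    {
        new->next = head->next; new->prev = head;
        head->next->prev = new; head->next = new;
    }

    struct ac_timer { long long expires; struct list_head timer_list; };

    static struct list_head timers = LIST_HEAD_INIT(timers);

    /* sorted insert, same shape as the new add_ac_timer() body */
    static void insert_sorted(struct ac_timer *timer)
    {
        struct list_head *pos;
        struct ac_timer  *t;

        list_for_each(pos, &timers) {
            t = list_entry(pos, struct ac_timer, timer_list);
            if (t->expires > timer->expires)
                break;                    /* insert before first later timer */
        }
        list_add(&timer->timer_list, pos->prev);  /* tail if loop completed */
    }

    int main(void)
    {
        struct ac_timer a = { 30 }, b = { 10 }, c = { 20 };
        struct list_head *pos;

        insert_sorted(&a); insert_sorted(&b); insert_sorted(&c);
        list_for_each(pos, &timers)                   /* prints 10 20 30 */
            printf("%lld\n",
                   list_entry(pos, struct ac_timer, timer_list)->expires);
        return 0;
    }
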
     5.1 --- a/xen/common/dom0_ops.c	Mon Mar 03 15:43:22 2003 +0000
     5.2 +++ b/xen/common/dom0_ops.c	Tue Mar 11 10:30:35 2003 +0000
     5.3 @@ -126,6 +126,26 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     5.4      }
     5.5      break;
     5.6  
     5.7 +    case DOM0_ADJUSTDOM:
     5.8 +    {
     5.9 +        unsigned int   dom     = op.u.adjustdom.domain;
    5.10 +		unsigned long  mcu_adv = op.u.adjustdom.mcu_adv;
    5.11 +		unsigned long  warp    = op.u.adjustdom.warp;
    5.12 +		unsigned long  warpl   = op.u.adjustdom.warpl;
    5.13 +		unsigned long  warpu   = op.u.adjustdom.warpu;
    5.14 +		
    5.15 +
    5.16 +        if ( dom == IDLE_DOMAIN_ID )
    5.17 +        {
    5.18 +            ret = -EPERM;
    5.19 +        }
    5.20 +        else
    5.21 +        {
    5.22 +            ret = sched_adjdom(dom, mcu_adv, warp, warpl, warpu);
    5.23 +        }
    5.24 +    }
    5.25 +    break;
    5.26 +
    5.27      case DOM0_GETMEMLIST:
    5.28      {
    5.29          int i;
     6.1 --- a/xen/common/perfc.c	Mon Mar 03 15:43:22 2003 +0000
     6.2 +++ b/xen/common/perfc.c	Tue Mar 11 10:30:35 2003 +0000
     6.3 @@ -2,10 +2,18 @@
     6.4   * xen performance counters
     6.5   */
     6.6  
     6.7 +#include <xeno/smp.h>
     6.8 +
     6.9  #include <xeno/perfc.h>
    6.10  #include <xeno/keyhandler.h> 
    6.11  
    6.12 +/* used for different purposes in perfc.h and here */
    6.13 +#undef PERFCOUNTER
    6.14 +#undef PERFCOUNTER_CPU
    6.15 +#undef PERFCOUNTER_ARRAY
    6.16 +
    6.17  #define PERFCOUNTER( var, name ) "[0]"name"\0",
    6.18 +#define PERFCOUNTER_CPU( var, name )  "C"name"\0",
    6.19  #define PERFCOUNTER_ARRAY( var, name, size )  "["#size"]"name"\0",
    6.20  
    6.21  char* perfc_name[] = {
    6.22 @@ -19,63 +27,78 @@ void __perfc_print (unsigned long counte
    6.23    int loop;
    6.24    int total_size = 0;
    6.25    int element_size = 0;
    6.26 +  int cpus = 0;
    6.27    int num = 0;
    6.28  
    6.29 -  for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
    6.30 -  {
    6.31 -    num = sscanf (perfc_name[loop], "[%d]", &element_size);
    6.32 -    total_size += element_size == 0 ? 1 : element_size;
    6.33 -    if (total_size > offset) break;
    6.34 +  for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++) {
    6.35 +	  if (perfc_name[loop][0] == 'C') {
    6.36 +		  element_size = NR_CPUS;
    6.37 +		  cpus = 1;
    6.38 +	  } else {
    6.39 +		  num = sscanf (perfc_name[loop], "[%d]", &element_size);
    6.40 +	  }
    6.41 +
    6.42 +	  total_size += element_size == 0 ? 1 : element_size;
    6.43 +	  if (total_size > offset) break;
    6.44    }
    6.45 -  if (loop == sizeof(perfc_name) / sizeof(char *))
    6.46 -  {
    6.47 -    printf ("error: couldn't find variable\n"); 
    6.48 -    return;
    6.49 +  if (loop == sizeof(perfc_name) / sizeof(char *)) {
    6.50 +	  printf ("error: couldn't find variable\n"); 
    6.51 +	  return;
    6.52    }
    6.53 -  if (element_size == 0)                                   /* single counter */
    6.54 -  {
    6.55 -    printf ("%10ld  0x%08lx  %s\n", counter[0], counter[0],
    6.56 -	    perfc_name[loop] + 2 + num);
    6.57 -  }
    6.58 -  else                                                  /* show entire array */
    6.59 -  {
    6.60 -    for (loop = 0; loop < element_size; loop++)
    6.61 -    {
    6.62 -      printf ("%10ld  0x%08lx  %s:%d\n", 
    6.63 -	      counter[loop], counter[loop], 
    6.64 -	      perfc_name[loop] + 2 + num, loop);
    6.65 -    }
    6.66 +  if (element_size == 0) {                              /* single counter */
    6.67 +	  printf ("%10ld  0x%08lx  %s\n", counter[0], counter[0],
    6.68 +			  perfc_name[loop] + 2 + num);
    6.69 +  } else if (cpus) {									/* counter per CPU  */
    6.70 +	  for (loop = 0; loop < smp_num_cpus; loop++) {
    6.71 +		  printf ("%10ld  0x%08lx  cpu[%02d] %s\n", 
    6.72 +				  counter[loop], counter[loop], 
    6.73 +				  loop, perfc_name[loop]);
    6.74 +	  }
    6.75 +	  
    6.76 +  } else {                                             /* show entire array */
    6.77 +	  for (loop = 0; loop < element_size; loop++) {
    6.78 +		  printf ("%10ld  0x%08lx  %s:%d\n", 
    6.79 +				  counter[loop], counter[loop], 
    6.80 +				  perfc_name[loop] + 2 + num, loop);
    6.81 +	  }
    6.82    }
    6.83    return;
    6.84  }
    6.85  
    6.86  void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs)
    6.87  {
    6.88 -  int loop, idx;
    6.89 -  int element_size;
    6.90 -  int num;
    6.91 -  unsigned long *counters = (unsigned long *)&perfcounters;
    6.92 +	int loop, idx;
    6.93 +	int element_size;
    6.94 +	int cpus=0;
    6.95 +	int num = 0;
    6.96 +	unsigned long *counters = (unsigned long *)&perfcounters;
    6.97 +
    6.98 +	printf ("xen performance counters\n");
    6.99  
   6.100 -  printf ("xen performance counters\n");
   6.101 -  for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
   6.102 -  {
   6.103 -    num = sscanf (perfc_name[loop], "[%d]", &element_size);
   6.104 +	for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++) {
   6.105 +
   6.106 +		if (perfc_name[loop][0] == 'C') {
   6.107 +			element_size = NR_CPUS;
   6.108 +			cpus = 1;
   6.109 +		} else {
   6.110 +			num = sscanf (perfc_name[loop], "[%d]", &element_size);
   6.111 +		}
   6.112      
   6.113 -    for (idx = 0; idx < (element_size ? element_size : 1); idx++)
   6.114 -    {
   6.115 -      if (element_size)
   6.116 -      {
   6.117 -	printf ("%10ld  0x%08lx  %s:%d\n", 
   6.118 -		*counters, *counters, perfc_name[loop] + num + 2, idx);
   6.119 -      }
   6.120 -      else
   6.121 -      {
   6.122 -	printf ("%10ld  0x%08lx  %s\n", 
   6.123 -		*counters, *counters, perfc_name[loop] + num + 2);
   6.124 -      }
   6.125 -      counters++;
   6.126 -    }
   6.127 -  }
   6.128 +		for (idx = 0; idx < (element_size ? element_size : 1); idx++) {
   6.129 +			if (cpus) {
   6.130 +				if (idx < smp_num_cpus)
   6.131 +					printf ("%10ld  0x%08lx  cpu[%02d] %s\n", 
   6.132 +							*counters, *counters, idx, perfc_name[loop] + 1);
   6.133 +			} else if (element_size) {
   6.134 +				printf ("%10ld  0x%08lx  %s:%d\n", 
   6.135 +						*counters, *counters, perfc_name[loop] + num + 2, idx);
   6.136 +			} else {
   6.137 +				printf ("%10ld  0x%08lx  %s\n", 
   6.138 +						*counters, *counters, perfc_name[loop] + num + 2);
   6.139 +			}
   6.140 +			counters++;
   6.141 +		}
   6.142 +	}
   6.143  
   6.144 -  return;
   6.145 +	return;
   6.146  }
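
The per-CPU case reuses the name-string encoding trick: the strings generated above carry their own type tag, "C" for a per-CPU counter (NR_CPUS slots), "[n]" for an n-entry array, and "[0]" for a plain counter, which the print routines recover with sscanf. A standalone sketch of that decoding convention (NR_CPUS and the array entry are made up for illustration):

    #include <stdio.h>
    #include <string.h>

    #define NR_CPUS 4                      /* assumed for this sketch */

    /* same encoding perfc.c generates from perfc_defn.h */
    static const char *names[] = {
        "[0]block io: messages received from tx queue",
        "Capic timer interrupts",
        "[16]hypothetical array counter",
    };

    int main(void)
    {
        unsigned int i;
        for (i = 0; i < sizeof(names)/sizeof(names[0]); i++) {
            int width = 0;
            if (names[i][0] == 'C') {              /* per-CPU counter */
                printf("per-CPU (%d slots): %s\n", NR_CPUS, names[i] + 1);
            } else {
                sscanf(names[i], "[%d]", &width);
                const char *label = strchr(names[i], ']') + 1;
                if (width > 0)
                    printf("array (%d slots): %s\n", width, label);
                else
                    printf("scalar: %s\n", label);
            }
        }
        return 0;
    }
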
     7.1 --- a/xen/common/schedule.c	Mon Mar 03 15:43:22 2003 +0000
     7.2 +++ b/xen/common/schedule.c	Tue Mar 11 10:30:35 2003 +0000
     7.3 @@ -11,7 +11,8 @@
     7.4   * 
     7.5   * Environment: Xen Hypervisor
     7.6   * Description: CPU scheduling
     7.7 - *				partially moved from domain.c
     7.8 + *              implements a Borrowed Virtual Time scheduler.
     7.9 + *              (see Duda & Cheriton SOSP'99)
    7.10   *
    7.11   ****************************************************************************
    7.12   * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
    7.13 @@ -28,6 +29,9 @@
    7.14  #include <xeno/ac_timer.h>
    7.15  #include <xeno/interrupt.h>
    7.16  
    7.17 +#include <xeno/perfc.h>
    7.18 +
    7.19 +
    7.20  #undef SCHEDULER_TRACE
    7.21  #ifdef SCHEDULER_TRACE
    7.22  #define TRC(_x) _x
    7.23 @@ -35,72 +39,80 @@
    7.24  #define TRC(_x)
    7.25  #endif
    7.26  
    7.27 -/*
    7.28 +
    7.29 +#define MCU			(s32)MICROSECS(100)		/* Minimum unit */
    7.30 +#define CTX_ALLOW	(s32)MILLISECS(10)		/* context switch allowance */
    7.31 +
    7.32 +/*****************************************************************************
    7.33   * per CPU data for the scheduler.
    7.34 - */
    7.35 + *****************************************************************************/
    7.36  typedef struct schedule_data_st
    7.37  {
    7.38 -    spinlock_t lock;
    7.39 -    struct list_head runqueue;
    7.40 -    struct task_struct *prev, *curr;
    7.41 +    spinlock_t          lock;           /* lock for protecting this */
    7.42 +    struct list_head    runqueue;       /* runqueue */
     7.43 +    struct task_struct *prev, *curr;	/* previous and current task */
    7.44 +
    7.45 +	long				svt;			/* system virtual time. per CPU??? */
    7.46 +	struct ac_timer     s_timer;		/* scheduling timer  */
    7.47 +
    7.48  } __cacheline_aligned schedule_data_t;
    7.49  schedule_data_t schedule_data[NR_CPUS];
    7.50  
    7.51 -static __cacheline_aligned struct ac_timer s_timer[NR_CPUS];
    7.52 +struct ac_timer     v_timer;		/* scheduling timer  */
    7.53 +static void virt_timer(unsigned long foo);
    7.54  
    7.55 -/*
    7.56 +
    7.57 +/*****************************************************************************
    7.58   * Some convenience functions
    7.59 - */
    7.60 -
    7.61 -static inline void __add_to_runqueue(struct task_struct * p)
    7.62 + *****************************************************************************/
    7.63 +/* add a task to the head of the runqueue */
    7.64 +static inline void __add_to_runqueue_head(struct task_struct * p)
    7.65  {
    7.66 +	
    7.67      list_add(&p->run_list, &schedule_data[p->processor].runqueue);
    7.68  }
    7.69 -
    7.70 -static inline void __move_last_runqueue(struct task_struct * p)
    7.71 +/* add a task to the tail of the runqueue */
    7.72 +static inline void __add_to_runqueue_tail(struct task_struct * p)
    7.73  {
    7.74 -    list_del(&p->run_list);
    7.75      list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
    7.76  }
    7.77  
    7.78 -static inline void __move_first_runqueue(struct task_struct * p)
    7.79 -{
    7.80 -    list_del(&p->run_list);
    7.81 -    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
    7.82 -}
    7.83 -
    7.84 +/* remove a task from runqueue  */
    7.85  static inline void __del_from_runqueue(struct task_struct * p)
    7.86  {
    7.87      list_del(&p->run_list);
    7.88      p->run_list.next = NULL;
    7.89  }
    7.90 -
    7.91 +/* is task on run queue?  */
    7.92  static inline int __task_on_runqueue(struct task_struct *p)
    7.93  {
    7.94      return (p->run_list.next != NULL);
    7.95  }
    7.96  
     7.97 +#define next_domain(p) \
    7.98 +        list_entry((p)->run_list.next, struct task_struct, run_list)
    7.99  
   7.100 -/*
   7.101 - * Add a new domain to the scheduler
   7.102 - */
   7.103 +/******************************************************************************
   7.104 +* Add and remove a domain
   7.105 +******************************************************************************/
   7.106  void sched_add_domain(struct task_struct *p) 
   7.107  {
   7.108 -    p->state      = TASK_UNINTERRUPTIBLE;
   7.109 +    p->state    = TASK_UNINTERRUPTIBLE;
    7.110 +	/* set avt and evt to system virtual time */
   7.111 +	p->avt		= schedule_data[p->processor].svt;
   7.112 +	p->evt		= schedule_data[p->processor].svt;
   7.113 +	/* RN: XXX BVT fill in other bits */
   7.114  }
   7.115  
   7.116 -/*
   7.117 - * Remove domain to the scheduler
   7.118 - */
   7.119  void sched_rem_domain(struct task_struct *p) 
   7.120  {
   7.121      p->state = TASK_DYING;
   7.122  }
   7.123  
   7.124  
   7.125 -/*
   7.126 +/****************************************************************************
   7.127   * wake up a domain which had been sleeping
   7.128 - */
   7.129 + ****************************************************************************/
   7.130  int wake_up(struct task_struct *p)
   7.131  {
   7.132      unsigned long flags;
   7.133 @@ -108,7 +120,13 @@ int wake_up(struct task_struct *p)
   7.134      spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
   7.135      if ( __task_on_runqueue(p) ) goto out;
   7.136      p->state = TASK_RUNNING;
   7.137 -    __add_to_runqueue(p);
   7.138 +
   7.139 +	/* set the BVT parameters */
   7.140 +	if (p->avt < schedule_data[p->processor].svt)
   7.141 +		p->avt = schedule_data[p->processor].svt;
   7.142 +	p->evt = p->avt; /* RN: XXX BVT deal with warping here */
   7.143 +	
   7.144 +    __add_to_runqueue_head(p);
   7.145      ret = 1;
   7.146  
   7.147   out:
   7.148 @@ -116,6 +134,349 @@ int wake_up(struct task_struct *p)
   7.149      return ret;
   7.150  }
   7.151  
   7.152 +/* RN: XXX turn this into do_halt() */
   7.153 +/****************************************************************************
   7.154 + * Domain requested scheduling operations
   7.155 + ****************************************************************************/
   7.156 +long do_sched_op(void)
   7.157 +{
   7.158 +    current->state = TASK_INTERRUPTIBLE;
   7.159 +    schedule();
   7.160 +    return 0;
   7.161 +}
   7.162 +
   7.163 +/****************************************************************************
   7.164 + * Adjust scheduling parameter for a given domain
   7.165 + ****************************************************************************/
   7.166 +long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, 
   7.167 +				 unsigned long warpl, unsigned long warpu)
   7.168 +{
   7.169 +	printk("sched: adjdom %02d %lu %lu %lu %lu\n",
   7.170 +		   dom, mcu_adv, warp, warpl, warpu);
   7.171 +	return 0;
   7.172 +}
   7.173 +
   7.174 +/****************************************************************************
   7.175 + * cause a run through the scheduler when appropriate
   7.176 + ****************************************************************************/
   7.177 +void reschedule(struct task_struct *p)
   7.178 +{
   7.179 +    int cpu = p->processor;
   7.180 +    struct task_struct *curr;
   7.181 +    unsigned long flags;
   7.182 +
   7.183 +    if (p->has_cpu)
   7.184 +		return;
   7.185 +
   7.186 +    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
   7.187 +    curr = schedule_data[cpu].curr;
   7.188 +    if (is_idle_task(curr)) {
   7.189 +        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.190 +        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   7.191 +#ifdef CONFIG_SMP
   7.192 +        if (cpu != smp_processor_id())
   7.193 +			smp_send_event_check_cpu(cpu);
   7.194 +#endif
   7.195 +    } else {
   7.196 +        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   7.197 +    }
   7.198 +}
   7.199 +
   7.200 +
   7.201 +/**************************************************************************** 
   7.202 + * The main function
   7.203 + * - deschedule the current domain.
   7.204 + * - pick a new domain.
   7.205 + *   i.e., the domain with lowest EVT.
    7.206 + *   The runqueue should be ordered by EVT so this is easy.
   7.207 + ****************************************************************************/
   7.208 +asmlinkage void schedule(void)
   7.209 +{
   7.210 +    struct task_struct *prev, *next, *next_prime, *p;
   7.211 +    struct list_head   *tmp;
   7.212 +    int 				this_cpu;
   7.213 +	s_time_t			now;
   7.214 +	s32					r_time;		/* time for new dom to run */
   7.215 +	s32					ranfor;	    /* assume we never run longer than 2.1s! */
   7.216 +	s32					mcus;
   7.217 +	u32					next_evt, next_prime_evt;
   7.218 +
   7.219 +	perfc_incrc(sched_run1);
   7.220 + need_resched_back:
   7.221 +	perfc_incrc(sched_run2);
   7.222 +
   7.223 +	now = NOW();
   7.224 +
   7.225 +	/* remove timer  */
   7.226 +	rem_ac_timer(&schedule_data[smp_processor_id()].s_timer);
   7.227 +
   7.228 +    next = NULL;
   7.229 +    prev = current;
   7.230 +    this_cpu = prev->processor;
   7.231 +
   7.232 +	/*
   7.233 +     * deschedule the current domain
   7.234 +     */
   7.235 +
   7.236 +    spin_lock_irq(&schedule_data[this_cpu].lock);
   7.237 +
   7.238 +    ASSERT(!in_interrupt());
   7.239 +    ASSERT(__task_on_runqueue(prev));
   7.240 +
   7.241 +	if (is_idle_task(prev)) 
   7.242 +		goto deschedule_done;
   7.243 +
   7.244 +	/* do some accounting */
   7.245 +	ranfor = (s32)(now - prev->lastschd);
   7.246 +    ASSERT((ranfor>0));
   7.247 +	prev->cpu_time += ranfor;
   7.248 +	
   7.249 +	/* calculate mcu and update avt */
   7.250 +	mcus = ranfor/MCU;
   7.251 +	if (ranfor % MCU) mcus ++;	/* always round up */
   7.252 +	prev->avt += mcus * prev->mcu_advance;
   7.253 +	prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */
   7.254 +
   7.255 +	/* dequeue */
   7.256 +	__del_from_runqueue(prev);
   7.257 +	switch (prev->state) {
   7.258 +	case TASK_INTERRUPTIBLE:
   7.259 +		if (signal_pending(prev)) {
   7.260 +			prev->state = TASK_RUNNING; /* but has events pending */
   7.261 +			break;
   7.262 +		}
   7.263 +	case TASK_UNINTERRUPTIBLE:
   7.264 +	case TASK_WAIT:
   7.265 +	case TASK_DYING:
   7.266 +	default:
   7.267 +		/* done if not running. Else, continue */
   7.268 +		goto deschedule_done;
   7.269 +	case TASK_RUNNING:;
   7.270 +	}
   7.271 +
   7.272 +	/* requeue */
   7.273 +	__add_to_runqueue_tail(prev);
   7.274 +	
   7.275 +
   7.276 + deschedule_done:
   7.277 +    clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
   7.278 +
   7.279 +	/*
   7.280 +     * Pick a new domain
   7.281 +     */
   7.282 +
   7.283 +	/* we should at least have the idle task */
   7.284 +	ASSERT(!list_empty(&schedule_data[smp_processor_id()].runqueue));
   7.285 +
   7.286 +	/*
   7.287 +     * scan through the run queue and pick the task with the lowest evt
    7.288 +     * *and* the task with the second lowest evt.
   7.289 +	 * this code is O(n) but we expect n to be small.
   7.290 +     */
   7.291 +	next       = NULL;
   7.292 +	next_prime = NULL;
   7.293 +
   7.294 +	next_evt       = 0xffffffff;
   7.295 +	next_prime_evt = 0xffffffff;
   7.296 +
   7.297 +	list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
   7.298 +		p = list_entry(tmp, struct task_struct, run_list);
   7.299 +		if (p->evt < next_evt) {
   7.300 +			next_prime     = next;
   7.301 +			next_prime_evt = next_evt;
   7.302 +			next = p;
   7.303 +			next_evt = p->evt;
   7.304 +		}
   7.305 +	}
   7.306 +	ASSERT(next != NULL);	/* we should have at least the idle task */
   7.307 +
   7.308 +	if (next == NULL || is_idle_task(next)) {
   7.309 +		next = &idle0_task;	/* to be sure */
   7.310 +		r_time = CTX_ALLOW;
   7.311 +		goto sched_done;
   7.312 +	}
   7.313 +
   7.314 +	if (next_prime == NULL || is_idle_task(next_prime)) {
    7.315 +		/* we have only one runnable task besides the idle task */
   7.316 +		r_time = CTX_ALLOW;		/* RN: XXX should be much larger */
   7.317 +		goto sched_done;
   7.318 +	}
   7.319 +
   7.320 +	/*
    7.321 +     * if we are here we have two runnable tasks.
   7.322 +	 * work out how long 'next' can run till its evt is greater than
   7.323 +     * 'next_prime's evt. Taking context switch allowance into account.
   7.324 +     */
    7.325 +	r_time = MCU * ((next_prime->evt - next->evt)/next->mcu_advance) + CTX_ALLOW;
   7.326 +
   7.327 + sched_done:
   7.328 +	ASSERT(r_time != 0);
   7.329 +	ASSERT(r_time > 0);
   7.330 +
   7.331 +    prev->has_cpu = 0;
   7.332 +    next->has_cpu = 1;
   7.333 +
   7.334 +    schedule_data[this_cpu].prev = prev;
   7.335 +    schedule_data[this_cpu].curr = next;
   7.336 +
   7.337 +	next->lastschd = now;
   7.338 +
    7.339 +	/* reprogram the timer */
   7.340 + timer_redo:
   7.341 +	schedule_data[this_cpu].s_timer.expires  = now + r_time;
   7.342 +	if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
   7.343 +		printk("SCHED: Shit this shouldn't happen\n");
   7.344 +		now = NOW();
   7.345 +		goto timer_redo;
   7.346 +	}
   7.347 +
   7.348 +    spin_unlock_irq(&schedule_data[this_cpu].lock);
   7.349 +
   7.350 +    if ( unlikely(prev == next) )
   7.351 +    {
   7.352 +        /* We won't go through the normal tail, so do this by hand */
   7.353 +        prev->policy &= ~SCHED_YIELD;
   7.354 +        goto same_process;
   7.355 +    }
   7.356 +
   7.357 +    prepare_to_switch();
   7.358 +    switch_to(prev, next);
   7.359 +    prev = schedule_data[this_cpu].prev;
   7.360 +    
   7.361 +    prev->policy &= ~SCHED_YIELD;
   7.362 +    if ( prev->state == TASK_DYING ) release_task(prev);
   7.363 +
   7.364 + same_process:
    7.365 +	/* update the domain's notion of time */
   7.366 +    update_dom_time(current->shared_info);
   7.367 +
   7.368 +    if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) {
   7.369 +        goto need_resched_back;
   7.370 +	}
   7.371 +    return;
   7.372 +}
   7.373 +
   7.374 +/*
   7.375 + * The scheduler timer.
   7.376 + */
   7.377 +static void sched_timer(unsigned long foo)
   7.378 +{
   7.379 +    int 				cpu  = smp_processor_id();
   7.380 +    struct task_struct *curr = schedule_data[cpu].curr;
   7.381 +	/* cause a reschedule */
   7.382 +	set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.383 +	perfc_incrc(sched_irq);
   7.384 +}
   7.385 +
   7.386 +/*
   7.387 + * The Domain virtual time timer
   7.388 + */
   7.389 +static void virt_timer(unsigned long foo)
   7.390 +{
   7.391 +	unsigned long cpu_mask = 0;
   7.392 +	struct task_struct *p;
   7.393 +	s_time_t now;
   7.394 +	int res;
   7.395 +
   7.396 +	/* send virtual timer interrupt */
   7.397 +	read_lock(&tasklist_lock);
   7.398 +	p = &idle0_task;
   7.399 +	do {
   7.400 +		if ( is_idle_task(p) ) continue;
   7.401 +		cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
   7.402 +	}
   7.403 +	while ( (p = p->next_task) != &idle0_task );
   7.404 +	read_unlock(&tasklist_lock);
   7.405 +	guest_event_notify(cpu_mask);
   7.406 +
   7.407 +	again:
   7.408 +    now = NOW();
   7.409 +    v_timer.expires  = now + MILLISECS(10);
   7.410 +    res=add_ac_timer(&v_timer);
   7.411 +    if (res==1)
   7.412 +        goto again;
   7.413 +}
   7.414 +
   7.415 +/*
   7.416 + * Initialise the data structures
   7.417 + */
   7.418 +void __init scheduler_init(void)
   7.419 +{
   7.420 +    int i;
   7.421 +
   7.422 +    printk("Initialising schedulers\n");
   7.423 +
   7.424 +    for ( i = 0; i < NR_CPUS; i++ )
   7.425 +    {
   7.426 +        INIT_LIST_HEAD(&schedule_data[i].runqueue);
   7.427 +        spin_lock_init(&schedule_data[i].lock);
   7.428 +        schedule_data[i].prev = &idle0_task;
   7.429 +        schedule_data[i].curr = &idle0_task;
   7.430 +		
   7.431 +        /* a timer for each CPU  */
   7.432 +        init_ac_timer(&schedule_data[i].s_timer);
   7.433 +        schedule_data[i].s_timer.function = &sched_timer;
   7.434 +
   7.435 +    }
   7.436 +	init_ac_timer(&v_timer);
   7.437 +	v_timer.function = &virt_timer;
   7.438 +}
   7.439 +
   7.440 +/*
   7.441 + * Start a scheduler for each CPU
   7.442 + * This has to be done *after* the timers, e.g., APICs, have been initialised
   7.443 + */
   7.444 +void schedulers_start(void) 
   7.445 +{	
   7.446 +    printk("Start schedulers\n");
   7.447 +    __cli();
   7.448 +    sched_timer(0);
   7.449 +	virt_timer(0);
   7.450 +    smp_call_function((void *)sched_timer, NULL, 1, 1);
   7.451 +    __sti();
   7.452 +
   7.453 +	//add_key_handler('r', dump_run_queues, "dump run queues")
   7.454 +}
   7.455 +#if 0
   7.456 +/****************************************************************************
   7.457 + * Debugging functions
   7.458 + ****************************************************************************/
   7.459 +static void dump_run_queues(u_char key, void *dev_id, struct pt_regs *regs) 
   7.460 +{
   7.461 +    u_long flags; 
   7.462 +    struct task_struct *p; 
   7.463 +    shared_info_t *s; 
   7.464 +
   7.465 +    printk("'%c' pressed -> dumping run queues\n", key); 
   7.466 +    read_lock_irqsave(&tasklist_lock, flags); 
   7.467 +    p = &idle0_task;
   7.468 +    do {
   7.469 +        printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
   7.470 +	       "hyp_events = %08x\n", 
   7.471 +	       p->domain, p->processor, p->has_cpu ? 'T':'F', 
   7.472 +	       task_states[p->state], p->hyp_events); 
   7.473 +	s = p->shared_info; 
   7.474 +	if(!is_idle_task(p)) {
   7.475 +	    printk("Guest: events = %08lx, event_enable = %08lx\n", 
   7.476 +		   s->events, s->events_enable); 
   7.477 +	    printk("Notifying guest...\n"); 
   7.478 +	    set_bit(_EVENT_DEBUG, &s->events); 
   7.479 +	}
   7.480 +    } while ( (p = p->next_task) != &idle0_task );
   7.481 +
   7.482 +    read_unlock_irqrestore(&tasklist_lock, flags); 
   7.483 +}
   7.484 +#endif
   7.485 +
   7.486 +
   7.487 +/****************************************************************************
   7.488 + * Functions for legacy support. 
    7.489 + * Schedule timeout is used in a number of places and is a bit meaningless
    7.490 + * in the context of Xen, as Domains are not able to call these and all
    7.491 + * their entry points into Xen should be asynchronous. If a domain wishes
   7.492 + * to block for a while it should use Xen's sched_op entry point.
   7.493 + ****************************************************************************/
   7.494 +
   7.495  static void process_timeout(unsigned long __data)
   7.496  {
   7.497      struct task_struct * p = (struct task_struct *) __data;
   7.498 @@ -172,200 +533,3 @@ long schedule_timeout(long timeout)
   7.499   out:
   7.500      return timeout < 0 ? 0 : timeout;
   7.501  }
   7.502 -
   7.503 -/* RN: XXX turn this into do_halt() */
   7.504 -/*
   7.505 - * yield the current process
   7.506 - */
   7.507 -long do_sched_op(void)
   7.508 -{
   7.509 -    current->state = TASK_INTERRUPTIBLE;
   7.510 -    schedule();
   7.511 -    return 0;
   7.512 -}
   7.513 -
   7.514 -
   7.515 -void reschedule(struct task_struct *p)
   7.516 -{
   7.517 -    int cpu = p->processor;
   7.518 -    struct task_struct *curr;
   7.519 -    unsigned long flags;
   7.520 -
   7.521 -    if (p->has_cpu)
   7.522 -		return;
   7.523 -
   7.524 -    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
   7.525 -    curr = schedule_data[cpu].curr;
   7.526 -    if (is_idle_task(curr)) {
   7.527 -        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.528 -        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   7.529 -#ifdef CONFIG_SMP
   7.530 -        if (cpu != smp_processor_id())
   7.531 -			smp_send_event_check_cpu(cpu);
   7.532 -#endif
   7.533 -    } else {
   7.534 -        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   7.535 -    }
   7.536 -}
   7.537 -
   7.538 -
   7.539 -/*
   7.540 - * Pick the next domain to run
   7.541 - */
   7.542 -
   7.543 -asmlinkage void schedule(void)
   7.544 -{
   7.545 -    struct task_struct *prev, *next, *p;
   7.546 -    struct list_head *tmp;
   7.547 -    int this_cpu;
   7.548 -
   7.549 - need_resched_back:
   7.550 -    prev = current;
   7.551 -    this_cpu = prev->processor;
   7.552 -
   7.553 -    spin_lock_irq(&schedule_data[this_cpu].lock);
   7.554 -
   7.555 -    ASSERT(!in_interrupt());
   7.556 -    ASSERT(__task_on_runqueue(prev));
   7.557 -
   7.558 -	__move_last_runqueue(prev);
   7.559 -
   7.560 -    switch ( prev->state )
   7.561 -    {
   7.562 -    case TASK_INTERRUPTIBLE:
   7.563 -        if ( signal_pending(prev) )
   7.564 -        {
   7.565 -            prev->state = TASK_RUNNING;
   7.566 -            break;
   7.567 -        }
   7.568 -    default:
   7.569 -        __del_from_runqueue(prev);
   7.570 -    case TASK_RUNNING:;
   7.571 -    }
   7.572 -    clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
   7.573 -
   7.574 -    next = NULL;
   7.575 -    list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
   7.576 -        p = list_entry(tmp, struct task_struct, run_list);
   7.577 -        next = p;
   7.578 -        if ( !is_idle_task(next) ) break;
   7.579 -    }
   7.580 -
   7.581 -    prev->has_cpu = 0;
   7.582 -    next->has_cpu = 1;
   7.583 -
   7.584 -    schedule_data[this_cpu].prev = prev;
   7.585 -    schedule_data[this_cpu].curr = next;
   7.586 -
   7.587 -    spin_unlock_irq(&schedule_data[this_cpu].lock);
   7.588 -
   7.589 -    if ( unlikely(prev == next) )
   7.590 -    {
   7.591 -        /* We won't go through the normal tail, so do this by hand */
   7.592 -        prev->policy &= ~SCHED_YIELD;
   7.593 -        goto same_process;
   7.594 -    }
   7.595 -
   7.596 -    prepare_to_switch();
   7.597 -    switch_to(prev, next);
   7.598 -    prev = schedule_data[this_cpu].prev;
   7.599 -    
   7.600 -    prev->policy &= ~SCHED_YIELD;
   7.601 -    if ( prev->state == TASK_DYING ) release_task(prev);
   7.602 -
   7.603 - same_process:
   7.604 -    update_dom_time(current->shared_info);
   7.605 -
   7.606 -    if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
   7.607 -        goto need_resched_back;
   7.608 -    return;
   7.609 -}
   7.610 -
   7.611 -/*
   7.612 - * The scheduling timer.
   7.613 - */
   7.614 -static __cacheline_aligned int count[NR_CPUS];
   7.615 -static void sched_timer(unsigned long foo)
   7.616 -{
   7.617 -    int 				cpu  = smp_processor_id();
   7.618 -    struct task_struct *curr = schedule_data[cpu].curr;
   7.619 -    s_time_t			now;
   7.620 -    int 				res;
   7.621 -
   7.622 -    /* reschedule after each 5 ticks */
   7.623 -    if (count[cpu] >= 5) {
   7.624 -        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   7.625 -        count[cpu] = 0;
   7.626 -    }
   7.627 -    count[cpu]++;
   7.628 -
   7.629 -    /*
   7.630 -     * deliver virtual timer interrups to domains if we are CPU 0 XXX RN: We
   7.631 -     * don't have a per CPU list of domains yet. Otherwise would use that.
   7.632 -     * Plus, this should be removed anyway once Domains "know" about virtual
   7.633 -     * time and timeouts. But, it's better here then where it was before.
   7.634 -     */
   7.635 -    if (cpu == 0) {
   7.636 -        struct task_struct *p;
   7.637 -        unsigned long cpu_mask = 0;
   7.638 -
   7.639 -        /* send virtual timer interrupt */
   7.640 -        read_lock(&tasklist_lock);
   7.641 -        p = &idle0_task;
   7.642 -        do {
   7.643 -            if ( is_idle_task(p) ) continue;
   7.644 -            cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
   7.645 -        }
   7.646 -        while ( (p = p->next_task) != &idle0_task );
   7.647 -        read_unlock(&tasklist_lock);
   7.648 -        guest_event_notify(cpu_mask);
   7.649 -    }
   7.650 -
   7.651 - again:
   7.652 -    now = NOW();
   7.653 -    s_timer[cpu].expires  = now + MILLISECS(10);
   7.654 -    res=add_ac_timer(&s_timer[cpu]);
   7.655 -
   7.656 -    TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n",
   7.657 -               cpu, (u32)(now>>32), (u32)now,
   7.658 -               (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires));
   7.659 -    if (res==1)
   7.660 -        goto again;
   7.661 -
   7.662 -}
   7.663 -
   7.664 -
   7.665 -/*
   7.666 - * Initialise the data structures
   7.667 - */
   7.668 -void __init scheduler_init(void)
   7.669 -{
   7.670 -    int i;
   7.671 -
   7.672 -    printk("Initialising schedulers\n");
   7.673 -
   7.674 -    for ( i = 0; i < NR_CPUS; i++ )
   7.675 -    {
   7.676 -        INIT_LIST_HEAD(&schedule_data[i].runqueue);
   7.677 -        spin_lock_init(&schedule_data[i].lock);
   7.678 -        schedule_data[i].prev = &idle0_task;
   7.679 -        schedule_data[i].curr = &idle0_task;
   7.680 -		
   7.681 -        /* a timer for each CPU  */
   7.682 -        init_ac_timer(&s_timer[i]);
   7.683 -        s_timer[i].function = &sched_timer;
   7.684 -    }
   7.685 -}
   7.686 -
   7.687 -/*
   7.688 - * Start a scheduler for each CPU
   7.689 - * This has to be done *after* the timers, e.g., APICs, have been initialised
   7.690 - */
   7.691 -void schedulers_start(void) 
   7.692 -{	
   7.693 -    printk("Start schedulers\n");
   7.694 -    __cli();
   7.695 -    sched_timer(0);
   7.696 -    smp_call_function((void *)sched_timer, NULL, 1, 1);
   7.697 -    __sti();
   7.698 -}
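
The arithmetic in the new schedule() is the core of BVT: CPU time is charged in MCUs of 100us each, rounded up; each MCU advances the domain's actual virtual time avt by mcu_advance (so a larger mcu_advance means a lighter weight); and the chosen domain runs until its effective virtual time would pass the runner-up's, plus the context-switch allowance. A standalone sketch of that accounting with illustrative numbers:

    #include <stdio.h>

    /* units mirror the changeset: times in ns, MCU = 100us, CTX_ALLOW = 10ms */
    #define MICROSECS(x) ((long long)(x) * 1000)
    #define MILLISECS(x) ((long long)(x) * 1000000)
    #define MCU          ((int)MICROSECS(100))
    #define CTX_ALLOW    ((int)MILLISECS(10))

    int main(void)
    {
        long mcu_advance = 2;              /* hypothetical weight */
        unsigned int avt = 1000, evt;

        /* domain ran for 1.35ms: charge it, rounding up to whole MCUs */
        int ranfor = (int)MICROSECS(1350);
        int mcus = ranfor / MCU;
        if (ranfor % MCU) mcus++;          /* always round up, as in schedule() */
        avt += mcus * mcu_advance;         /* 14 MCUs -> avt = 1028 */
        evt = avt;                         /* no warp in this sketch */

        /* timeslice: run until our evt would pass the runner-up's evt */
        unsigned int next_prime_evt = 1100;
        int r_time = MCU * (int)((next_prime_evt - evt) / mcu_advance) + CTX_ALLOW;

        printf("mcus=%d avt=%u evt=%u r_time=%d ns\n", mcus, avt, evt, r_time);
        return 0;   /* mcus=14 avt=1028 evt=1028 r_time=13600000 ns */
    }
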
     8.1 --- a/xen/include/xeno/dom0_ops.h	Mon Mar 03 15:43:22 2003 +0000
     8.2 +++ b/xen/include/xeno/dom0_ops.h	Tue Mar 11 10:30:35 2003 +0000
     8.3 @@ -4,8 +4,11 @@
     8.4   * Process command requests from domain-0 guest OS.
     8.5   * 
     8.6   * Copyright (c) 2002, K A Fraser, B Dragovic
     8.7 + * 
     8.8 + * MUST BE KEPT IN SYNC WITH xenolinux<*>/arch/xeno/drivers/dom0/dom0_ops.h
     8.9   */
    8.10  
    8.11 +
    8.12  #ifndef __DOM0_OPS_H__
    8.13  #define __DOM0_OPS_H__
    8.14  
    8.15 @@ -13,6 +16,8 @@
    8.16  #define DOM0_KILLDOMAIN  1
    8.17  #define DOM0_GETMEMLIST  2
    8.18  #define DOM0_STARTDOM    4
    8.19 +#define DOM0_BVTCTL      6
    8.20 +#define DOM0_ADJUSTDOM   7
    8.21  
    8.22  #define MAX_CMD_LEN    256
    8.23  
    8.24 @@ -48,6 +53,20 @@ typedef struct domain_launch
    8.25      char cmd_line[MAX_CMD_LEN];
    8.26  } dom_meminfo_t;
    8.27  
    8.28 +typedef struct dom0_bvtctl_st
    8.29 +{
    8.30 +	unsigned long ctx_allow;	/* context switch allowance */
    8.31 +} dom0_bvtctl_t;
    8.32 +
    8.33 +typedef struct dom0_adjustdom_st
    8.34 +{
    8.35 +    unsigned int  domain;	/* domain id */
    8.36 +	unsigned long mcu_adv;	/* mcu advance: inverse of weight */
    8.37 +	unsigned long warp;     /* time warp */
    8.38 +	unsigned long warpl;    /* warp limit */
    8.39 +	unsigned long warpu;    /* unwarp time requirement */
    8.40 +} dom0_adjustdom_t;
    8.41 +
    8.42  typedef struct dom0_op_st
    8.43  {
    8.44      unsigned long cmd;
    8.45 @@ -56,6 +75,8 @@ typedef struct dom0_op_st
    8.46          dom0_newdomain_t newdomain;
    8.47          dom0_killdomain_t killdomain;
    8.48          dom0_getmemlist_t getmemlist;
    8.49 +		dom0_bvtctl_t bvtctl;
    8.50 +		dom0_adjustdom_t adjustdom;
    8.51          dom_meminfo_t meminfo;
    8.52      }
    8.53      u;
     9.1 --- a/xen/include/xeno/perfc.h	Mon Mar 03 15:43:22 2003 +0000
     9.2 +++ b/xen/include/xeno/perfc.h	Tue Mar 11 10:30:35 2003 +0000
     9.3 @@ -3,22 +3,27 @@
     9.4   */
     9.5  
     9.6  /* 
     9.7 - * NOTE: new counters must be defined in xen_perf_defn.h
     9.8 + * NOTE: new counters must be defined in perfc_defn.h
     9.9   * 
    9.10   * PERFCOUNTER (counter, string)              define a new performance counter
     9.11 + * PERFCOUNTER_CPU (counter, string)          define a counter per CPU
    9.12   * PERFCOUNTER_ARRY (counter, string, size)   define an array of counters
    9.13   * 
    9.14   * unsigned long perfc_value  (counter)        get value of a counter  
    9.15   * unsigned long perfc_valuea (counter, index) get value of an array counter
    9.16 - * void perfc_incr   (counter)                 increment a counter          
    9.17 - * void perfc_incra  (counter, index)          increment an array counter   
    9.18 - * void perfc_add    (counter, value)          add a value to a counter     
    9.19 - * void perfc_adda   (counter, index, value)   add a value to array counter 
    9.20 - * void perfc_print  (counter)                 print out the counter
    9.21 + * void perfc_incr  (counter)                 increment a counter          
     9.22 + * void perfc_incrc (counter)                increment a per CPU counter
    9.23 + * void perfc_incra (counter, index)          increment an array counter   
    9.24 + * void perfc_add   (counter, value)          add a value to a counter     
    9.25 + * void perfc_addc  (counter, value)          add a value to a per CPU counter 
    9.26 + * void perfc_adda  (counter, index, value)   add a value to array counter 
    9.27 + * void perfc_print (counter)                 print out the counter
    9.28   */
    9.29  
    9.30  #define PERFCOUNTER( var, name ) \
    9.31  unsigned long var[1];
    9.32 +#define PERFCOUNTER_CPU( var, name ) \
    9.33 +unsigned long var[NR_CPUS];
    9.34  #define PERFCOUNTER_ARRAY( var, name, size ) \
    9.35  unsigned long var[size];
    9.36  
    9.37 @@ -30,12 +35,15 @@ struct perfcounter_t
    9.38  extern struct perfcounter_t perfcounters;
    9.39  extern char *perfc_name[];
    9.40  
    9.41 -#define perf_value(x)    perfcounters.x[0]
    9.42 -#define perf_valuea(x,y) perfcounters.x[y]
    9.43 -#define perf_incr(x)     perfcounters.x[0]++
    9.44 -#define perf_incra(x,y)  perfcounters.x[y]++
    9.45 -#define perf_add(x,y)    perfcounters.x[0]+=(y)
    9.46 -#define perf_adda(x,y,z) perfcounters.x[y]+=(z)
    9.47 +#define perfc_value(x)    perfcounters.x[0]
    9.48 +#define perfc_valuec(x)   perfcounters.x[smp_processor_id()]
    9.49 +#define perfc_valuea(x,y) perfcounters.x[y]
    9.50 +#define perfc_incr(x)     perfcounters.x[0]++
    9.51 +#define perfc_incrc(x)    perfcounters.x[smp_processor_id()]++
    9.52 +#define perfc_incra(x,y)  perfcounters.x[y]++
    9.53 +#define perfc_add(x,y)    perfcounters.x[0]+=(y)
    9.54 +#define perfc_addc(x,y)   perfcounters.x[smp_processor_id()]+=(y)
    9.55 +#define perfc_adda(x,y,z) perfcounters.x[y]+=(z)
    9.56  
    9.57  #define perf_print(x) \
    9.58    __perfc_print(perfcounters.x, \
    10.1 --- a/xen/include/xeno/perfc_defn.h	Mon Mar 03 15:43:22 2003 +0000
    10.2 +++ b/xen/include/xeno/perfc_defn.h	Tue Mar 11 10:30:35 2003 +0000
    10.3 @@ -2,3 +2,8 @@
    10.4  PERFCOUNTER( blockio_tx, "block io: messages received from tx queue" )
    10.5  PERFCOUNTER( blockio_rx, "block io: messages sent on rx queue" )
    10.6  
    10.7 +PERFCOUNTER_CPU( apic_timer, "apic timer interrupts" )
    10.8 +PERFCOUNTER_CPU( sched_irq,  "sched: timer" )
    10.9 +PERFCOUNTER_CPU( sched_run1, "sched: calls to schedule" )
   10.10 +PERFCOUNTER_CPU( sched_run2, "sched: runs through scheduler" )
   10.11 +
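
With these macros, adding a counter is a one-liner here plus an increment at the event site. For example (the counter name is hypothetical):

    /* in perfc_defn.h: one slot per CPU, shown by the perfc print routines */
    PERFCOUNTER_CPU( net_irq, "net: interrupts" )

    /* at the event site; lock-free, since each CPU owns its own slot */
    perfc_incrc(net_irq);
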
    11.1 --- a/xen/include/xeno/sched.h	Mon Mar 03 15:43:22 2003 +0000
    11.2 +++ b/xen/include/xeno/sched.h	Tue Mar 11 10:30:35 2003 +0000
    11.3 @@ -12,6 +12,10 @@
    11.4  #include <hypervisor-ifs/hypervisor-if.h>
    11.5  #include <xeno/dom0_ops.h>
    11.6  
    11.7 +#include <xeno/list.h>
    11.8 +#include <xeno/time.h>
    11.9 +#include <xeno/ac_timer.h>
   11.10 +
   11.11  extern unsigned long volatile jiffies;
   11.12  extern rwlock_t tasklist_lock;
   11.13  
   11.14 @@ -58,17 +62,47 @@ extern struct mm_struct init_mm;
   11.15  
   11.16  struct task_struct {
   11.17  
   11.18 -    int processor;
   11.19 -    int state;
   11.20 -    int hyp_events;
   11.21 -    unsigned int domain;
   11.22 +	/*
   11.23 +     * DO NOT CHANGE THE ORDER OF THE FOLLOWING.
    11.24 +     * Their offsets are hardcoded in entry.S
   11.25 +     */
   11.26 +
   11.27 +    int processor;               /* 00: current processor */
   11.28 +    int state;	                 /* 04: current run state */
   11.29 +	int hyp_events;              /* 08: pending events */
   11.30 +    unsigned int domain;         /* 12: domain id */
   11.31  
   11.32      /* An unsafe pointer into a shared data area. */
   11.33 -    shared_info_t *shared_info;
   11.34 +    shared_info_t *shared_info;  /* 16: shared data area */
   11.35 +
   11.36 +	/*
   11.37 +     * From here on things can be added and shuffled without special attention
   11.38 +     */
   11.39      
   11.40      struct list_head pg_head;
   11.41 -    unsigned int tot_pages;     /* number of pages currently possesed */
   11.42 -    unsigned int max_pages;     /* max number of pages that can be possesed */
    11.43 +    unsigned int tot_pages;      /* number of pages currently possessed */
    11.44 +    unsigned int max_pages;      /* max number of pages that can be possessed */
   11.45 +
   11.46 +	/* scheduling */
   11.47 +    struct list_head run_list;		/* the run list  */
   11.48 +    int 			 has_cpu;
   11.49 +	int 			 policy;
   11.50 +	int 			 counter;
   11.51 +    
   11.52 +	struct ac_timer	blt;	        /* blocked timeout */
   11.53 +
   11.54 +	s_time_t lastschd;		        /* time this domain was last scheduled */
   11.55 +	s_time_t cpu_time;		        /* total CPU time received till now */
   11.56 +
   11.57 +	long mcu_advance;		        /* inverse of weight */
   11.58 +	u32  avt;			            /* actual virtual time */
   11.59 +	u32  evt;			            /* effective virtual time */
   11.60 +	long warp;			            /* virtual time warp */
   11.61 +	long warpl;			            /* warp limit */
   11.62 +	long warpu;			            /* unwarp time requirement */
   11.63 +	long warped;		            /* time it ran warped last time */
   11.64 +	long uwarped;		            /* time it ran unwarped last time */
   11.65 +
   11.66  
   11.67      /* Network I/O */
   11.68      net_ring_t *net_ring_base;
   11.69 @@ -81,12 +115,8 @@ struct task_struct {
   11.70      struct list_head blkdev_list;
   11.71      spinlock_t blk_ring_lock;
   11.72  
   11.73 -    int has_cpu, policy, counter;
   11.74 -
   11.75 -    struct list_head run_list;
   11.76 -    
   11.77 +	/* VM */
   11.78      struct mm_struct mm;
   11.79 -
   11.80      mm_segment_t addr_limit;        /* thread address space:
   11.81                                         0-0xBFFFFFFF for user-thead
   11.82                                         0-0xFFFFFFFF for kernel-thread
   11.83 @@ -135,6 +165,8 @@ struct task_struct {
   11.84      domain:      IDLE_DOMAIN_ID, \
   11.85      state:       TASK_RUNNING,   \
   11.86      has_cpu:     0,              \
   11.87 +    evt:         0x7fffffff,     \
   11.88 +    avt:         0x7fffffff,     \
   11.89      mm:          IDLE0_MM,       \
   11.90      addr_limit:  KERNEL_DS,      \
   11.91      active_mm:   &idle0_task.mm, \
   11.92 @@ -196,6 +228,8 @@ void scheduler_init(void);
   11.93  void schedulers_start(void);
   11.94  void sched_add_domain(struct task_struct *p);
   11.95  void sched_rem_domain(struct task_struct *p);
   11.96 +long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, 
   11.97 +				  unsigned long warpl, unsigned long warpu);
   11.98  int  wake_up(struct task_struct *p);
   11.99  long schedule_timeout(long timeout);
  11.100  long do_yield(void);
    12.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile	Mon Mar 03 15:43:22 2003 +0000
    12.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile	Tue Mar 11 10:30:35 2003 +0000
    12.3 @@ -1,3 +1,3 @@
    12.4  O_TARGET := dom0.o
    12.5 -obj-y := dom0_memory.o dom0_core.o vfr.o
    12.6 +obj-y := dom0_memory.o dom0_core.o vfr.o sched_ops.o
    12.7  include $(TOPDIR)/Rules.make
    13.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h	Mon Mar 03 15:43:22 2003 +0000
    13.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h	Tue Mar 11 10:30:35 2003 +0000
    13.3 @@ -4,15 +4,19 @@
    13.4   * Process command requests from domain-0 guest OS.
    13.5   * 
    13.6   * Copyright (c) 2002, K A Fraser, B Dragovic
    13.7 + * 
    13.8 + * MUST BE KEPT IN SYNC WITH xen/include/xeno/dom0_ops.h
    13.9   */
   13.10  
   13.11  #define DOM0_NEWDOMAIN   0
   13.12  #define DOM0_KILLDOMAIN  1
   13.13  #define DOM0_GETMEMLIST  2
   13.14  #define DOM0_STARTDOM    4
   13.15 -#define MAP_DOM_MEM      6 /* Not passed down to Xen */
   13.16 -#define DO_PGUPDATES     7 /* Not passed down to Xen */
   13.17 -#define MAX_CMD          8
   13.18 +#define DOM0_BVTCTL      6
   13.19 +#define DOM0_ADJUSTDOM   7
   13.20 +#define MAP_DOM_MEM      8 /* Not passed down to Xen */
   13.21 +#define DO_PGUPDATES     9 /* Not passed down to Xen */
   13.22 +#define MAX_CMD         10
   13.23  
   13.24  #define MAX_CMD_LEN     256
   13.25  
   13.26 @@ -20,8 +24,8 @@ typedef struct dom0_newdomain_st
   13.27  {
   13.28      unsigned int domain;
   13.29      unsigned int memory_kb;
   13.30 -    unsigned int num_vifs;  // temporary
   13.31 -    unsigned long pg_head;  // return parameter
   13.32 +    unsigned int num_vifs;  /* temporary */
   13.33 +    unsigned long pg_head;  /* return parameter */
   13.34  } dom0_newdomain_t;
   13.35  
   13.36  typedef struct dom0_killdomain_st
   13.37 @@ -37,6 +41,20 @@ typedef struct dom0_getmemlist_st
   13.38      void *buffer;
   13.39  } dom0_getmemlist_t;
   13.40  
   13.41 +typedef struct dom0_bvtctl_st
   13.42 +{
   13.43 +	unsigned long ctx_allow;	/* context switch allowance */
   13.44 +} dom0_bvtctl_t;
   13.45 +
   13.46 +typedef struct dom0_adjustdom_st
   13.47 +{
   13.48 +    unsigned int  domain;	/* domain id */
   13.49 +	unsigned long mcu_adv;	/* mcu advance: inverse of weight */
   13.50 +	unsigned long warp;     /* time warp */
   13.51 +	unsigned long warpl;    /* warp limit */
   13.52 +	unsigned long warpu;    /* unwarp time requirement */
   13.53 +} dom0_adjustdom_t;
   13.54 +
   13.55  /* This is entirely processed by XenoLinux */
   13.56  typedef struct dom_mem 
   13.57  {
   13.58 @@ -64,6 +82,8 @@ typedef struct domain_launch
   13.59      char cmd_line[MAX_CMD_LEN];
   13.60  } dom_meminfo_t;
   13.61  
   13.62 +
   13.63 +
   13.64  typedef struct dom0_op_st
   13.65  {
   13.66      unsigned long cmd;
   13.67 @@ -72,6 +92,8 @@ typedef struct dom0_op_st
   13.68          dom0_newdomain_t newdomain;
   13.69          dom0_killdomain_t killdomain;
   13.70          dom0_getmemlist_t getmemlist;
   13.71 +		dom0_bvtctl_t bvtctl;
   13.72 +		dom0_adjustdom_t adjustdom;
   13.73          dom_mem_t dommem;
   13.74          dom_pgupdate_t pgupdate;
   13.75          dom_meminfo_t meminfo;
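
From the dom0 side, DOM0_ADJUSTDOM is just this union filled in and passed down. A kernel-side sketch, assuming the HYPERVISOR_dom0_op() hypercall wrapper from asm/hypervisor.h and illustrative parameter values:

    dom0_op_t op;

    op.cmd                 = DOM0_ADJUSTDOM;
    op.u.adjustdom.domain  = 2;        /* hypothetical domain id */
    op.u.adjustdom.mcu_adv = 10;       /* mcu advance: inverse of weight */
    op.u.adjustdom.warp    = 0;        /* no warping */
    op.u.adjustdom.warpl   = 0;
    op.u.adjustdom.warpu   = 0;

    if ( HYPERVISOR_dom0_op(&op) != 0 )
        printk("adjustdom failed\n");
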
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c	Tue Mar 11 10:30:35 2003 +0000
    14.3 @@ -0,0 +1,108 @@
    14.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    14.5 + ****************************************************************************
    14.6 + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
    14.7 + ****************************************************************************
    14.8 + *
    14.9 + *        File: sched_ops.c
   14.10 + *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
   14.11 + *     Changes: 
   14.12 + *              
   14.13 + *        Date: Mar 2003
   14.14 + * 
   14.15 + * Environment: XenoLinux
   14.16 + * Description: Dom0 Control interface to scheduler in Xen
   14.17 + *
   14.18 + * code based on Andy's vfr parsing code
   14.19 + *
   14.20 + * Commands understood by the interface:
   14.21 + *
   14.22 + * S <did> <mcu advance> [ <warp> <warp limit> <unwarp limit> ]
    14.23 + * C <context switch allowance>
   14.24 + *
   14.25 + ****************************************************************************
   14.26 + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
   14.27 + ****************************************************************************
   14.28 + */
   14.29 +
    14.30 +#include <linux/ctype.h>	/* isspace() */
    14.31 +#include <linux/proc_fs.h>
   14.32 +#include <asm/hypervisor.h>
   14.33 +#include "dom0_ops.h"
   14.34 +
   14.35 +#define SCHED_ENTRY    "sched"
   14.36 +extern struct proc_dir_entry *xeno_base;
    14.37 +static struct proc_dir_entry *sched_pde;
    14.38 +static char readbuf[1024];	/* reply buffer, needed by sched_read_proc() */
   14.39 +
   14.40 +static int sched_read_proc(char *page, char **start, off_t off,
   14.41 +						   int count, int *eof, void *data)
   14.42 +{   
   14.43 +    strcpy(page, readbuf);
   14.44 +    *readbuf = '\0';
   14.45 +    *eof = 1;
   14.46 +    *start = page;
   14.47 +    return strlen(page);
   14.48 +}
   14.49 +
   14.50 +
   14.51 +static int sched_write_proc(struct file *file, const char *buffer,
   14.52 +							u_long count, void *data)
   14.53 +{
   14.54 +	dom0_op_t op;
   14.55 +
   14.56 +	int ret, len;
   14.57 +	int ts, te, tl; /* token start, end, and length */
   14.58 +
   14.59 +    /* Only admin can adjust scheduling parameters */
   14.60 +    if ( !capable(CAP_SYS_ADMIN) )
   14.61 +        return -EPERM;
   14.62 +
   14.63 +	/* parse the commands  */
   14.64 +	len = count;
   14.65 +	ts = te = 0;
   14.66 +
   14.67 +	while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces.
    14.68 +	for ( te = ts; count && !isspace(buffer[te]); te++ ) count--; /* token end */
    14.69 +	if ( te <= ts ) return -EINVAL;	/* no command given */
   14.70 +	tl = te - ts;
   14.71 +
   14.72 +	if ( strncmp(&buffer[ts], "S", tl) == 0 )
   14.73 +	{
    14.74 +		op.cmd = DOM0_ADJUSTDOM;
   14.75 +	}
   14.76 +	else if ( strncmp(&buffer[ts], "C", tl) == 0 )
   14.77 +	{
    14.78 +		op.cmd = DOM0_BVTCTL;
   14.79 +	}
    14.80 +	else return -EINVAL;	/* unknown command */
    14.81 +	return count;	/* XXX argument parsing and dom0_op call still to be added */
   14.82 +}
   14.83 +
   14.84 +
   14.85 +/*
    14.86 + * main scheduler interface driver initialization function.
   14.87 + */
   14.88 +static int __init init_module(void)
   14.89 +{
   14.90 +    printk(KERN_ALERT "Starting Domain Scheduler Control Interface\n");
   14.91 +
   14.92 +    sched_pde = create_proc_entry(SCHED_ENTRY, 0600, xeno_base);
   14.93 +    if ( sched_pde == NULL )
   14.94 +    {
   14.95 +        printk(KERN_ALERT "Unable to create dom scheduler proc entry!");
   14.96 +        return -1;
   14.97 +    }
   14.98 +
   14.99 +    sched_pde->read_proc  = sched_read_proc;
  14.100 +    sched_pde->write_proc = sched_write_proc;
  14.101 +
  14.102 +    return 0;
  14.103 +}
  14.104 +
  14.105 +static void __exit cleanup_module(void)
  14.106 +{
  14.107 +}
  14.108 +
  14.109 +module_init(init_module);
  14.110 +module_exit(cleanup_module);
  14.111 +
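
Once the module is loaded, the scheduler can be driven from user space through the new proc file, using the two commands documented in the file header. A small sketch, assuming xeno_base corresponds to /proc/xeno and using illustrative values:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        /* "S <did> <mcu advance> [ <warp> <warp limit> <unwarp limit> ]" */
        const char cmd[] = "S 2 10 0 0 0";
        int fd = open("/proc/xeno/sched", O_WRONLY);

        if (fd < 0) { perror("open"); return 1; }
        if (write(fd, cmd, strlen(cmd)) < 0) perror("write");
        close(fd);
        return 0;
    }
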