ia64/xen-unstable

changeset 2595:fe2f4bbcf869

bitkeeper revision 1.1159.99.4 (41626f06VquclgVVpIeHy9z2K3jW-A)

Rationalise scheduler locking. A bit more conservative now, but much
simpler! I only applied this to the basic BVT scheduler -- the others
are still unsafe and have been removed from the basic build.
author kaf24@freefall.cl.cam.ac.uk
date Tue Oct 05 09:53:10 2004 +0000 (2004-10-05)
parents a28d3cf3832c
children ebe6012dace7
files xen/common/Makefile xen/common/domain.c xen/common/sched_bvt.c xen/common/schedule.c xen/include/xen/sched.h
line diff
     1.1 --- a/xen/common/Makefile	Mon Oct 04 14:55:47 2004 +0000
     1.2 +++ b/xen/common/Makefile	Tue Oct 05 09:53:10 2004 +0000
     1.3 @@ -19,6 +19,14 @@ ifneq ($(trace),y)
     1.4  OBJS := $(subst trace.o,,$(OBJS))
     1.5  endif
     1.6  
     1.7 +ifneq ($(broken_schedulers),y)
     1.8 +OBJS := $(subst sched_atropos.o,,$(OBJS))
     1.9 +OBJS := $(subst sched_fair_bvt.o,,$(OBJS))
    1.10 +OBJS := $(subst sched_rrobin.o,,$(OBJS))
    1.11 +else
    1.12 +CFLAGS += -DBROKEN_SCHEDULERS
    1.13 +endif
    1.14 +
    1.15  default: $(OBJS)
    1.16  	$(LD) $(LDFLAGS) -r -o common.o $(OBJS)
    1.17  
     2.1 --- a/xen/common/domain.c	Mon Oct 04 14:55:47 2004 +0000
     2.2 +++ b/xen/common/domain.c	Tue Oct 05 09:53:10 2004 +0000
     2.3 @@ -39,7 +39,6 @@ struct domain *do_createdomain(domid_t d
     2.4      d->domain    = dom_id;
     2.5      d->processor = cpu;
     2.6      d->create_time = NOW();
     2.7 -    spin_lock_init(&d->sleep_lock);
     2.8   
     2.9      memcpy(&d->thread, &idle0_task.thread, sizeof(d->thread));
    2.10  
     3.1 --- a/xen/common/sched_bvt.c	Mon Oct 04 14:55:47 2004 +0000
     3.2 +++ b/xen/common/sched_bvt.c	Tue Oct 05 09:53:10 2004 +0000
     3.3 @@ -37,7 +37,7 @@ struct bvt_dom_info
     3.4      u32                 evt;              /* effective virtual time */
     3.5      int                 warpback;         /* warp?  */
     3.6      int                 warp;             /* warp set and within the warp 
     3.7 -                                                                     limits*/
     3.8 +                                             limits*/
     3.9      s32                 warp_value;       /* virtual time warp */
    3.10      s_time_t            warpl;            /* warp limit */
    3.11      struct ac_timer     warp_timer;       /* deals with warpl */
    3.12 @@ -47,12 +47,10 @@ struct bvt_dom_info
    3.13  
    3.14  struct bvt_cpu_info
    3.15  {
    3.16 -    spinlock_t          run_lock;   /* protects runqueue */
    3.17 -    struct list_head    runqueue;   /* runqueue for given processor */ 
    3.18 -    unsigned long       svt;        /* XXX check this is unsigned long! */
    3.19 +    struct list_head    runqueue;
    3.20 +    unsigned long       svt;
    3.21  };
    3.22  
    3.23 -
    3.24  #define BVT_INFO(p)   ((struct bvt_dom_info *)(p)->sched_priv)
    3.25  #define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv)
    3.26  #define RUNLIST(p)    ((struct list_head *)&(BVT_INFO(p)->run_list))
    3.27 @@ -64,13 +62,8 @@ struct bvt_cpu_info
    3.28  #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
    3.29  static s32 ctx_allow = (s32)MILLISECS(5);     /* context switch allowance */
    3.30  
    3.31 -/* SLAB cache for struct bvt_dom_info objects */
    3.32  static xmem_cache_t *dom_info_cache;
    3.33  
    3.34 -/*
    3.35 - * Wrappers for run-queue management. Must be called with the run_lock
    3.36 - * held.
    3.37 - */
    3.38  static inline void __add_to_runqueue_head(struct domain *d)
    3.39  {
    3.40      list_add(RUNLIST(d), RUNQUEUE(d->processor));
    3.41 @@ -98,43 +91,42 @@ static inline int __task_on_runqueue(str
    3.42  static void warp_timer_fn(unsigned long pointer)
    3.43  {
    3.44      struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
    3.45 -    unsigned long flags; 
    3.46 +    unsigned int cpu = inf->domain->processor;
    3.47      
    3.48 -    spin_lock_irqsave(&CPU_INFO(inf->domain->processor)->run_lock, flags);
    3.49 +    spin_lock_irq(&schedule_data[cpu].schedule_lock);
    3.50 +
    3.51      inf->warp = 0;
    3.52 +
    3.53      /* unwarp equal to zero => stop warping */
    3.54 -    if(inf->warpu == 0)
    3.55 +    if ( inf->warpu == 0 )
    3.56      {
    3.57          inf->warpback = 0;
    3.58 -        goto reschedule;
    3.59 +        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);   
    3.60      }
    3.61      
    3.62      /* set unwarp timer */
    3.63      inf->unwarp_timer.expires = NOW() + inf->warpu;
    3.64      add_ac_timer(&inf->unwarp_timer);
    3.65 -    spin_unlock_irqrestore(&CPU_INFO(inf->domain->processor)->run_lock, flags);
    3.66  
    3.67 -reschedule:
    3.68 -    cpu_raise_softirq(inf->domain->processor, SCHEDULE_SOFTIRQ);   
    3.69 +    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    3.70  }
    3.71  
    3.72  static void unwarp_timer_fn(unsigned long pointer)
    3.73  {
    3.74 -     struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
    3.75 -     unsigned long flags;
    3.76 +    struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
    3.77 +    unsigned int cpu = inf->domain->processor;
    3.78  
    3.79 -     spin_lock_irqsave(&CPU_INFO(inf->domain->processor)->run_lock, flags);
    3.80 -    if(inf->warpback)
    3.81 +    spin_lock_irq(&schedule_data[cpu].schedule_lock);
    3.82 +
    3.83 +    if ( inf->warpback )
    3.84      {
    3.85          inf->warp = 1;
    3.86 -        cpu_raise_softirq(inf->domain->processor, SCHEDULE_SOFTIRQ);   
    3.87 +        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);   
    3.88      }
    3.89 -    
    3.90 -    spin_unlock_irqrestore(&CPU_INFO(inf->domain->processor)->run_lock, flags);
    3.91 +     
    3.92 +    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    3.93  }
    3.94  
    3.95 -
    3.96 -
    3.97  static inline u32 calc_avt(struct domain *d, s_time_t now)
    3.98  {
    3.99      u32 ranfor, mcus;
   3.100 @@ -146,15 +138,14 @@ static inline u32 calc_avt(struct domain
   3.101      return inf->avt + mcus * inf->mcu_advance;
   3.102  }
   3.103  
   3.104 -
   3.105  /*
   3.106   * Calculate the effective virtual time for a domain. Take into account 
   3.107   * warping limits
   3.108   */
   3.109  static inline u32 calc_evt(struct domain *d, u32 avt)
   3.110  {
   3.111 -   struct bvt_dom_info *inf = BVT_INFO(d);
   3.112 -   /* TODO The warp routines need to be rewritten GM */
   3.113 +    struct bvt_dom_info *inf = BVT_INFO(d);
   3.114 +    /* TODO The warp routines need to be rewritten GM */
   3.115   
   3.116      if ( inf->warp ) 
   3.117          return avt - inf->warp_value;
   3.118 @@ -168,26 +159,25 @@ static inline u32 calc_evt(struct domain
   3.119   *
   3.120   * Returns non-zero on failure.
   3.121   */
   3.122 -int bvt_alloc_task(struct domain *p)
   3.123 +int bvt_alloc_task(struct domain *d)
   3.124  {
   3.125 -    p->sched_priv = xmem_cache_alloc(dom_info_cache);
   3.126 -    if ( p->sched_priv == NULL )
   3.127 +    if ( (d->sched_priv = xmem_cache_alloc(dom_info_cache)) == NULL )
   3.128          return -1;
   3.129 -    
   3.130 +    memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
   3.131      return 0;
   3.132  }
   3.133  
   3.134  /*
   3.135   * Add and remove a domain
   3.136   */
   3.137 -void bvt_add_task(struct domain *p) 
   3.138 +void bvt_add_task(struct domain *d) 
   3.139  {
   3.140 -    struct bvt_dom_info *inf = BVT_INFO(p);
   3.141 +    struct bvt_dom_info *inf = BVT_INFO(d);
   3.142      ASSERT(inf != NULL);
   3.143 -    ASSERT(p   != NULL);
   3.144 +    ASSERT(d   != NULL);
   3.145  
   3.146      inf->mcu_advance = MCU_ADVANCE;
   3.147 -    inf->domain = p;
   3.148 +    inf->domain      = d;
   3.149      inf->warpback    = 0;
   3.150      /* Set some default values here. */
   3.151      inf->warp        = 0;
   3.152 @@ -196,135 +186,95 @@ void bvt_add_task(struct domain *p)
   3.153      inf->warpu       = MILLISECS(1000);
   3.154      /* initialise the timers */
   3.155      init_ac_timer(&inf->warp_timer);
   3.156 -    inf->warp_timer.cpu = p->processor;
   3.157 +    inf->warp_timer.cpu = d->processor;
   3.158      inf->warp_timer.data = (unsigned long)inf;
   3.159      inf->warp_timer.function = &warp_timer_fn;
   3.160      init_ac_timer(&inf->unwarp_timer);
   3.161 -    inf->unwarp_timer.cpu = p->processor;
   3.162 +    inf->unwarp_timer.cpu = d->processor;
   3.163      inf->unwarp_timer.data = (unsigned long)inf;
   3.164      inf->unwarp_timer.function = &unwarp_timer_fn;
   3.165      
   3.166 -    if ( p->domain == IDLE_DOMAIN_ID )
   3.167 +    if ( d->domain == IDLE_DOMAIN_ID )
   3.168      {
   3.169          inf->avt = inf->evt = ~0U;
   3.170      } 
   3.171      else 
   3.172      {
   3.173          /* Set avt and evt to system virtual time. */
   3.174 -        inf->avt         = CPU_SVT(p->processor);
   3.175 -        inf->evt         = CPU_SVT(p->processor);
   3.176 -   }
   3.177 -
   3.178 -    return;
   3.179 +        inf->avt = CPU_SVT(d->processor);
   3.180 +        inf->evt = CPU_SVT(d->processor);
   3.181 +    }
   3.182  }
   3.183  
   3.184  int bvt_init_idle_task(struct domain *p)
   3.185  {
   3.186 -    unsigned long flags;
   3.187 -
   3.188 -    if(bvt_alloc_task(p) < 0) return -1;
   3.189 +    if ( bvt_alloc_task(p) < 0 )
   3.190 +        return -1;
   3.191  
   3.192      bvt_add_task(p);
   3.193  
   3.194 -    spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);
   3.195 -    
   3.196      set_bit(DF_RUNNING, &p->flags);
   3.197      if ( !__task_on_runqueue(p) )
   3.198          __add_to_runqueue_head(p);
   3.199          
   3.200 -    spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   3.201 -
   3.202      return 0;
   3.203  }
   3.204  
   3.205  void bvt_wake(struct domain *d)
   3.206  {
   3.207 -    unsigned long       flags;
   3.208      struct bvt_dom_info *inf = BVT_INFO(d);
   3.209      struct domain       *curr;
   3.210      s_time_t            now, r_time;
   3.211      int                 cpu = d->processor;
   3.212      u32                 curr_evt;
   3.213  
   3.214 -    /* The runqueue accesses must be protected */
   3.215 -    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
   3.216 -    
   3.217 -    /* If on the runqueue already then someone has done the wakeup work. */
   3.218      if ( unlikely(__task_on_runqueue(d)) )
   3.219 -    {
   3.220 -        spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
   3.221          return;
   3.222 -    }
   3.223  
   3.224      __add_to_runqueue_head(d);
   3.225  
   3.226      now = NOW();
   3.227  
   3.228      /* Set the BVT parameters. AVT should always be updated 
   3.229 -        if CPU migration ocurred.*/
   3.230 +       if CPU migration ocurred.*/
   3.231      if ( inf->avt < CPU_SVT(cpu) || 
   3.232 -            unlikely(test_bit(DF_MIGRATED, &d->flags)) )
   3.233 +         unlikely(test_bit(DF_MIGRATED, &d->flags)) )
   3.234          inf->avt = CPU_SVT(cpu);
   3.235  
   3.236      /* Deal with warping here. */
   3.237      inf->evt = calc_evt(d, inf->avt);
   3.238 -    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
   3.239 -    
   3.240 -    /* Access to schedule_data protected by schedule_lock */
   3.241 -    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
   3.242      
   3.243      curr = schedule_data[cpu].curr;
   3.244      curr_evt = calc_evt(curr, calc_avt(curr, now));
   3.245      /* Calculate the time the current domain would run assuming
   3.246         the second smallest evt is of the newly woken domain */
   3.247      r_time = curr->lastschd +
   3.248 -             ((inf->evt - curr_evt) / BVT_INFO(curr)->mcu_advance) +
   3.249 -             ctx_allow;
   3.250 +        ((inf->evt - curr_evt) / BVT_INFO(curr)->mcu_advance) +
   3.251 +        ctx_allow;
   3.252  
   3.253      if ( is_idle_task(curr) || (inf->evt <= curr_evt) )
   3.254          cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
   3.255      else if ( schedule_data[cpu].s_timer.expires > r_time )
   3.256          mod_ac_timer(&schedule_data[cpu].s_timer, r_time);
   3.257 -
   3.258 -    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);  
   3.259  }
   3.260  
   3.261  
   3.262  static void bvt_sleep(struct domain *d)
   3.263  {
   3.264 -    unsigned long flags;
   3.265 -    
   3.266      if ( test_bit(DF_RUNNING, &d->flags) )
   3.267          cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
   3.268 -    else 
   3.269 -    {
   3.270 -        /* The runqueue accesses must be protected */
   3.271 -        spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags);
   3.272 -        
   3.273 -        
   3.274 -        if ( __task_on_runqueue(d) )
   3.275 -            __del_from_runqueue(d);
   3.276 -
   3.277 -        spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags);    
   3.278 -    }
   3.279 +    else  if ( __task_on_runqueue(d) )
   3.280 +        __del_from_runqueue(d);
   3.281  }
   3.282  
   3.283  /**
   3.284   * bvt_free_task - free BVT private structures for a task
   3.285 - * @p:             task
   3.286 + * @d:             task
   3.287   */
   3.288 -void bvt_free_task(struct domain *p)
   3.289 +void bvt_free_task(struct domain *d)
   3.290  {
   3.291 -    ASSERT( p->sched_priv != NULL );
   3.292 -    xmem_cache_free( dom_info_cache, p->sched_priv );
   3.293 -}
   3.294 -
   3.295 -
   3.296 -/* 
   3.297 - * Block the currently-executing domain until a pertinent event occurs.
   3.298 - */
   3.299 -static void bvt_do_block(struct domain *p)
   3.300 -{
   3.301 +    ASSERT(d->sched_priv != NULL);
   3.302 +    xmem_cache_free(dom_info_cache, d->sched_priv);
   3.303  }
   3.304  
   3.305  /* Control the scheduler. */
   3.306 @@ -333,24 +283,19 @@ int bvt_ctl(struct sched_ctl_cmd *cmd)
   3.307      struct bvt_ctl *params = &cmd->u.bvt;
   3.308  
   3.309      if ( cmd->direction == SCHED_INFO_PUT )
   3.310 -    { 
   3.311          ctx_allow = params->ctx_allow;
   3.312 -    }
   3.313      else
   3.314 -    {
   3.315          params->ctx_allow = ctx_allow;
   3.316 -    }
   3.317      
   3.318      return 0;
   3.319  }
   3.320  
   3.321  /* Adjust scheduling parameter for a given domain. */
   3.322 -int bvt_adjdom(struct domain *p,
   3.323 -               struct sched_adjdom_cmd *cmd)
   3.324 +int bvt_adjdom(
   3.325 +    struct domain *d, struct sched_adjdom_cmd *cmd)
   3.326  {
   3.327      struct bvt_adjdom *params = &cmd->u.bvt;
   3.328 -    unsigned long flags;
   3.329 -
   3.330 +    
   3.331      if ( cmd->direction == SCHED_INFO_PUT )
   3.332      {
   3.333          u32 mcu_adv = params->mcu_adv;
   3.334 @@ -359,27 +304,17 @@ int bvt_adjdom(struct domain *p,
   3.335          s_time_t warpl = params->warpl;
   3.336          s_time_t warpu = params->warpu;
   3.337          
   3.338 -        struct bvt_dom_info *inf = BVT_INFO(p);
   3.339 +        struct bvt_dom_info *inf = BVT_INFO(d);
   3.340          
   3.341          DPRINTK("Get domain %u bvt mcu_adv=%u, warpback=%d, warpvalue=%d, "
   3.342                  "warpl=%lld, warpu=%lld\n",
   3.343 -                p->domain, inf->mcu_advance, inf->warpback, inf->warp_value,
   3.344 +                d->domain, inf->mcu_advance, inf->warpback, inf->warp_value,
   3.345                  inf->warpl, inf->warpu);
   3.346  
   3.347          /* Sanity -- this can avoid divide-by-zero. */
   3.348 -        if ( mcu_adv == 0 )
   3.349 -        {
   3.350 -            printk("Mcu advance must not be set to 0 (domain %d)\n",p->domain);
   3.351 +        if ( (mcu_adv == 0) || (warpl < 0) || (warpu < 0) )
   3.352              return -EINVAL;
   3.353 -        }
   3.354 -        else if ( warpl < 0 || warpu < 0)
   3.355 -        {
   3.356 -            printk("Warp limits must be >= 0 (domain %d)\n", p->domain);
   3.357 -            return -EINVAL;
   3.358 -        }
   3.359 -        
   3.360 -        
   3.361 -        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
   3.362 +
   3.363          inf->mcu_advance = mcu_adv;
   3.364          inf->warpback = warpback;  
   3.365          /* The warp should be the same as warpback */
   3.366 @@ -391,27 +326,23 @@ int bvt_adjdom(struct domain *p,
   3.367          /* If the unwarp timer set up it needs to be removed */
   3.368          rem_ac_timer(&inf->unwarp_timer);
   3.369          /* If we stop warping the warp timer needs to be removed */
   3.370 -        if(!warpback)
   3.371 +        if ( !warpback )
   3.372              rem_ac_timer(&inf->warp_timer);
   3.373          
   3.374          DPRINTK("Get domain %u bvt mcu_adv=%u, warpback=%d, warpvalue=%d, "
   3.375                  "warpl=%lld, warpu=%lld\n",
   3.376 -                p->domain, inf->mcu_advance, inf->warpback, inf->warp_value,
   3.377 +                d->domain, inf->mcu_advance, inf->warpback, inf->warp_value,
   3.378                  inf->warpl, inf->warpu);
   3.379                  
   3.380 -        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   3.381      }
   3.382      else if ( cmd->direction == SCHED_INFO_GET )
   3.383      {
   3.384 -        struct bvt_dom_info *inf = BVT_INFO(p);
   3.385 -
   3.386 -        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
   3.387 +        struct bvt_dom_info *inf = BVT_INFO(d);
   3.388          params->mcu_adv     = inf->mcu_advance;
   3.389          params->warpvalue   = inf->warp_value;
   3.390          params->warpback    = inf->warpback;
   3.391          params->warpl       = inf->warpl;
   3.392          params->warpu       = inf->warpu;
   3.393 -        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   3.394      }
   3.395      
   3.396      return 0;
   3.397 @@ -427,23 +358,19 @@ int bvt_adjdom(struct domain *p,
   3.398   */
   3.399  static task_slice_t bvt_do_schedule(s_time_t now)
   3.400  {
   3.401 -    unsigned long flags;
   3.402 -    struct domain *prev = current, *next = NULL, *next_prime, *p; 
   3.403 +    struct domain      *prev = current, *next = NULL, *next_prime, *p; 
   3.404      struct list_head   *tmp;
   3.405      int                 cpu = prev->processor;
   3.406      s32                 r_time;     /* time for new dom to run */
   3.407      u32                 next_evt, next_prime_evt, min_avt;
   3.408 -    struct bvt_dom_info *prev_inf       = BVT_INFO(prev),
   3.409 -                        *p_inf          = NULL,
   3.410 -                        *next_inf       = NULL,
   3.411 -                        *next_prime_inf = NULL;
   3.412 +    struct bvt_dom_info *prev_inf       = BVT_INFO(prev);
   3.413 +    struct bvt_dom_info *p_inf          = NULL;
   3.414 +    struct bvt_dom_info *next_inf       = NULL;
   3.415 +    struct bvt_dom_info *next_prime_inf = NULL;
   3.416      task_slice_t        ret;
   3.417  
   3.418 -
   3.419      ASSERT(prev->sched_priv != NULL);
   3.420      ASSERT(prev_inf != NULL);
   3.421 -    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
   3.422 -
   3.423      ASSERT(__task_on_runqueue(prev));
   3.424  
   3.425      if ( likely(!is_idle_task(prev)) ) 
   3.426 @@ -511,8 +438,6 @@ static task_slice_t bvt_do_schedule(s_ti
   3.427          add_ac_timer(&next_inf->warp_timer);
   3.428      }
   3.429     
   3.430 -    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
   3.431 - 
   3.432      /* Extract the domain pointers from the dom infos */
   3.433      next        = next_inf->domain;
   3.434      next_prime  = next_prime_inf->domain;
   3.435 @@ -524,9 +449,9 @@ static task_slice_t bvt_do_schedule(s_ti
   3.436      /* check for virtual time overrun on this cpu */
   3.437      if ( CPU_SVT(cpu) >= 0xf0000000 )
   3.438      {
   3.439 -        u_long t_flags;
   3.440 -        
   3.441 -        write_lock_irqsave(&tasklist_lock, t_flags); 
   3.442 +        ASSERT(!local_irq_is_enabled());
   3.443 +
   3.444 +        write_lock(&tasklist_lock);
   3.445          
   3.446          for_each_domain ( p )
   3.447          {
   3.448 @@ -538,7 +463,7 @@ static task_slice_t bvt_do_schedule(s_ti
   3.449              }
   3.450          } 
   3.451          
   3.452 -        write_unlock_irqrestore(&tasklist_lock, t_flags); 
   3.453 +        write_unlock(&tasklist_lock);
   3.454          
   3.455          CPU_SVT(cpu) -= 0xe0000000;
   3.456      }
   3.457 @@ -591,43 +516,29 @@ static void bvt_dump_settings(void)
   3.458  
   3.459  static void bvt_dump_cpu_state(int i)
   3.460  {
   3.461 -    unsigned long flags;
   3.462      struct list_head *list, *queue;
   3.463      int loop = 0;
   3.464      struct bvt_dom_info *d_inf;
   3.465      struct domain *d;
   3.466      
   3.467 -    spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags);
   3.468      printk("svt=0x%08lX ", CPU_SVT(i));
   3.469  
   3.470      queue = RUNQUEUE(i);
   3.471      printk("QUEUE rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
   3.472 -        (unsigned long) queue->next, (unsigned long) queue->prev);
   3.473 +           (unsigned long) queue->next, (unsigned long) queue->prev);
   3.474  
   3.475      list_for_each ( list, queue )
   3.476      {
   3.477          d_inf = list_entry(list, struct bvt_dom_info, run_list);
   3.478          d = d_inf->domain;
   3.479          printk("%3d: %u has=%c ", loop++, d->domain,
   3.480 -              test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
   3.481 +               test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
   3.482          bvt_dump_runq_el(d);
   3.483          printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
   3.484          printk("         l: %lx n: %lx  p: %lx\n",
   3.485 -            (unsigned long)list, (unsigned long)list->next,
   3.486 -            (unsigned long)list->prev);
   3.487 +               (unsigned long)list, (unsigned long)list->next,
   3.488 +               (unsigned long)list->prev);
   3.489      }
   3.490 -    spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags);        
   3.491 -}
   3.492 -
   3.493 -/* We use cache to create the bvt_dom_infos 
   3.494 -   this functions makes sure that the run_list
   3.495 -   is initialised properly.
   3.496 -   Call to __task_on_runqueue needs to return false */
   3.497 -static void cache_constructor(void *arg1, xmem_cache_t *arg2, unsigned long arg3)
   3.498 -{
   3.499 -    struct bvt_dom_info *dom_inf = (struct bvt_dom_info*)arg1;
   3.500 -    dom_inf->run_list.next = NULL;
   3.501 -    dom_inf->run_list.prev = NULL;
   3.502  }
   3.503  
   3.504  /* Initialise the data structures. */
   3.505 @@ -646,15 +557,12 @@ int bvt_init_scheduler()
   3.506          }
   3.507  
   3.508          INIT_LIST_HEAD(RUNQUEUE(i));
   3.509 -        spin_lock_init(&CPU_INFO(i)->run_lock);
   3.510          
   3.511          CPU_SVT(i) = 0; /* XXX do I really need to do this? */
   3.512      }
   3.513  
   3.514 -    dom_info_cache = xmem_cache_create("BVT dom info",
   3.515 -                                       sizeof(struct bvt_dom_info),
   3.516 -                                       0, 0, cache_constructor, NULL);
   3.517 -
   3.518 +    dom_info_cache = xmem_cache_create(
   3.519 +        "BVT dom info", sizeof(struct bvt_dom_info), 0, 0, NULL, NULL);
   3.520      if ( dom_info_cache == NULL )
   3.521      {
   3.522          printk("BVT: Failed to allocate domain info SLAB cache");
   3.523 @@ -664,8 +572,6 @@ int bvt_init_scheduler()
   3.524      return 0;
   3.525  }
   3.526  
   3.527 -
   3.528 -
   3.529  struct scheduler sched_bvt_def = {
   3.530      .name     = "Borrowed Virtual Time",
   3.531      .opt_name = "bvt",
   3.532 @@ -676,7 +582,6 @@ struct scheduler sched_bvt_def = {
   3.533      .alloc_task     = bvt_alloc_task,
   3.534      .add_task       = bvt_add_task,
   3.535      .free_task      = bvt_free_task,
   3.536 -    .do_block       = bvt_do_block,
   3.537      .do_schedule    = bvt_do_schedule,
   3.538      .control        = bvt_ctl,
   3.539      .adjdom         = bvt_adjdom,
   3.540 @@ -685,4 +590,3 @@ struct scheduler sched_bvt_def = {
   3.541      .sleep          = bvt_sleep,
   3.542      .wake           = bvt_wake,
   3.543  };
   3.544 -
     4.1 --- a/xen/common/schedule.c	Mon Oct 04 14:55:47 2004 +0000
     4.2 +++ b/xen/common/schedule.c	Tue Oct 05 09:53:10 2004 +0000
     4.3 @@ -67,16 +67,19 @@ static void fallback_timer_fn(unsigned l
     4.4  /* This is global for now so that private implementations can reach it */
     4.5  schedule_data_t schedule_data[NR_CPUS];
     4.6  
     4.7 -/*
     4.8 - * TODO: It would be nice if the schedulers array could get populated
     4.9 - * automagically without having to hack the code in here.
    4.10 - */
    4.11 -extern struct scheduler sched_bvt_def, sched_fbvt_def, sched_rrobin_def, sched_atropos_def;
    4.12 -static struct scheduler *schedulers[] = { &sched_bvt_def,
    4.13 -					                      &sched_fbvt_def,
    4.14 -                                          &sched_rrobin_def,
    4.15 -                                          &sched_atropos_def,
    4.16 -                                          NULL};
    4.17 +extern struct scheduler sched_bvt_def;
    4.18 +extern struct scheduler sched_fbvt_def;
    4.19 +extern struct scheduler sched_rrobin_def;
    4.20 +extern struct scheduler sched_atropos_def;
    4.21 +static struct scheduler *schedulers[] = { 
    4.22 +    &sched_bvt_def,
    4.23 +#ifdef BROKEN_SCHEDULERS
    4.24 +    &sched_fbvt_def,
    4.25 +    &sched_rrobin_def,
    4.26 +    &sched_atropos_def,
    4.27 +#endif
    4.28 +    NULL
    4.29 +};
    4.30  
    4.31  /* Operations for the current scheduler. */
    4.32  static struct scheduler ops;
    4.33 @@ -155,21 +158,20 @@ void sched_rem_domain(struct domain *d)
    4.34  
    4.35  void init_idle_task(void)
    4.36  {
    4.37 -    struct domain *d = current;
    4.38 -
    4.39 -    if ( SCHED_OP(init_idle_task, d) < 0)
    4.40 -        panic("Failed to initialise idle task for processor %d",d->processor);
    4.41 +    if ( SCHED_OP(init_idle_task, current) < 0 )
    4.42 +        BUG();
    4.43  }
    4.44  
    4.45  void domain_sleep(struct domain *d)
    4.46  {
    4.47      unsigned long flags;
    4.48  
    4.49 -    /* sleep and wake protected by domain's sleep_lock */
    4.50 -    spin_lock_irqsave(&d->sleep_lock, flags);
    4.51 +    spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);
    4.52 +
    4.53      if ( likely(!domain_runnable(d)) )
    4.54          SCHED_OP(sleep, d);
    4.55 -    spin_unlock_irqrestore(&d->sleep_lock, flags);
    4.56 +
    4.57 +    spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);
    4.58   
    4.59      /* Synchronous. */
    4.60      while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) )
    4.61 @@ -181,10 +183,10 @@ void domain_sleep(struct domain *d)
    4.62  
    4.63  void domain_wake(struct domain *d)
    4.64  {
    4.65 -    unsigned long       flags;
    4.66 +    unsigned long flags;
    4.67  
    4.68 -    spin_lock_irqsave(&d->sleep_lock, flags);
    4.69 -    
    4.70 +    spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);
    4.71 +
    4.72      if ( likely(domain_runnable(d)) )
    4.73      {
    4.74          TRACE_2D(TRC_SCHED_WAKE, d->domain, d);
    4.75 @@ -196,7 +198,7 @@ void domain_wake(struct domain *d)
    4.76      
    4.77      clear_bit(DF_MIGRATED, &d->flags);
    4.78      
    4.79 -    spin_unlock_irqrestore(&d->sleep_lock, flags);
    4.80 +    spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);
    4.81  }
    4.82  
    4.83  /* Block the currently-executing domain until a pertinent event occurs. */
    4.84 @@ -218,7 +220,6 @@ static long do_yield(void)
    4.85      return 0;
    4.86  }
    4.87  
    4.88 -
    4.89  /*
    4.90   * Demultiplex scheduler-related hypercalls.
    4.91   */
    4.92 @@ -292,24 +293,25 @@ long sched_ctl(struct sched_ctl_cmd *cmd
    4.93  /* Adjust scheduling parameter for a given domain. */
    4.94  long sched_adjdom(struct sched_adjdom_cmd *cmd)
    4.95  {
    4.96 -    struct domain *p;    
    4.97 -    
    4.98 +    struct domain *d;
    4.99 +
   4.100      if ( cmd->sched_id != ops.sched_id )
   4.101          return -EINVAL;
   4.102  
   4.103      if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
   4.104          return -EINVAL;
   4.105  
   4.106 -    p = find_domain_by_id(cmd->domain);
   4.107 -
   4.108 -    if( p == NULL )
   4.109 +    d = find_domain_by_id(cmd->domain);
   4.110 +    if ( d == NULL )
   4.111          return -ESRCH;
   4.112  
   4.113 -    TRACE_1D(TRC_SCHED_ADJDOM, p->domain);
   4.114 +    TRACE_1D(TRC_SCHED_ADJDOM, d->domain);
   4.115  
   4.116 -    SCHED_OP(adjdom, p, cmd);
   4.117 +    spin_lock_irq(&schedule_data[d->processor].schedule_lock);
   4.118 +    SCHED_OP(adjdom, d, cmd);
   4.119 +    spin_unlock_irq(&schedule_data[d->processor].schedule_lock);
   4.120  
   4.121 -    put_domain(p); 
   4.122 +    put_domain(d);
   4.123      return 0;
   4.124  }
   4.125  
   4.126 @@ -335,7 +337,6 @@ void __enter_scheduler(void)
   4.127      rem_ac_timer(&schedule_data[cpu].s_timer);
   4.128      
   4.129      ASSERT(!in_irq());
   4.130 -    // TODO - move to specific scheduler ASSERT(__task_on_runqueue(prev));
   4.131  
   4.132      if ( test_bit(DF_BLOCKED, &prev->flags) )
   4.133      {
   4.134 @@ -362,6 +363,9 @@ void __enter_scheduler(void)
   4.135      schedule_data[cpu].s_timer.expires  = now + r_time;
   4.136      add_ac_timer(&schedule_data[cpu].s_timer);
   4.137  
   4.138 +    /* Must be protected by the schedule_lock! */
   4.139 +    set_bit(DF_RUNNING, &next->flags);
   4.140 +
   4.141      spin_unlock_irq(&schedule_data[cpu].schedule_lock);
   4.142  
   4.143      /* Ensure that the domain has an up-to-date time base. */
   4.144 @@ -405,7 +409,6 @@ void __enter_scheduler(void)
   4.145       * without warning). 
   4.146       */
   4.147      clear_bit(DF_RUNNING, &prev->flags);
   4.148 -    set_bit(DF_RUNNING, &next->flags);
   4.149  
   4.150      /* Mark a timer event for the newly-scheduled domain. */
   4.151      if ( !is_idle_task(next) )
   4.152 @@ -549,15 +552,23 @@ void dump_runq(u_char key, void *dev_id,
   4.153  {
   4.154      s_time_t      now = NOW();
   4.155      int           i;
   4.156 +    unsigned long flags;
   4.157 +
   4.158 +    local_irq_save(flags);
   4.159  
   4.160      printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
   4.161      SCHED_OP(dump_settings);
   4.162      printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now); 
   4.163 +
   4.164      for ( i = 0; i < smp_num_cpus; i++ )
   4.165      {
   4.166 +        spin_lock(&schedule_data[i].schedule_lock);
   4.167          printk("CPU[%02d] ", i);
   4.168          SCHED_OP(dump_cpu_state,i);
   4.169 +        spin_unlock(&schedule_data[i].schedule_lock);
   4.170      }
   4.171 +
   4.172 +    local_irq_restore(flags);
   4.173  }
   4.174  
   4.175  #if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
     5.1 --- a/xen/include/xen/sched.h	Mon Oct 04 14:55:47 2004 +0000
     5.2 +++ b/xen/include/xen/sched.h	Tue Oct 05 09:53:10 2004 +0000
     5.3 @@ -97,7 +97,6 @@ struct domain
     5.4  
     5.5      /* Scheduling. */
     5.6      int              shutdown_code; /* code value from OS (if DF_SHUTDOWN). */
     5.7 -    spinlock_t       sleep_lock;    /* wake/sleep lock                      */
     5.8      s_time_t         lastschd;      /* time this domain was last scheduled */
     5.9      s_time_t         lastdeschd;    /* time this domain was last descheduled */
    5.10      s_time_t         cpu_time;      /* total CPU time received till now */