ia64/xen-unstable

changeset 1969:02fb4c801767

bitkeeper revision 1.1108.34.1 (4107ea3eSSzRurUHPnjryAZ3X8VXbg)

New locking scheme for schedulers
author gm281@boulderdash.cl.cam.ac.uk
date Wed Jul 28 18:02:38 2004 +0000 (2004-07-28)
parents a83817a4f882
children 6d2b5ebbf4b6
files xen/common/domain.c xen/common/sched_bvt.c xen/common/sched_fair_bvt.c xen/common/sched_rrobin.c xen/common/schedule.c xen/include/xen/sched.h
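This changeset splits the old per-CPU schedule_lock into three finer-grained locks: a per-CPU run_lock (embedded in each scheduler's private CPU info, or a static array for round-robin) protecting the runqueue and per-domain scheduling parameters; a new per-domain state_lock serialising domain_wake()/domain_sleep(); and the existing schedule_lock, now confined to schedule_data[] itself (curr, s_timer). The comment block below restates the resulting lock ordering; it is inferred from the code paths in the diff, not stated anywhere in the changeset itself.

    /*
     * Lock ordering implied by this changeset (inferred sketch):
     *
     *   d->state_lock                          outermost: domain_wake() /
     *     -> CPU_INFO(cpu)->run_lock           domain_sleep() take it, then
     *     -> schedule_data[cpu].schedule_lock  call the scheduler hooks
     *
     *   schedule_data[cpu].schedule_lock       __enter_scheduler() holds it
     *     -> CPU_INFO(cpu)->run_lock           across SCHED_OP(do_schedule)
     *
     * run_lock is innermost everywhere; the wake path never holds
     * run_lock and schedule_lock at the same time.
     */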
line diff
     1.1 --- a/xen/common/domain.c	Tue Jul 27 10:44:40 2004 +0000
     1.2 +++ b/xen/common/domain.c	Wed Jul 28 18:02:38 2004 +0000
     1.3 @@ -40,7 +40,9 @@ struct domain *do_createdomain(domid_t d
     1.4      d->domain    = dom_id;
     1.5      d->processor = cpu;
     1.6      d->create_time = NOW();
     1.7 -
     1.8 +    /* Initialise the per-domain state_lock (serialises wake/sleep). */
     1.9 +    spin_lock_init(&d->state_lock);
    1.10 + 
    1.11      memcpy(&d->thread, &idle0_task.thread, sizeof(d->thread));
    1.12  
    1.13      if ( d->domain != IDLE_DOMAIN_ID )
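The per-domain lock is initialised at creation time so that it is valid before the domain becomes reachable by other CPUs. A minimal stand-alone illustration of the same rule (alloc_domain() and publish_domain() are hypothetical stand-ins for the allocation and task-list insertion that do_createdomain() performs):

    /* Sketch: initialise an embedded lock before publishing the object. */
    struct domain *d = alloc_domain();   /* hypothetical allocator         */
    d->processor = cpu;
    spin_lock_init(&d->state_lock);      /* must precede publication       */
    publish_domain(d);                   /* hypothetical: after this, other
                                          * CPUs may wake/sleep d          */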
     2.1 --- a/xen/common/sched_bvt.c	Tue Jul 27 10:44:40 2004 +0000
     2.2 +++ b/xen/common/sched_bvt.c	Wed Jul 28 18:02:38 2004 +0000
     2.3 @@ -45,6 +45,7 @@ struct bvt_dom_info
     2.4  
     2.5  struct bvt_cpu_info
     2.6  {
     2.7 +    spinlock_t          run_lock;   /* protects runqueue */
     2.8      struct list_head    runqueue;   /* runqueue for given processor */ 
     2.9      unsigned long       svt;        /* XXX check this is unsigned long! */
    2.10  };
    2.11 @@ -148,15 +149,86 @@ int bvt_init_idle_task(struct domain *p)
    2.12  
    2.13      bvt_add_task(p);
    2.14  
    2.15 -    spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);
    2.16 +    spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);
    2.17 +    
    2.18      set_bit(DF_RUNNING, &p->flags);
    2.19      if ( !__task_on_runqueue(RUNLIST(p)) )
    2.20          __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor));
    2.21 -    spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
    2.22 +        
    2.23 +    spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
    2.24  
    2.25      return 0;
    2.26  }
    2.27  
    2.28 +void bvt_wake(struct domain *d)
    2.29 +{
    2.30 +    unsigned long       flags;
    2.31 +    struct bvt_dom_info *inf = BVT_INFO(d);
    2.32 +    struct domain       *curr;
    2.33 +    s_time_t            now, min_time;
    2.34 +    int                 cpu = d->processor;
    2.35 +
    2.36 +    /* The runqueue accesses must be protected */
    2.37 +    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
    2.38 +    
    2.39 +    /* If on the runqueue already then someone has done the wakeup work. */
    2.40 +    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
    2.41 +    {
    2.42 +        spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
    2.43 +        return;
    2.44 +    }
    2.45 +
    2.46 +    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(d->processor));
    2.47 +
    2.48 +    now = NOW();
    2.49 +
    2.50 +    /* Set the BVT parameters. */
    2.51 +    if ( inf->avt < CPU_SVT(cpu) )
    2.52 +        inf->avt = CPU_SVT(cpu);
    2.53 +
    2.54 +    /* Deal with warping here. */
    2.55 +    inf->warpback  = 1;
    2.56 +    inf->warped    = now;
    2.57 +    __calc_evt(inf);
    2.58 +    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
    2.59 +    
    2.60 +    /* Access to schedule_data protected by schedule_lock */
    2.61 +    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
    2.62 +    
    2.63 +    curr = schedule_data[cpu].curr;
    2.64 +
    2.65 +    /* Currently-running domain should run at least for ctx_allow. */
    2.66 +    min_time = curr->lastschd + curr->min_slice;
    2.67 +
    2.68 +    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);   
    2.69 +   
    2.70 +    if ( is_idle_task(curr) || (min_time <= now) )
    2.71 +        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
    2.72 +    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
    2.73 +        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
    2.74 +
    2.75 +}
    2.76 +
    2.77 +
    2.78 +static void bvt_sleep(struct domain *d)
    2.79 +{
    2.80 +    unsigned long flags;
    2.81 +    
    2.82 +    if ( test_bit(DF_RUNNING, &d->flags) )
    2.83 +        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
    2.84 +    else 
    2.85 +    {
    2.86 +        /* The runqueue accesses must be protected */
    2.87 +        spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags);
    2.88 +        
    2.89 +        
    2.90 +        if ( __task_on_runqueue(RUNLIST(d)) )
    2.91 +            __del_from_runqueue(RUNLIST(d));
    2.92 +
    2.93 +        spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags);    
    2.94 +    }
    2.95 +}
    2.96 +
    2.97  /**
    2.98   * bvt_free_task - free BVT private structures for a task
    2.99   * @p:             task
   2.100 @@ -218,7 +290,7 @@ int bvt_adjdom(struct domain *p,
   2.101          if ( mcu_adv == 0 )
   2.102              return -EINVAL;
   2.103          
   2.104 -        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
   2.105 +        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
   2.106          inf->mcu_advance = mcu_adv;
   2.107          inf->warp = warp;
   2.108          inf->warpl = warpl;
   2.109 @@ -229,18 +301,18 @@ int bvt_adjdom(struct domain *p,
   2.110                  p->domain, inf->mcu_advance, inf->warp,
   2.111                  inf->warpl, inf->warpu );
   2.112  
   2.113 -        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
   2.114 +        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   2.115      }
   2.116      else if ( cmd->direction == SCHED_INFO_GET )
   2.117      {
   2.118          struct bvt_dom_info *inf = BVT_INFO(p);
   2.119  
   2.120 -        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
   2.121 +        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
   2.122          params->mcu_adv = inf->mcu_advance;
   2.123          params->warp    = inf->warp;
   2.124          params->warpl   = inf->warpl;
   2.125          params->warpu   = inf->warpu;
   2.126 -        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
   2.127 +        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   2.128      }
   2.129      
   2.130      return 0;
   2.131 @@ -256,6 +328,7 @@ int bvt_adjdom(struct domain *p,
   2.132   */
   2.133  static task_slice_t bvt_do_schedule(s_time_t now)
   2.134  {
   2.135 +    unsigned long flags;
   2.136      struct domain *prev = current, *next = NULL, *next_prime, *p; 
   2.137      struct list_head   *tmp;
   2.138      int                 cpu = prev->processor;
   2.139 @@ -269,8 +342,12 @@ static task_slice_t bvt_do_schedule(s_ti
   2.140                          *next_prime_inf = NULL;
   2.141      task_slice_t        ret;
   2.142  
   2.143 +
   2.144      ASSERT(prev->sched_priv != NULL);
   2.145      ASSERT(prev_inf != NULL);
   2.146 +    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
   2.147 +
   2.148 +    ASSERT(__task_on_runqueue(RUNLIST(prev)));
   2.149  
   2.150      if ( likely(!is_idle_task(prev)) ) 
   2.151      {
   2.152 @@ -329,7 +406,9 @@ static task_slice_t bvt_do_schedule(s_ti
   2.153          if ( p_inf->avt < min_avt )
   2.154              min_avt = p_inf->avt;
   2.155      }
   2.156 -
   2.157 +    
   2.158 +    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
   2.159 + 
   2.160      /* Extract the domain pointers from the dom infos */
   2.161      next        = next_inf->domain;
   2.162      next_prime  = next_prime_inf->domain;
   2.163 @@ -341,8 +420,10 @@ static task_slice_t bvt_do_schedule(s_ti
   2.164      /* check for virtual time overrun on this cpu */
   2.165      if ( CPU_SVT(cpu) >= 0xf0000000 )
   2.166      {
   2.167 -        u_long t_flags; 
   2.168 +        u_long t_flags;
   2.169 +        
   2.170          write_lock_irqsave(&tasklist_lock, t_flags); 
   2.171 +        
   2.172          for_each_domain ( p )
   2.173          {
   2.174              if ( p->processor == cpu )
   2.175 @@ -352,7 +433,9 @@ static task_slice_t bvt_do_schedule(s_ti
   2.176                  p_inf->avt -= 0xe0000000;
   2.177              }
   2.178          } 
   2.179 +        
   2.180          write_unlock_irqrestore(&tasklist_lock, t_flags); 
   2.181 +        
   2.182          CPU_SVT(cpu) -= 0xe0000000;
   2.183      }
   2.184  
   2.185 @@ -411,7 +494,7 @@ static void bvt_dump_cpu_state(int i)
   2.186      struct bvt_dom_info *d_inf;
   2.187      struct domain *d;
   2.188      
   2.189 -    spin_lock_irqsave(&schedule_data[i].schedule_lock, flags);
   2.190 +    spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags);
   2.191      printk("svt=0x%08lX ", CPU_SVT(i));
   2.192  
   2.193      queue = RUNQUEUE(i);
   2.194 @@ -430,7 +513,7 @@ static void bvt_dump_cpu_state(int i)
   2.195              (unsigned long)list, (unsigned long)list->next,
   2.196              (unsigned long)list->prev);
   2.197      }
   2.198 -    spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags);        
   2.199 +    spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags);        
   2.200  }
   2.201  
   2.202  /* We use cache to create the bvt_dom_infos 
   2.203 @@ -452,14 +535,16 @@ int bvt_init_scheduler()
   2.204      for ( i = 0; i < NR_CPUS; i++ )
   2.205      {
   2.206          schedule_data[i].sched_priv = xmalloc(sizeof(struct bvt_cpu_info));
   2.207 -        INIT_LIST_HEAD(RUNQUEUE(i));
   2.208 -        
   2.209 +       
   2.210          if ( schedule_data[i].sched_priv == NULL )
   2.211          {
   2.212              printk("Failed to allocate BVT scheduler per-CPU memory!\n");
   2.213              return -1;
   2.214          }
   2.215  
   2.216 +        INIT_LIST_HEAD(RUNQUEUE(i));
   2.217 +        spin_lock_init(&CPU_INFO(i)->run_lock);
   2.218 +        
   2.219          CPU_SVT(i) = 0; /* XXX do I really need to do this? */
   2.220      }
   2.221  
   2.222 @@ -476,48 +561,7 @@ int bvt_init_scheduler()
   2.223      return 0;
   2.224  }
   2.225  
   2.226 -static void bvt_sleep(struct domain *d)
   2.227 -{
   2.228 -    if ( test_bit(DF_RUNNING, &d->flags) )
   2.229 -        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
   2.230 -    else if ( __task_on_runqueue(RUNLIST(d)) )
   2.231 -        __del_from_runqueue(RUNLIST(d));
   2.232 -}
   2.233  
   2.234 -void bvt_wake(struct domain *d)
   2.235 -{
   2.236 -    struct bvt_dom_info *inf = BVT_INFO(d);
   2.237 -    struct domain       *curr;
   2.238 -    s_time_t             now, min_time;
   2.239 -    int                  cpu = d->processor;
   2.240 -
   2.241 -    /* If on the runqueue already then someone has done the wakeup work. */
   2.242 -    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
   2.243 -        return;
   2.244 -
   2.245 -    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(d->processor));
   2.246 -
   2.247 -    now = NOW();
   2.248 -
   2.249 -    /* Set the BVT parameters. */
   2.250 -    if ( inf->avt < CPU_SVT(cpu) )
   2.251 -        inf->avt = CPU_SVT(cpu);
   2.252 -
   2.253 -    /* Deal with warping here. */
   2.254 -    inf->warpback  = 1;
   2.255 -    inf->warped    = now;
   2.256 -    __calc_evt(inf);
   2.257 -
   2.258 -    curr = schedule_data[cpu].curr;
   2.259 -
   2.260 -    /* Currently-running domain should run at least for ctx_allow. */
   2.261 -    min_time = curr->lastschd + curr->min_slice;
   2.262 -    
   2.263 -    if ( is_idle_task(curr) || (min_time <= now) )
   2.264 -        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
   2.265 -    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
   2.266 -        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
   2.267 -}
   2.268  
   2.269  struct scheduler sched_bvt_def = {
   2.270      .name     = "Borrowed Virtual Time",
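The rewritten bvt_wake() above works in two phases: runqueue insertion and virtual-time bookkeeping under run_lock, then a snapshot of the running domain under schedule_lock, with the preemption decision taken after both locks are dropped. Distilled to its locking skeleton (a sketch of the code above, error paths elided):

    /* Phase 1: runqueue and BVT accounting, under the per-CPU run_lock. */
    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
    if ( inf->avt < CPU_SVT(cpu) )
        inf->avt = CPU_SVT(cpu);       /* catch up to system virtual time */
    __calc_evt(inf);
    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);

    /* Phase 2: snapshot schedule_data under its own lock. */
    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
    curr     = schedule_data[cpu].curr;
    min_time = curr->lastschd + curr->min_slice;
    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);

    /* Decide, lock-free, whether to preempt now or shorten the timer. */
    if ( is_idle_task(curr) || (min_time <= now) )
        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);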
     3.1 --- a/xen/common/sched_fair_bvt.c	Tue Jul 27 10:44:40 2004 +0000
     3.2 +++ b/xen/common/sched_fair_bvt.c	Wed Jul 28 18:02:38 2004 +0000
     3.3 @@ -52,6 +52,7 @@ struct fbvt_dom_info
     3.4  
     3.5  struct fbvt_cpu_info
     3.6  {
     3.7 +    spinlock_t          run_lock;  /* protects runqueue */
     3.8      struct list_head    runqueue;  /* runqueue for this CPU */
     3.9      unsigned long       svt;       /* XXX check this is unsigned long! */
    3.10      u32                 vtb;       /* virtual time bonus */
    3.11 @@ -160,15 +161,122 @@ int fbvt_init_idle_task(struct domain *p
    3.12      if(fbvt_alloc_task(p) < 0) return -1;
    3.13  
    3.14      fbvt_add_task(p);
    3.15 -    spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);
    3.16 +    spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);
    3.17      set_bit(DF_RUNNING, &p->flags);
    3.18      if ( !__task_on_runqueue(RUNLIST(p)) )
    3.19      __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor));
    3.20 -    spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
    3.21 +    spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
    3.22  
    3.23      return 0;
    3.24  }
    3.25                                          
    3.26 +static void fbvt_wake(struct domain *d)
    3.27 +{
    3.28 +    unsigned long        flags;
    3.29 +    struct fbvt_dom_info *inf = FBVT_INFO(d);
    3.30 +    struct domain        *curr;
    3.31 +    s_time_t             now, min_time;
    3.32 +    int                  cpu = d->processor;
    3.33 +    s32                  io_warp;
    3.34 +
    3.35 +    /* The runqueue accesses must be protected */
    3.36 +    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
    3.37 +    
    3.38 +    /* If on the runqueue already then someone has done the wakeup work. */
    3.39 +    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
    3.40 +    {
    3.41 +        spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); 
    3.42 +        return;
    3.43 +    }    
    3.44 +    
    3.45 +    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
    3.46 + 
    3.47 +    now = NOW();
    3.48 +
    3.49 +#if 0
    3.50 +    /*
    3.51 +     * XXX KAF: This was fbvt_unpause(). Not sure if it's the right thing
    3.52 +     * to do, in light of the stuff that fbvt_wake_up() does.
    3.53 +     * e.g., setting 'inf->avt = CPU_SVT(cpu);' would make the later test
    3.54 +     * 'inf->avt < CPU_SVT(cpu)' redundant!
    3.55 +     */
    3.56 +    if ( d->domain == IDLE_DOMAIN_ID )
    3.57 +    {
    3.58 +        inf->avt = inf->evt = ~0U;
    3.59 +    } 
    3.60 +    else 
    3.61 +    {
    3.62 +        /* Set avt to system virtual time. */
    3.63 +        inf->avt = CPU_SVT(cpu);
    3.64 +        /* Set some default values here. */
    3.65 +        LAST_VTB(cpu) = 0;
    3.66 +        __calc_evt(inf);
    3.67 +    }
    3.68 +#endif
    3.69 +
    3.70 +    /* Set the BVT parameters. */
    3.71 +    if ( inf->avt < CPU_SVT(cpu) )
    3.72 +    {
    3.73 +        /*
    3.74 +         * We want IO-bound domains to gain dispatch precedence;
    3.75 +         * this is especially important for device driver domains.
    3.76 +         * Therefore AVT should not be updated to SVT but to a
    3.77 +         * marginally smaller value. Since frequently sleeping
    3.78 +         * domains accumulate high time_slept values, their virtual
    3.79 +         * time can be determined as:
    3.80 +         * SVT - const * TIME_SLEPT
    3.81 +        io_warp = (int)(0.5 * inf->time_slept);
    3.82 +        if ( io_warp > 1000 )
    3.83 +            io_warp = 1000;
    3.84 +
    3.85 +        ASSERT(inf->time_slept + CPU_SVT(cpu) > inf->avt + io_warp);
    3.86 +        inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp;
    3.87 +        inf->avt = CPU_SVT(cpu) - io_warp;
    3.88 +    }
    3.89 +
    3.90 +    /* Deal with warping here. */
    3.91 +    inf->warpback  = 1;
    3.92 +    inf->warped    = now;
    3.93 +    __calc_evt(inf);
    3.94 +    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
    3.95 +    
    3.96 +    /* Access to schedule_data protected by schedule_lock */
    3.97 +    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
    3.98 +    
    3.99 + 
   3.100 +    curr = schedule_data[cpu].curr;
   3.101 + 
   3.102 +    /* Currently-running domain should run at least for ctx_allow. */
   3.103 +    min_time = curr->lastschd + curr->min_slice;
   3.104 +    
   3.105 +    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);   
   3.106 +    
   3.107 +    if ( is_idle_task(curr) || (min_time <= now) )
   3.108 +        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
   3.109 +    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
   3.110 +        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
   3.111 +}
   3.112 +
   3.113 +
   3.114 +static void fbvt_sleep(struct domain *d)
   3.115 +{
   3.116 +    unsigned long flags;
   3.117 +
   3.118 +    
   3.119 +    if ( test_bit(DF_RUNNING, &d->flags) )
   3.120 +        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
   3.121 +    else
   3.122 +    {
   3.123 +         /* The runqueue accesses must be protected */
   3.124 +        spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags);       
   3.125 +    
   3.126 +        if ( __task_on_runqueue(RUNLIST(d)) )
   3.127 +            __del_from_runqueue(RUNLIST(d));
   3.128 +
   3.129 +        spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags);
   3.130 +    }
   3.131 +}
   3.132 +
   3.133  
   3.134  /**
   3.135   * fbvt_free_task - free FBVT private structures for a task
   3.136 @@ -232,7 +340,7 @@ int fbvt_adjdom(struct domain *p,
   3.137          if ( mcu_adv == 0 )
   3.138              return -EINVAL;
   3.139          
   3.140 -        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
   3.141 +        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
   3.142          inf->mcu_advance = mcu_adv;
   3.143          inf->warp = warp;
   3.144          inf->warpl = warpl;
   3.145 @@ -243,20 +351,18 @@ int fbvt_adjdom(struct domain *p,
   3.146                  p->domain, inf->mcu_advance, inf->warp,
   3.147                  inf->warpl, inf->warpu );
   3.148  
   3.149 -        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, 
   3.150 -                                                                        flags);
   3.151 +        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   3.152      }
   3.153      else if ( cmd->direction == SCHED_INFO_GET )
   3.154      {
   3.155          struct fbvt_dom_info *inf = FBVT_INFO(p);
   3.156  
   3.157 -        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
   3.158 +        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
   3.159          params->mcu_adv = inf->mcu_advance;
   3.160          params->warp    = inf->warp;
   3.161          params->warpl   = inf->warpl;
   3.162          params->warpu   = inf->warpu;
   3.163 -        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, 
   3.164 -                                                                        flags);
   3.165 +        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
   3.166      }
   3.167      
   3.168      return 0;
   3.169 @@ -272,6 +378,7 @@ int fbvt_adjdom(struct domain *p,
   3.170   */
   3.171  static task_slice_t fbvt_do_schedule(s_time_t now)
   3.172  {
   3.173 +    unsigned long flags;
   3.174      struct domain *prev = current, *next = NULL, *next_prime, *p;
   3.175      struct list_head   *tmp;
   3.176      int                 cpu = prev->processor;
   3.177 @@ -288,6 +395,10 @@ static task_slice_t fbvt_do_schedule(s_t
   3.178  
   3.179      ASSERT(prev->sched_priv != NULL);
   3.180      ASSERT(prev_inf != NULL);
   3.181 +    
   3.182 +    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
   3.183 +
   3.184 +    ASSERT(__task_on_runqueue(RUNLIST(prev)));
   3.185  
   3.186      if ( likely(!is_idle_task(prev)) ) 
   3.187      {
   3.188 @@ -365,6 +476,8 @@ static task_slice_t fbvt_do_schedule(s_t
   3.189              min_avt = p_inf->avt;
   3.190      }
   3.191  
   3.192 +    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
   3.193 +
   3.194      /* Extract the domain pointers from the dom infos */
   3.195      next        = next_inf->domain;
   3.196      next_prime  = next_prime_inf->domain;
   3.197 @@ -475,7 +588,7 @@ static void fbvt_dump_cpu_state(int i)
   3.198      struct fbvt_dom_info *d_inf;
   3.199      struct domain *d;
   3.200  
   3.201 -    spin_lock_irqsave(&schedule_data[i].schedule_lock, flags);
   3.202 +    spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags);
   3.203      printk("svt=0x%08lX ", CPU_SVT(i));
   3.204  
   3.205      queue = RUNQUEUE(i);
   3.206 @@ -494,7 +607,7 @@ static void fbvt_dump_cpu_state(int i)
   3.207              (unsigned long)list, (unsigned long)list->next,
   3.208              (unsigned long)list->prev);
   3.209      }
   3.210 -    spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags);
   3.211 +    spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags);        
   3.212  }
   3.213  
   3.214  
   3.215 @@ -519,13 +632,16 @@ int fbvt_init_scheduler()
   3.216      for ( i = 0; i < NR_CPUS; i++ )
   3.217      {
   3.218          schedule_data[i].sched_priv = xmalloc(sizeof(struct fbvt_cpu_info));
   3.219 -        INIT_LIST_HEAD(RUNQUEUE(i));
   3.220 +        
   3.221          if ( schedule_data[i].sched_priv == NULL )
   3.222          {
   3.223              printk("Failed to allocate FBVT scheduler per-CPU memory!\n");
   3.224              return -1;
   3.225          }
   3.226  
   3.227 +        INIT_LIST_HEAD(RUNQUEUE(i));
   3.228 +        spin_lock_init(&CPU_INFO(i)->run_lock);
   3.229 + 
   3.230          CPU_SVT(i) = 0; /* XXX do I really need to do this? */
   3.231      }
   3.232  
   3.233 @@ -541,86 +657,7 @@ int fbvt_init_scheduler()
   3.234  
   3.235      return 0;
   3.236  }
   3.237 -
   3.238 -static void fbvt_sleep(struct domain *d)
   3.239 -{
   3.240 -    if ( test_bit(DF_RUNNING, &d->flags) )
   3.241 -        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
   3.242 -    else if ( __task_on_runqueue(RUNLIST(d)) )
   3.243 -        __del_from_runqueue(RUNLIST(d));
   3.244 -}
   3.245 -
   3.246 -static void fbvt_wake(struct domain *d)
   3.247 -{
   3.248 -    struct fbvt_dom_info *inf = FBVT_INFO(d);
   3.249 -    struct domain        *curr;
   3.250 -    s_time_t              now, min_time;
   3.251 -    int                   cpu = d->processor;
   3.252 -    s32                   io_warp;
   3.253 -
   3.254 -    /* If on the runqueue already then someone has done the wakeup work. */
   3.255 -    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
   3.256 -        return;
   3.257 -    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
   3.258   
   3.259 -    now = NOW();
   3.260 -
   3.261 -#if 0
   3.262 -    /*
   3.263 -     * XXX KAF: This was fbvt_unpause(). Not sure if it's the right thing
   3.264 -     * to do, in light of the stuff that fbvt_wake_up() does.
   3.265 -     * e.g., setting 'inf->avt = CPU_SVT(cpu);' would make the later test
   3.266 -     * 'inf->avt < CPU_SVT(cpu)' redundant!
   3.267 -     */
   3.268 -    if ( d->domain == IDLE_DOMAIN_ID )
   3.269 -    {
   3.270 -        inf->avt = inf->evt = ~0U;
   3.271 -    } 
   3.272 -    else 
   3.273 -    {
   3.274 -        /* Set avt to system virtual time. */
   3.275 -        inf->avt = CPU_SVT(cpu);
   3.276 -        /* Set some default values here. */
   3.277 -        LAST_VTB(cpu) = 0;
   3.278 -        __calc_evt(inf);
   3.279 -    }
   3.280 -#endif
   3.281 -
   3.282 -    /* Set the BVT parameters. */
   3.283 -    if ( inf->avt < CPU_SVT(cpu) )
   3.284 -    {
   3.285 -        /*
   3.286 -         * We want IO bound processes to gain dispatch precedence. It is 
   3.287 -         * especially for device driver domains. Therefore AVT 
   3.288 -         * not be updated to SVT but to a value marginally smaller.
   3.289 -         * Since frequently sleeping domains have high time_slept
   3.290 -         * values, the virtual time can be determined as:
   3.291 -         * SVT - const * TIME_SLEPT
   3.292 -         */
   3.293 -        io_warp = (int)(0.5 * inf->time_slept);
   3.294 -        if ( io_warp > 1000 )
   3.295 -            io_warp = 1000;
   3.296 -
   3.297 -        ASSERT(inf->time_slept + CPU_SVT(cpu) > inf->avt + io_warp);
   3.298 -        inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp;
   3.299 -        inf->avt = CPU_SVT(cpu) - io_warp;
   3.300 -    }
   3.301 -
   3.302 -    /* Deal with warping here. */
   3.303 -    inf->warpback  = 1;
   3.304 -    inf->warped    = now;
   3.305 -    __calc_evt(inf);
   3.306 -
   3.307 -    curr = schedule_data[cpu].curr;
   3.308 - 
   3.309 -    /* Currently-running domain should run at least for ctx_allow. */
   3.310 -    min_time = curr->lastschd + curr->min_slice;
   3.311 -    
   3.312 -    if ( is_idle_task(curr) || (min_time <= now) )
   3.313 -        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
   3.314 -    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
   3.315 -        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
   3.316 -} 
   3.317  
   3.318  struct scheduler sched_fbvt_def = {
   3.319      .name     = "Fair Borrowed Virtual Time",
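fbvt_wake() above additionally gives frequently sleeping (I/O-bound) domains dispatch precedence by warping their actual virtual time slightly below the system virtual time. Restated with integer arithmetic instead of the floating-point 0.5 multiply used in the diff (equivalent for the non-negative time_slept values involved, and avoids floating point in hypervisor context):

    /* io_warp = min(time_slept / 2, 1000): the more a domain has slept,
     * the further avt is pulled below SVT, capped so that CPU-bound
     * domains are not starved. */
    s32 io_warp = (s32)(inf->time_slept / 2);
    if ( io_warp > 1000 )
        io_warp = 1000;
    inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp;   /* keep books */
    inf->avt         = CPU_SVT(cpu) - io_warp;              /* warped avt */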
     4.1 --- a/xen/common/sched_rrobin.c	Tue Jul 27 10:44:40 2004 +0000
     4.2 +++ b/xen/common/sched_rrobin.c	Wed Jul 28 18:02:38 2004 +0000
     4.3 @@ -23,6 +23,8 @@ struct rrobin_dom_info
     4.4      struct domain    *domain;
     4.5  };
     4.6  
     4.7 +static spinlock_t run_locks[NR_CPUS];
     4.8 +
     4.9  #define RR_INFO(d)      ((struct rrobin_dom_info *)d->sched_priv)
    4.10  #define RUNLIST(d)      (struct list_head *)&(RR_INFO(d)->run_list)
    4.11  #define RUNQUEUE(cpu)   RUNLIST(schedule_data[cpu].idle)
    4.12 @@ -49,7 +51,10 @@ static int rr_init_scheduler()
    4.13      int i;
    4.14  
    4.15      for ( i = 0; i < NR_CPUS; i++ )
    4.16 +    {
    4.17          INIT_LIST_HEAD(RUNQUEUE(i));
    4.18 +        spin_lock_init(&run_locks[i]);
    4.19 +    }
    4.20     
    4.21      dom_info_cache = xmem_cache_create("FBVT dom info", 
    4.22                                          sizeof(struct rrobin_dom_info), 
    4.23 @@ -95,11 +100,11 @@ static int rr_init_idle_task(struct doma
    4.24      if(rr_alloc_task(p) < 0) return -1;
    4.25      rr_add_task(p);
    4.26  
    4.27 -    spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);
    4.28 +    spin_lock_irqsave(&run_locks[p->processor], flags);
    4.29      set_bit(DF_RUNNING, &p->flags);
    4.30      if ( !__task_on_runqueue(RUNLIST(p)) )
    4.31           __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor));
    4.32 -    spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
    4.33 +    spin_unlock_irqrestore(&run_locks[p->processor], flags);
    4.34      return 0;
    4.35  }
    4.36  
    4.37 @@ -107,11 +112,14 @@ static int rr_init_idle_task(struct doma
    4.38  /* Main scheduling function */
    4.39  static task_slice_t rr_do_schedule(s_time_t now)
    4.40  {
    4.41 +    unsigned long flags;
    4.42      struct domain *prev = current;
    4.43      int cpu = current->processor;
    4.44      
    4.45      task_slice_t ret;
    4.46 -
    4.47 +    
    4.48 +    spin_lock_irqsave(&run_locks[cpu], flags);
    4.49 +    
    4.50      if(!is_idle_task(prev))
    4.51      {
    4.52          __del_from_runqueue(RUNLIST(prev));
    4.53 @@ -120,6 +128,8 @@ static task_slice_t rr_do_schedule(s_tim
    4.54              __add_to_runqueue_tail(RUNLIST(prev), RUNQUEUE(cpu));
    4.55      }
    4.56      
    4.57 +    spin_unlock_irqrestore(&run_locks[cpu], flags);
    4.58 +    
    4.59      ret.task = list_entry(  RUNQUEUE(cpu).next->next, 
    4.60                              struct rrobin_dom_info, 
    4.61                              run_list)->domain;
    4.62 @@ -149,27 +159,44 @@ static void rr_dump_settings()
    4.63  
    4.64  static void rr_sleep(struct domain *d)
    4.65  {
    4.66 +    unsigned long flags;
    4.67 +
    4.68      if ( test_bit(DF_RUNNING, &d->flags) )
    4.69          cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
    4.70 -    else if ( __task_on_runqueue(RUNLIST(d)) )
    4.71 -        __del_from_runqueue(RUNLIST(d));
    4.72 +    else
    4.73 +    {
    4.74 +        spin_lock_irqsave(&run_locks[d->processor], flags);
    4.75 +        if ( __task_on_runqueue(RUNLIST(d)) )
    4.76 +            __del_from_runqueue(RUNLIST(d));
    4.77 +        spin_unlock_irqrestore(&run_locks[d->processor], flags);
    4.78 +    }
    4.79  }
    4.80  
    4.81  void rr_wake(struct domain *d)
    4.82  {
    4.83 +    unsigned long       flags;
    4.84      struct domain       *curr;
    4.85 -    s_time_t             now, min_time;
    4.86 -    int                  cpu = d->processor;
    4.87 +    s_time_t            now, min_time;
    4.88 +    int                 cpu = d->processor;
    4.89  
    4.90 +    spin_lock_irqsave(&run_locks[cpu], flags);
    4.91 +    
    4.92      /* If on the runqueue already then someone has done the wakeup work. */
    4.93      if ( unlikely(__task_on_runqueue(RUNLIST(d))))
    4.94 +    {
    4.95 +        spin_unlock_irqrestore(&run_locks[cpu], flags);
    4.96          return;
    4.97 +    }
    4.98  
    4.99      __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
   4.100 +    spin_unlock_irqrestore(&run_locks[cpu], flags);
   4.101 +
   4.102      now = NOW();
   4.103  
   4.104 +    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
   4.105      curr = schedule_data[cpu].curr;
   4.106 -
   4.107 +    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
   4.108 + 
   4.109      /* Currently-running domain should run at least for ctx_allow. */
   4.110      min_time = curr->lastschd + curr->min_slice;
   4.111      
   4.112 @@ -194,7 +221,7 @@ static void rr_dump_cpu_state(int i)
   4.113      int loop = 0;
   4.114      struct rrobin_dom_info *d_inf;
   4.115  
   4.116 -    spin_lock_irqsave(&schedule_data[i].schedule_lock, flags);
   4.117 +    spin_lock_irqsave(&run_locks[i], flags);
   4.118  
   4.119      queue = RUNQUEUE(i);
   4.120      printk("QUEUE rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
   4.121 @@ -210,7 +237,7 @@ static void rr_dump_cpu_state(int i)
   4.122          d_inf = list_entry(list, struct rrobin_dom_info, run_list);
   4.123          rr_dump_domain(d_inf->domain);
   4.124      }
   4.125 -    spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags);
   4.126 +    spin_unlock_irqrestore(&run_locks[i], flags);
   4.127  }
   4.128  
   4.129  
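The round-robin scheduler has no private per-CPU structure to embed a lock in — its runqueue is threaded through the idle task's rrobin_dom_info — so the changeset uses a static NR_CPUS lock array instead. Had a per-CPU structure existed, the embedded form would mirror the BVT/FBVT layout (hypothetical sketch; rrobin defines no such struct):

    /* Hypothetical embedded equivalent of 'static spinlock_t
     * run_locks[NR_CPUS]'; not part of this changeset. */
    struct rrobin_cpu_info {
        spinlock_t       run_lock;    /* would protect the runqueue */
        struct list_head runqueue;
    };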
     5.1 --- a/xen/common/schedule.c	Tue Jul 27 10:44:40 2004 +0000
     5.2 +++ b/xen/common/schedule.c	Wed Jul 28 18:02:38 2004 +0000
     5.3 @@ -164,13 +164,13 @@ void init_idle_task(void)
     5.4  void domain_sleep(struct domain *d)
     5.5  {
     5.6      unsigned long flags;
     5.7 -    int           cpu = d->processor;
     5.8  
     5.9 -    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
    5.10 +    /* sleep and wake protected by domain's state_lock */
    5.11 +    spin_lock_irqsave(&d->state_lock, flags);
    5.12      if ( likely(!domain_runnable(d)) )
    5.13          SCHED_OP(sleep, d);
    5.14 -    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
    5.15 -
    5.16 +    spin_unlock_irqrestore(&d->state_lock, flags);
    5.17 + 
    5.18      /* Synchronous. */
    5.19      while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) )
    5.20      {
    5.21 @@ -182,8 +182,9 @@ void domain_sleep(struct domain *d)
    5.22  void domain_wake(struct domain *d)
    5.23  {
    5.24      unsigned long       flags;
    5.25 -    int                 cpu = d->processor;
    5.26 -    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
    5.27 +
    5.28 +    spin_lock_irqsave(&d->state_lock, flags);
    5.29 +    
    5.30      if ( likely(domain_runnable(d)) )
    5.31      {
    5.32          TRACE_2D(TRC_SCHED_WAKE, d->domain, d);
    5.33 @@ -192,7 +193,8 @@ void domain_wake(struct domain *d)
    5.34          d->wokenup = NOW();
    5.35  #endif
    5.36      }
    5.37 -    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
    5.38 +
    5.39 +    spin_unlock_irqrestore(&d->state_lock, flags);
    5.40  }
    5.41  
    5.42  /* Block the currently-executing domain until a pertinent event occurs. */
    5.43 @@ -323,9 +325,9 @@ void __enter_scheduler(void)
    5.44      s32                 r_time;     /* time for new dom to run */
    5.45  
    5.46      perfc_incrc(sched_run);
    5.47 -
    5.48 +    
    5.49      spin_lock_irq(&schedule_data[cpu].schedule_lock);
    5.50 -
    5.51 + 
    5.52      now = NOW();
    5.53  
    5.54      rem_ac_timer(&schedule_data[cpu].s_timer);
    5.55 @@ -349,9 +351,9 @@ void __enter_scheduler(void)
    5.56  
    5.57      r_time = next_slice.time;
    5.58      next = next_slice.task;
    5.59 -
    5.60 +    
    5.61      schedule_data[cpu].curr = next;
    5.62 -
    5.63 +    
    5.64      next->lastschd = now;
    5.65  
     5.66      /* reprogram the timer */
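domain_sleep() and domain_wake() now serialise on the domain they act on rather than on that domain's CPU, so a wake/sleep race on one domain no longer contends with scheduling work for every other domain on the same processor. The resulting shape of the two entry points (sketch of the code above; the wake hook name is an assumption, as the diff context does not show it):

    void domain_sleep(struct domain *d)
    {
        unsigned long flags;
        spin_lock_irqsave(&d->state_lock, flags);
        if ( likely(!domain_runnable(d)) )
            SCHED_OP(sleep, d);                /* per-scheduler hook */
        spin_unlock_irqrestore(&d->state_lock, flags);
        /* ...then spin synchronously until DF_RUNNING clears, as above. */
    }

    void domain_wake(struct domain *d)
    {
        unsigned long flags;
        spin_lock_irqsave(&d->state_lock, flags);
        if ( likely(domain_runnable(d)) )
            SCHED_OP(wake_up, d);              /* assumed hook name */
        spin_unlock_irqrestore(&d->state_lock, flags);
    }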
     6.1 --- a/xen/include/xen/sched.h	Tue Jul 27 10:44:40 2004 +0000
     6.2 +++ b/xen/include/xen/sched.h	Wed Jul 28 18:02:38 2004 +0000
     6.3 @@ -101,6 +101,7 @@ struct domain
     6.4  
     6.5      /* Scheduling. */
     6.6      int              shutdown_code; /* code value from OS (if DF_SHUTDOWN). */
     6.7 +    spinlock_t       state_lock;    /* wake/sleep lock                      */
     6.8      s_time_t         lastschd;      /* time this domain was last scheduled */
     6.9      s_time_t         lastdeschd;    /* time this domain was last descheduled */
    6.10      s_time_t         cpu_time;      /* total CPU time received till now */
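With state_lock added to struct domain, each scheduling field has one clearly scoped guard. The annotations below summarise ownership as inferred from the code paths in this changeset (they do not appear in sched.h itself):

    struct domain {
        /* ... */
        spinlock_t state_lock;  /* serialises domain_wake()/domain_sleep() */
        s_time_t   lastschd;    /* written in __enter_scheduler() under
                                 * schedule_data[cpu].schedule_lock        */
        /* runqueue linkage lives in sched_priv, under the per-CPU
         * run_lock (or run_locks[cpu] for round-robin)                    */
        /* ... */
    };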