ia64/xen-unstable

changeset 983:c6cfb98dc402

bitkeeper revision 1.636 (3fc376f1OnDIvL1xiIhqzjKDbchujQ)

desc.h, traps.c, sched.h, event.h, schedule.c, memory.c:
Fixes and cleanups.
author kaf24@scramble.cl.cam.ac.uk
date Tue Nov 25 15:36:17 2003 +0000 (2003-11-25)
parents 433e0c504cbe
children d63f2c8a7f71 9ceeacd05d1f 03601fb7a14d
files xen/common/memory.c xen/common/schedule.c xen/include/xeno/event.h xen/include/xeno/sched.h xenolinux-2.4.22-sparse/arch/xeno/kernel/traps.c xenolinux-2.4.22-sparse/include/asm-xeno/desc.h
line diff
     1.1 --- a/xen/common/memory.c	Mon Nov 24 22:52:49 2003 +0000
     1.2 +++ b/xen/common/memory.c	Tue Nov 25 15:36:17 2003 +0000
     1.3 @@ -821,8 +821,7 @@ static int do_extended_command(unsigned 
     1.4          else if ( (current->mm.ldt_ents != ents) || 
     1.5                    (current->mm.ldt_base != ptr) )
     1.6          {
     1.7 -            if ( current->mm.ldt_ents != 0 )
     1.8 -                invalidate_shadow_ldt();
     1.9 +            invalidate_shadow_ldt();
    1.10              current->mm.ldt_base = ptr;
    1.11              current->mm.ldt_ents = ents;
    1.12              load_LDT(current);
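
The memory.c hunk above drops the caller-side "ldt_ents != 0" guard and calls invalidate_shadow_ldt() unconditionally. A minimal sketch of the pattern this implies, assuming the guard now lives inside the callee (the body below is illustrative, not the actual Xen implementation):

    /* Hypothetical sketch: the "anything to tear down?" check moves into the
     * callee, so every caller may invoke it unconditionally. */
    static void invalidate_shadow_ldt(void)
    {
        if ( current->mm.ldt_ents == 0 )
            return;                 /* no shadow LDT mapped: nothing to do */
        /* ... unmap and release the shadow LDT mappings ... */
    }
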
     2.1 --- a/xen/common/schedule.c	Mon Nov 24 22:52:49 2003 +0000
     2.2 +++ b/xen/common/schedule.c	Tue Nov 25 15:36:17 2003 +0000
     2.3 @@ -43,7 +43,6 @@ static s32 ctx_allow = (s32)MILLISECS(5)
     2.4  
     2.5  typedef struct schedule_data_st
     2.6  {
     2.7 -    spinlock_t          lock;           /* lock for protecting this */
     2.8      struct list_head    runqueue;       /* runqueue */
     2.9      struct task_struct *curr;           /* current task */
    2.10      struct task_struct *idle;           /* idle task for this cpu */
    2.11 @@ -55,6 +54,8 @@ typedef struct schedule_data_st
    2.12  } __cacheline_aligned schedule_data_t;
    2.13  static schedule_data_t schedule_data[NR_CPUS];
    2.14  
    2.15 +spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned;
    2.16 +
    2.17  /* Skanky periodic event to all guests. This must die in the next release! */
    2.18  static struct ac_timer v_timer; 
    2.19  
    2.20 @@ -128,8 +129,7 @@ void sched_add_domain(struct task_struct
    2.21  
    2.22      if ( p->domain == IDLE_DOMAIN_ID )
    2.23      {
    2.24 -        p->avt = 0xffffffff;
    2.25 -        p->evt = 0xffffffff;
    2.26 +        p->avt = p->evt = ~0U;
    2.27          schedule_data[p->processor].idle = p;
    2.28      } 
    2.29      else 
    2.30 @@ -159,29 +159,21 @@ void init_idle_task(void)
    2.31  {
    2.32      unsigned long flags;
    2.33      struct task_struct *p = current;
    2.34 -    spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
    2.35 +    spin_lock_irqsave(&schedule_lock[p->processor], flags);
    2.36      p->has_cpu = 1;
    2.37      p->state = TASK_RUNNING;
    2.38      if ( !__task_on_runqueue(p) )
    2.39          __add_to_runqueue_head(p);
    2.40 -    spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
    2.41 +    spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
    2.42  }
    2.43  
    2.44  
    2.45 -/*
    2.46 - * wake up a domain which had been sleeping
    2.47 - */
    2.48 -int wake_up(struct task_struct *p)
    2.49 +void __wake_up(struct task_struct *p)
    2.50  {
    2.51 -    unsigned long flags;
    2.52 -    int ret = 0;
    2.53 -
    2.54 -    spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
    2.55 +    ASSERT(p->state != TASK_DYING);
    2.56  
    2.57 -    /* XXX RN: should we warp here? Might be a good idea to also boost a 
    2.58 -     * domain which currently is unwarped and on run queue and 
    2.59 -     * the receives an event. */
    2.60 -    if ( __task_on_runqueue(p) ) goto out;
    2.61 +    if ( unlikely(__task_on_runqueue(p)) )
    2.62 +        return;
    2.63  
    2.64      p->state = TASK_RUNNING;
    2.65      __add_to_runqueue_head(p);
    2.66 @@ -198,16 +190,17 @@ int wake_up(struct task_struct *p)
    2.67  #ifdef SCHED_HISTO
    2.68      p->wokenup = NOW();
    2.69  #endif
    2.70 -
    2.71 -    ret = 1;
    2.72 - out:
    2.73 -    spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
    2.74 -    return ret;
    2.75  }
    2.76  
    2.77 -/*
    2.78 - * Voluntarily yield the processor to another domain, until an event occurs.
    2.79 - */
    2.80 +void wake_up(struct task_struct *p)
    2.81 +{
    2.82 +    unsigned long flags;
    2.83 +    spin_lock_irqsave(&schedule_lock[p->processor], flags);
    2.84 +    __wake_up(p);
    2.85 +    spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
    2.86 +}
    2.87 +
    2.88 +/* Voluntarily yield the processor to another domain, until an event occurs. */
    2.89  long do_yield(void)
    2.90  {
    2.91      current->state = TASK_INTERRUPTIBLE;
    2.92 @@ -216,9 +209,7 @@ long do_yield(void)
    2.93      return 0;
    2.94  }
    2.95  
    2.96 -/*
    2.97 - *  Demultiplex scheduler-related hypercalls.
    2.98 - */
    2.99 +/* Demultiplex scheduler-related hypercalls. */
   2.100  long do_sched_op(unsigned long op)
   2.101  {
   2.102      long ret = 0;
   2.103 @@ -251,18 +242,14 @@ long do_sched_op(unsigned long op)
   2.104      return ret;
   2.105  }
   2.106  
   2.107 -/*
   2.108 - * Control the scheduler
   2.109 - */
   2.110 +/* Control the scheduler. */
   2.111  long sched_bvtctl(unsigned long c_allow)
   2.112  {
   2.113      ctx_allow = c_allow;
   2.114      return 0;
   2.115  }
   2.116  
   2.117 -/*
   2.118 - * Adjust scheduling parameter for a given domain
   2.119 - */
   2.120 +/* Adjust scheduling parameter for a given domain. */
   2.121  long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, 
   2.122                   unsigned long warpl, unsigned long warpu)
   2.123  {
   2.124 @@ -276,9 +263,9 @@ long sched_adjdom(int dom, unsigned long
   2.125      if ( p == NULL ) 
   2.126          return -ESRCH;
   2.127  
   2.128 -    spin_lock_irq(&schedule_data[p->processor].lock);   
   2.129 +    spin_lock_irq(&schedule_lock[p->processor]);   
   2.130      p->mcu_advance = mcu_adv;
   2.131 -    spin_unlock_irq(&schedule_data[p->processor].lock); 
   2.132 +    spin_unlock_irq(&schedule_lock[p->processor]); 
   2.133  
   2.134      put_task_struct(p);
   2.135  
   2.136 @@ -293,18 +280,15 @@ long sched_adjdom(int dom, unsigned long
   2.137   * Otherwise we do a run through the scheduler after the current tasks 
   2.138   * context switch allowance is over.
   2.139   */
   2.140 -void reschedule(struct task_struct *p)
   2.141 +unsigned long __reschedule(struct task_struct *p)
   2.142  {
   2.143      int cpu = p->processor;
   2.144      struct task_struct *curr;
   2.145 -    unsigned long flags;
   2.146      s_time_t now, min_time;
   2.147  
   2.148 -    if ( p->has_cpu )
   2.149 -        return;
   2.150 +    if ( unlikely(p->has_cpu || !__task_on_runqueue(p)) )
   2.151 +        return 0;
   2.152  
   2.153 -    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
   2.154 -    
   2.155      now = NOW();
   2.156      curr = schedule_data[cpu].curr;
   2.157      /* domain should run at least for ctx_allow */
   2.158 @@ -312,23 +296,26 @@ void reschedule(struct task_struct *p)
   2.159  
   2.160      if ( is_idle_task(curr) || (min_time <= now) )
   2.161      {
   2.162 -        /* reschedule */
   2.163          set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
   2.164 -
   2.165 -        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   2.166 -
   2.167 -        if ( cpu != smp_processor_id() )
   2.168 -            smp_send_event_check_cpu(cpu);
   2.169 -
   2.170 -        return;
   2.171 +        return (1 << p->processor);
   2.172      }
   2.173  
   2.174      /* current hasn't been running for long enough -> reprogram timer.
   2.175       * but don't bother if timer would go off soon anyway */
   2.176      if ( schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP )
   2.177          mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
   2.178 -    
   2.179 -    spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
   2.180 +
   2.181 +    return 0;
   2.182 +}
   2.183 +
   2.184 +
   2.185 +void reschedule(struct task_struct *p)
   2.186 +{
   2.187 +    unsigned long flags, cpu_mask;
   2.188 +    spin_lock_irqsave(&schedule_lock[p->processor], flags);
   2.189 +    cpu_mask = __reschedule(p);
   2.190 +    spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
   2.191 +    hyp_event_notify(cpu_mask);
   2.192  }
   2.193  
   2.194  
   2.195 @@ -341,9 +328,9 @@ void reschedule(struct task_struct *p)
   2.196   */
   2.197  asmlinkage void __enter_scheduler(void)
   2.198  {
   2.199 -    struct task_struct *prev, *next, *next_prime, *p;
   2.200 +    struct task_struct *prev = current, *next = NULL, *next_prime, *p;
   2.201      struct list_head   *tmp;
   2.202 -    int                 this_cpu;
   2.203 +    int                 this_cpu = prev->processor;
   2.204      s_time_t            now;
   2.205      s32                 r_time;     /* time for new dom to run */
   2.206      s32                 ranfor;     /* assume we never run longer than 2.1s! */
   2.207 @@ -352,69 +339,41 @@ asmlinkage void __enter_scheduler(void)
   2.208  
   2.209      perfc_incrc(sched_run);
   2.210  
   2.211 -    prev = current;
   2.212 -    next = NULL;
   2.213 -
   2.214 -    this_cpu = prev->processor;
   2.215 -
   2.216 -    spin_lock_irq(&schedule_data[this_cpu].lock);
   2.217 +    spin_lock_irq(&schedule_lock[this_cpu]);
   2.218  
   2.219      now = NOW();
   2.220  
   2.221 -    /* remove timer, if still on list  */
   2.222      rem_ac_timer(&schedule_data[this_cpu].s_timer);
   2.223  
   2.224 -    /* deschedule the current domain */
   2.225 -
   2.226      ASSERT(!in_interrupt());
   2.227      ASSERT(__task_on_runqueue(prev));
   2.228 -
   2.229 -    if ( is_idle_task(prev) ) 
   2.230 -        goto deschedule_done;
   2.231 -
   2.232 -    /* do some accounting */
   2.233 -    ranfor = (s32)(now - prev->lastschd);
   2.234 -    prev->cpu_time += ranfor;
   2.235 -    
   2.236 -    /* calculate mcu and update avt */
   2.237 -    mcus = ranfor/MCU;
   2.238 -    if (ranfor % MCU) mcus ++;  /* always round up */
   2.239 -    prev->avt += mcus * prev->mcu_advance;
   2.240 -
   2.241 -    /* recalculate evt */
   2.242 -    __calc_evt(prev);
   2.243 +    ASSERT(prev->state != TASK_UNINTERRUPTIBLE);
   2.244  
   2.245 -    /* dequeue */
   2.246 -    __del_from_runqueue(prev);
   2.247 +    if ( likely(!is_idle_task(prev)) ) 
   2.248 +    {
   2.249 +        ranfor = (s32)(now - prev->lastschd);
   2.250 +        prev->cpu_time += ranfor;
   2.251      
   2.252 -    switch ( prev->state )
   2.253 -    {
   2.254 -    case TASK_INTERRUPTIBLE:
   2.255 -        if ( signal_pending(prev) )
   2.256 +        /* Calculate mcu and update avt. */
   2.257 +        mcus = (ranfor + MCU - 1) / MCU;
   2.258 +        prev->avt += mcus * prev->mcu_advance;
   2.259 +        
   2.260 +        __calc_evt(prev);
   2.261 +        
   2.262 +        __del_from_runqueue(prev);
   2.263 +        
   2.264 +        if ( likely(prev->state == TASK_RUNNING) ||
   2.265 +             unlikely((prev->state == TASK_INTERRUPTIBLE) && 
   2.266 +                      signal_pending(prev)) )
   2.267          {
   2.268 -            prev->state = TASK_RUNNING; /* but has events pending */
   2.269 -            break;
   2.270 +            prev->state = TASK_RUNNING;
   2.271 +            __add_to_runqueue_tail(prev);
   2.272          }
   2.273 -    case TASK_UNINTERRUPTIBLE:
   2.274 -    case TASK_DYING:
   2.275 -    case TASK_STOPPED:
   2.276 -    default:
   2.277 -        /* Done if not running. Else continue. */
   2.278 -        goto deschedule_done;
   2.279 -    case TASK_RUNNING:;
   2.280      }
   2.281  
   2.282 -    /* requeue */
   2.283 -    __add_to_runqueue_tail(prev);
   2.284 -    
   2.285 - deschedule_done:
   2.286      clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
   2.287  
   2.288 -    /*
   2.289 -     * Pick a new domain
   2.290 -     */
   2.291 -
   2.292 -    /* we should at least have the idle task */
   2.293 +    /* We should at least have the idle task */
   2.294      ASSERT(!list_empty(&schedule_data[this_cpu].runqueue));
   2.295  
   2.296      /*
   2.297 @@ -425,64 +384,76 @@ asmlinkage void __enter_scheduler(void)
   2.298      next       = schedule_data[this_cpu].idle;
   2.299      next_prime = NULL;
   2.300  
   2.301 -    next_evt       = 0xffffffff;
   2.302 -    next_prime_evt = 0xffffffff;
   2.303 -    min_avt        = 0xffffffff;    /* to calculate svt */
   2.304 +    next_evt       = ~0U;
   2.305 +    next_prime_evt = ~0U;
   2.306 +    min_avt        = ~0U;
   2.307  
   2.308 -    list_for_each(tmp, &schedule_data[this_cpu].runqueue) {
   2.309 +    list_for_each ( tmp, &schedule_data[this_cpu].runqueue )
   2.310 +    {
   2.311          p = list_entry(tmp, struct task_struct, run_list);
   2.312 -        if (p->evt < next_evt) {
   2.313 +        if ( p->evt < next_evt )
   2.314 +        {
   2.315              next_prime     = next;
   2.316              next_prime_evt = next_evt;
   2.317              next = p;
   2.318              next_evt = p->evt;
   2.319 -        } else if (next_prime_evt == 0xffffffff) {
   2.320 +        } 
   2.321 +        else if ( next_prime_evt == ~0U )
   2.322 +        {
   2.323              next_prime_evt = p->evt;
   2.324              next_prime     = p;
   2.325 -        } else if (p->evt < next_prime_evt) {
   2.326 +        } 
   2.327 +        else if ( p->evt < next_prime_evt )
   2.328 +        {
   2.329              next_prime_evt = p->evt;
   2.330              next_prime     = p;
   2.331          }
   2.332 -        /* determine system virtual time */
   2.333 -        if (p->avt < min_avt)
   2.334 +
   2.335 +        /* Determine system virtual time. */
   2.336 +        if ( p->avt < min_avt )
   2.337              min_avt = p->avt;
   2.338      }
   2.339 -    ASSERT(next != NULL);   /* we should have at least the idle task */
   2.340  
   2.341 -    /* update system virtual time  */
   2.342 -    if (min_avt != 0xffffffff) schedule_data[this_cpu].svt = min_avt;
   2.343 +    /* Update system virtual time. */
   2.344 +    if ( min_avt != ~0U )
   2.345 +        schedule_data[this_cpu].svt = min_avt;
   2.346  
   2.347      /* check for virtual time overrun on this cpu */
   2.348 -    if (schedule_data[this_cpu].svt >= 0xf0000000) {
   2.349 +    if ( schedule_data[this_cpu].svt >= 0xf0000000 )
   2.350 +    {
   2.351          u_long t_flags; 
   2.352          write_lock_irqsave(&tasklist_lock, t_flags); 
   2.353          p = &idle0_task;
   2.354          do {
   2.355 -            if (p->processor == this_cpu && !is_idle_task(p)) {
   2.356 +            if ( (p->processor == this_cpu) && !is_idle_task(p) )
   2.357 +            {
   2.358                  p->evt -= 0xe0000000;
   2.359                  p->avt -= 0xe0000000;
   2.360              }
   2.361 -        } while ( (p = p->next_task) != &idle0_task );
   2.362 +        } 
   2.363 +        while ( (p = p->next_task) != &idle0_task );
   2.364          write_unlock_irqrestore(&tasklist_lock, t_flags); 
   2.365          schedule_data[this_cpu].svt -= 0xe0000000;
   2.366      }
   2.367  
   2.368      /* work out time for next run through scheduler */
   2.369 -    if (is_idle_task(next)) {
   2.370 +    if ( is_idle_task(next) ) 
   2.371 +    {
   2.372          r_time = ctx_allow;
   2.373          goto sched_done;
   2.374      }
   2.375  
   2.376 -    if (next_prime == NULL || is_idle_task(next_prime)) {
   2.377 -        /* we have only one runable task besides the idle task */
   2.378 +    if ( (next_prime == NULL) || is_idle_task(next_prime) )
   2.379 +    {
   2.380 +        /* We have only one runnable task besides the idle task. */
   2.381          r_time = 10 * ctx_allow;     /* RN: random constant */
   2.382          goto sched_done;
   2.383      }
   2.384  
   2.385      /*
   2.386 -     * if we are here we have two runable tasks.
   2.387 -     * work out how long 'next' can run till its evt is greater than
   2.388 -     * 'next_prime's evt. Taking context switch allowance into account.
   2.389 +     * If we are here then we have two runnable tasks.
   2.390 +     * Work out how long 'next' can run till its evt is greater than
   2.391 +     * 'next_prime's evt. Take context switch allowance into account.
   2.392       */
   2.393      ASSERT(next_prime->evt >= next->evt);
   2.394      r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + ctx_allow;
   2.395 @@ -491,7 +462,8 @@ asmlinkage void __enter_scheduler(void)
   2.396      ASSERT(r_time >= ctx_allow);
   2.397  
   2.398  #ifndef NDEBUG
   2.399 -    if (r_time < ctx_allow) {
   2.400 +    if ( r_time < ctx_allow )
   2.401 +    {
   2.402          printk("[%02d]: %lx\n", this_cpu, (unsigned long)r_time);
   2.403          dump_rqueue(&schedule_data[this_cpu].runqueue, "foo");
   2.404      }
   2.405 @@ -508,7 +480,7 @@ asmlinkage void __enter_scheduler(void)
   2.406      schedule_data[this_cpu].s_timer.expires  = now + r_time;
   2.407      add_ac_timer(&schedule_data[this_cpu].s_timer);
   2.408  
   2.409 -    spin_unlock_irq(&schedule_data[this_cpu].lock);
   2.410 +    spin_unlock_irq(&schedule_lock[this_cpu]);
   2.411  
   2.412      /* done, switch tasks */
   2.413      if ( unlikely(prev == next) )
   2.414 @@ -610,7 +582,7 @@ void __init scheduler_init(void)
   2.415      for ( i = 0; i < NR_CPUS; i++ )
   2.416      {
   2.417          INIT_LIST_HEAD(&schedule_data[i].runqueue);
   2.418 -        spin_lock_init(&schedule_data[i].lock);
   2.419 +        spin_lock_init(&schedule_lock[i]);
   2.420          schedule_data[i].curr = &idle0_task;
   2.421          
   2.422          init_ac_timer(&schedule_data[i].s_timer);
   2.423 @@ -688,10 +660,10 @@ void dump_runq(u_char key, void *dev_id,
   2.424      printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns NOW=0x%08X%08X\n",
   2.425             (u32)MCU, (u32)ctx_allow, (u32)(now>>32), (u32)now); 
   2.426      for (i = 0; i < smp_num_cpus; i++) {
   2.427 -        spin_lock_irqsave(&schedule_data[i].lock, flags);
   2.428 +        spin_lock_irqsave(&schedule_lock[i], flags);
   2.429          printk("CPU[%02d] svt=0x%08X ", i, (s32)schedule_data[i].svt);
   2.430          dump_rqueue(&schedule_data[i].runqueue, "rq"); 
   2.431 -        spin_unlock_irqrestore(&schedule_data[i].lock, flags);
   2.432 +        spin_unlock_irqrestore(&schedule_lock[i], flags);
   2.433      }
   2.434      return; 
   2.435  }
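
The schedule.c changes pull the per-CPU lock out of schedule_data into a standalone schedule_lock[] array so that code outside the scheduler (see the event.h hunks below) can take it, split wake_up()/reschedule() into lock-free __wake_up()/__reschedule() plus thin locking wrappers, and make __reschedule() return a CPU mask instead of sending the IPI itself. The mcus computation also switches to the usual integer ceiling-division idiom, (ranfor + MCU - 1) / MCU, in place of the explicit remainder test. A short sketch of the resulting convention, modelled on reschedule() and mark_hyp_event() in this changeset (usage assumed, not a verbatim excerpt):

    /* Double-underscore variants expect schedule_lock[cpu] to be held by the
     * caller; the plain variants take and release it themselves.  Cross-CPU
     * notifications are sent only after the lock has been dropped. */
    unsigned long flags, cpu_mask;

    spin_lock_irqsave(&schedule_lock[p->processor], flags);
    if ( p->state == TASK_INTERRUPTIBLE )
        __wake_up(p);                /* lock already held                  */
    cpu_mask = __reschedule(p);      /* CPUs that should run the scheduler */
    spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
    hyp_event_notify(cpu_mask);      /* kick remote CPUs outside the lock  */
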
     3.1 --- a/xen/include/xeno/event.h	Mon Nov 24 22:52:49 2003 +0000
     3.2 +++ b/xen/include/xeno/event.h	Tue Nov 25 15:36:17 2003 +0000
     3.3 @@ -28,31 +28,39 @@
     3.4   */
     3.5  static inline unsigned long mark_guest_event(struct task_struct *p, int event)
     3.6  {
     3.7 +    unsigned long flags, cpu_mask;
     3.8 +
     3.9      if ( test_and_set_bit(event, &p->shared_info->events) )
    3.10          return 0;
    3.11  
    3.12 -    /*
    3.13 -     * No need for the runqueue_lock! The check below does not race
    3.14 -     * with the setting of has_cpu, because that is set with runqueue_lock
    3.15 -     * held. The lock must be released before hypervisor exit (and so
    3.16 -     * a write barrier executed). And, just before hypervisor exit, 
    3.17 -     * outstanding events are checked. So bit is certainly set early enough.
    3.18 -     */
    3.19 -    smp_mb();
    3.20 -    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
    3.21 -    reschedule(p);
    3.22 -    return p->has_cpu ? (1 << p->processor) : 0;
    3.23 +    spin_lock_irqsave(&schedule_lock[p->processor], flags);
    3.24 +    if ( p->state == TASK_INTERRUPTIBLE )
    3.25 +        __wake_up(p);
    3.26 +    cpu_mask = __reschedule(p);
    3.27 +    if ( p->has_cpu )
    3.28 +        cpu_mask |= 1 << p->processor;
    3.29 +    spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
    3.30 +
    3.31 +    return cpu_mask;
    3.32  }
    3.33  
    3.34  /* As above, but hyp_events are handled within the hypervisor. */
    3.35  static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
    3.36  {
    3.37 +    unsigned long flags, cpu_mask;
    3.38 +
    3.39      if ( test_and_set_bit(event, &p->hyp_events) )
    3.40          return 0;
    3.41 -    smp_mb();
    3.42 -    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
    3.43 -    reschedule(p);
    3.44 -    return p->has_cpu ? (1 << p->processor) : 0;
    3.45 +
    3.46 +    spin_lock_irqsave(&schedule_lock[p->processor], flags);
    3.47 +    if ( p->state == TASK_INTERRUPTIBLE )
    3.48 +        __wake_up(p);
    3.49 +    cpu_mask = __reschedule(p);
    3.50 +    if ( p->has_cpu )
    3.51 +        cpu_mask |= 1 << p->processor;
    3.52 +    spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
    3.53 +
    3.54 +    return cpu_mask;
    3.55  }
    3.56  
    3.57  /* Notify the given set of CPUs that guest events may be outstanding. */
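
With this change mark_guest_event() and mark_hyp_event() no longer wake or IPI anything directly: they do their work under schedule_lock[] and hand back a CPU mask for the caller to flush through the notify helper introduced by the comment just below this hunk. A hedged usage sketch (the event bit and the guest_event_notify() spelling are assumptions for illustration):

    /* Hypothetical caller: mark events, accumulate one mask, notify once. */
    unsigned long mask = 0;
    mask |= mark_guest_event(p, _EVENT_TIMER);  /* event bit assumed         */
    guest_event_notify(mask);                   /* helper name assumed; only
                                                   CPUs in 'mask' get an IPI */
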
     4.1 --- a/xen/include/xeno/sched.h	Mon Nov 24 22:52:49 2003 +0000
     4.2 +++ b/xen/include/xeno/sched.h	Tue Nov 25 15:36:17 2003 +0000
     4.3 @@ -233,7 +233,7 @@ extern void free_irq(unsigned int, void 
     4.4  extern unsigned long wait_init_idle;
     4.5  #define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
     4.6  
     4.7 -
     4.8 +extern spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned;
     4.9  
    4.10  /*
    4.11   * Scheduler functions (in schedule.c)
    4.12 @@ -247,8 +247,10 @@ long sched_bvtctl(unsigned long ctx_allo
    4.13  long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, 
    4.14                    unsigned long warpl, unsigned long warpu);
    4.15  void init_idle_task(void);
    4.16 -int  wake_up(struct task_struct *p);
    4.17 +void __wake_up(struct task_struct *p);
    4.18 +void wake_up(struct task_struct *p);
    4.19  long do_yield(void);
    4.20 +unsigned long __reschedule(struct task_struct *p);
    4.21  void reschedule(struct task_struct *p);
    4.22  
    4.23  /* NB. Limited entry in Xen. Not for arbitrary use! */
     5.1 --- a/xenolinux-2.4.22-sparse/arch/xeno/kernel/traps.c	Mon Nov 24 22:52:49 2003 +0000
     5.2 +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/traps.c	Tue Nov 25 15:36:17 2003 +0000
     5.3 @@ -305,6 +305,24 @@ DO_ERROR(18, SIGBUS, "machine check", ma
     5.4  
     5.5  asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
     5.6  {
     5.7 +	/*
     5.8 +	 * If we trapped on an LDT access then ensure that the default_ldt is
     5.9 +	 * loaded, if nothing else. We load default_ldt lazily because LDT
    5.10 +	 * switching costs time and many applications don't need it.
    5.11 +	 */
    5.12 +	if ( unlikely((error_code & 6) == 4) )
    5.13 +	{
    5.14 +		unsigned long ldt;
    5.15 +		flush_page_update_queue(); /* ensure LDTR is up to date */
    5.16 +		__asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
    5.17 +		if ( likely(ldt == 0) )
    5.18 +		{
    5.19 +			queue_set_ldt((unsigned long)&default_ldt[0], 5);
    5.20 +			flush_page_update_queue();
    5.21 +			return;
    5.22 +		}
    5.23 +	}
    5.24 +
    5.25  	if (!(regs->xcs & 2))
    5.26  		goto gp_in_kernel;
    5.27  
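
The new traps.c check decodes the general-protection selector error code: the LDT is now loaded lazily (see the desc.h hunk below), so a fault whose error code says "selector in the LDT, not the IDT" while LDTR is still null just means default_ldt has not been installed yet; the handler installs it (5 entries, enough for the lcall7/lcall27 gates) and returns to retry the instruction. For reference, the bit layout assumed by the (error_code & 6) == 4 test (the macro names below are illustrative, not from the tree):

    /* x86 selector error-code layout:
     *   bit 0  EXT - fault triggered by an external event
     *   bit 1  IDT - selector index refers to the IDT
     *   bit 2  TI  - selector index refers to the LDT rather than the GDT
     *   bits 3-15  - selector index
     * (error_code & 6) == 4 therefore reads: not an IDT reference, but an
     * LDT reference. */
    #define GPF_EXT (1 << 0)
    #define GPF_IDT (1 << 1)
    #define GPF_TI  (1 << 2)
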
     6.1 --- a/xenolinux-2.4.22-sparse/include/asm-xeno/desc.h	Mon Nov 24 22:52:49 2003 +0000
     6.2 +++ b/xenolinux-2.4.22-sparse/include/asm-xeno/desc.h	Tue Nov 25 15:36:17 2003 +0000
     6.3 @@ -18,18 +18,20 @@ extern struct desc_struct default_ldt[];
     6.4  
     6.5  static inline void clear_LDT(void)
     6.6  {
     6.7 -    queue_set_ldt((unsigned long)&default_ldt[0], 5);
     6.8 +    /*
     6.9 +     * NB. We load the default_ldt for lcall7/27 handling on demand, as
    6.10 +     * it slows down context switching. Noone uses it anyway.
    6.11 +     */
    6.12 +    queue_set_ldt(0, 0);
    6.13  }
    6.14  
    6.15  static inline void load_LDT(struct mm_struct *mm)
    6.16  {
    6.17      void *segments = mm->context.segments;
    6.18 -    int count = LDT_ENTRIES;
    6.19 +    int count = 0;
    6.20  
    6.21 -    if (!segments) {
    6.22 -        segments = &default_ldt[0];
    6.23 -        count = 5;
    6.24 -    }
    6.25 +    if ( unlikely(segments != NULL) )
    6.26 +        count = LDT_ENTRIES;
    6.27           
    6.28      queue_set_ldt((unsigned long)segments, count);
    6.29  }
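
After the desc.h change, clear_LDT() queues a null LDT and load_LDT() only installs a real table when the mm has private segments; processes that never use lcall7/lcall27 no longer pay for an LDT switch, and the rare ones that do get default_ldt installed on demand by the do_general_protection() path above. A minimal sketch of how the pieces fit together at context-switch time (switch_ldt() is a hypothetical wrapper, not a function in the sparse tree):

    /* Hypothetical wrapper illustrating the lazy-LDT flow after this change. */
    static inline void switch_ldt(struct mm_struct *next)
    {
        load_LDT(next);              /* queues set_ldt(0,0) in the common case
                                        of no private LDT, or the real table  */
        flush_page_update_queue();   /* push the queued update through to Xen */
    }
    /* If the process later executes lcall7/lcall27, the LDT reference faults
     * and do_general_protection() installs default_ldt on demand (traps.c). */
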