ia64/xen-unstable

changeset:  6219:3b0ce44f7b7a
summary:    merge?
author:     cl349@firebug.cl.cam.ac.uk
date:       Wed Aug 17 08:27:16 2005 +0000 (2005-08-17)
parents:    7c1d66d4e51e 7a92f129e117
children:   a4b22c7fbfa1
files:      linux-2.6-xen-sparse/arch/xen/kernel/reboot.c tools/examples/network-bridge tools/python/xen/xm/create.py xen/arch/ia64/xenmisc.c xen/arch/x86/domain.c xen/arch/x86/vmx.c xen/common/schedule.c xen/include/asm-x86/vmx_vmcs.h xen/include/xen/sched.h
line diff
     1.1 --- a/tools/examples/network-bridge	Wed Aug 17 08:26:58 2005 +0000
     1.2 +++ b/tools/examples/network-bridge	Wed Aug 17 08:27:16 2005 +0000
     1.3 @@ -187,6 +187,7 @@ op_start () {
     1.4  			ifconfig ${netdev} 0.0.0.0 down
     1.5  		fi
     1.6  	fi
     1.7 +	del_addrs ${netdev}
     1.8  	ip link set ${netdev} name p${netdev}
     1.9  	ip link set veth0 name ${netdev}
    1.10  	ifconfig p${netdev} -arp down
     2.1 --- a/tools/python/xen/xm/create.py	Wed Aug 17 08:26:58 2005 +0000
     2.2 +++ b/tools/python/xen/xm/create.py	Wed Aug 17 08:27:16 2005 +0000
     2.3 @@ -380,7 +380,6 @@ def randomMAC():
     2.4  
     2.5      @return: MAC address string
     2.6      """
     2.7 -    random.seed()
     2.8      mac = [ 0xaa, 0x00, 0x00,
     2.9              random.randint(0x00, 0x7f),
    2.10              random.randint(0x00, 0xff),
    2.11 @@ -689,6 +688,7 @@ def balloon_out(dom0_min_mem, opts):
    2.12      del xc
    2.13  
    2.14  def main(argv):
    2.15 +    random.seed()
    2.16      opts = gopts
    2.17      args = opts.parse(argv)
    2.18      if opts.vals.help:
     3.1 --- a/xen/arch/ia64/xenmisc.c	Wed Aug 17 08:26:58 2005 +0000
     3.2 +++ b/xen/arch/ia64/xenmisc.c	Wed Aug 17 08:27:16 2005 +0000
     3.3 @@ -280,7 +280,6 @@ void cs01foo(void) {}
     3.4  
     3.5  unsigned long context_switch_count = 0;
     3.6  
     3.7 -// context_switch
     3.8  void context_switch(struct vcpu *prev, struct vcpu *next)
     3.9  {
    3.10  //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
    3.11 @@ -290,22 +289,14 @@ void context_switch(struct vcpu *prev, s
    3.12  //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
    3.13  //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
    3.14  #ifdef CONFIG_VTI
    3.15 -	unsigned long psr;
    3.16 -	/* Interrupt is enabled after next task is chosen.
    3.17 -	 * So we have to disable it for stack switch.
    3.18 -	 */
    3.19 -	local_irq_save(psr);
    3.20  	vtm_domain_out(prev);
    3.21 -	/* Housekeeping for prev domain */
    3.22 -#endif // CONFIG_VTI
    3.23 -
    3.24 +#endif
    3.25  	context_switch_count++;
    3.26  	switch_to(prev,next,prev);
    3.27  #ifdef CONFIG_VTI
    3.28 -	/* Post-setup for new domain */
    3.29  	 vtm_domain_in(current);
    3.30 -	local_irq_restore(psr);
    3.31 -#endif // CONFIG_VTI
    3.32 +#endif
    3.33 +
    3.34  // leave this debug for now: it acts as a heartbeat when more than
    3.35  // one domain is active
    3.36  {
    3.37 @@ -315,25 +306,27 @@ int id = ((struct vcpu *)current)->domai
    3.38  if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
    3.39  if (!i--) { printk("+",id); i = 1000000; }
    3.40  }
    3.41 -	clear_bit(_VCPUF_running, &prev->vcpu_flags);
    3.42 -	//if (!is_idle_task(next->domain) )
    3.43 -		//send_guest_virq(next, VIRQ_TIMER);
    3.44 +
    3.45  #ifdef CONFIG_VTI
    3.46  	if (VMX_DOMAIN(current))
    3.47  		vmx_load_all_rr(current);
    3.48 -	return;
    3.49 -#else // CONFIG_VTI
    3.50 +#else
    3.51  	if (!is_idle_task(current->domain)) {
    3.52  		load_region_regs(current);
    3.53  		if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
    3.54  	}
    3.55  	if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
    3.56 -#endif // CONFIG_VTI
    3.57 +#endif
    3.58 +}
    3.59 +
    3.60 +void context_switch_finalise(struct vcpu *next)
    3.61 +{
    3.62 +	/* nothing to do */
    3.63  }
    3.64  
    3.65  void continue_running(struct vcpu *same)
    3.66  {
    3.67 -    /* nothing to do */
    3.68 +	/* nothing to do */
    3.69  }
    3.70  
    3.71  void panic_domain(struct pt_regs *regs, const char *fmt, ...)
     4.1 --- a/xen/arch/x86/domain.c	Wed Aug 17 08:26:58 2005 +0000
     4.2 +++ b/xen/arch/x86/domain.c	Wed Aug 17 08:27:16 2005 +0000
     4.3 @@ -48,6 +48,8 @@ boolean_param("noreboot", opt_noreboot);
     4.4  
     4.5  struct percpu_ctxt {
     4.6      struct vcpu *curr_vcpu;
     4.7 +    unsigned int context_not_finalised;
     4.8 +    unsigned int dirty_segment_mask;
     4.9  } __cacheline_aligned;
    4.10  static struct percpu_ctxt percpu_ctxt[NR_CPUS];
    4.11  
    4.12 @@ -541,51 +543,59 @@ void toggle_guest_mode(struct vcpu *v)
    4.13      __r; })
    4.14  
    4.15  #if CONFIG_VMX
    4.16 -#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
    4.17 +#define load_msrs(n)     if (vmx_switch_on) vmx_load_msrs(n)
    4.18  #else
    4.19 -#define load_msrs(_p, _n)     ((void)0)
    4.20 +#define load_msrs(n)     ((void)0)
    4.21  #endif 
    4.22  
    4.23 -static void load_segments(struct vcpu *p, struct vcpu *n)
    4.24 +/*
    4.25 + * save_segments() writes a mask of segments which are dirty (non-zero),
    4.26 + * allowing load_segments() to avoid some expensive segment loads and
    4.27 + * MSR writes.
    4.28 + */
    4.29 +#define DIRTY_DS           0x01
    4.30 +#define DIRTY_ES           0x02
    4.31 +#define DIRTY_FS           0x04
    4.32 +#define DIRTY_GS           0x08
    4.33 +#define DIRTY_FS_BASE      0x10
    4.34 +#define DIRTY_GS_BASE_USER 0x20
    4.35 +
    4.36 +static void load_segments(struct vcpu *n)
    4.37  {
    4.38 -    struct vcpu_guest_context *pctxt = &p->arch.guest_context;
    4.39      struct vcpu_guest_context *nctxt = &n->arch.guest_context;
    4.40      int all_segs_okay = 1;
    4.41 +    unsigned int dirty_segment_mask, cpu = smp_processor_id();
    4.42 +
    4.43 +    /* Load and clear the dirty segment mask. */
    4.44 +    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
    4.45 +    percpu_ctxt[cpu].dirty_segment_mask = 0;
    4.46  
    4.47      /* Either selector != 0 ==> reload. */
    4.48 -    if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
    4.49 +    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
    4.50          all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
    4.51  
    4.52      /* Either selector != 0 ==> reload. */
    4.53 -    if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
    4.54 +    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
    4.55          all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
    4.56  
    4.57      /*
    4.58       * Either selector != 0 ==> reload.
    4.59       * Also reload to reset FS_BASE if it was non-zero.
    4.60       */
    4.61 -    if ( unlikely(pctxt->user_regs.fs |
    4.62 -                  pctxt->fs_base |
    4.63 +    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
    4.64                    nctxt->user_regs.fs) )
    4.65 -    {
    4.66          all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
    4.67 -        if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
    4.68 -            pctxt->fs_base = 0;
    4.69 -    }
    4.70  
    4.71      /*
    4.72       * Either selector != 0 ==> reload.
    4.73       * Also reload to reset GS_BASE if it was non-zero.
    4.74       */
    4.75 -    if ( unlikely(pctxt->user_regs.gs |
    4.76 -                  pctxt->gs_base_user |
    4.77 +    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
    4.78                    nctxt->user_regs.gs) )
    4.79      {
    4.80          /* Reset GS_BASE with user %gs? */
    4.81 -        if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
    4.82 +        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
    4.83              all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
    4.84 -        if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
    4.85 -            pctxt->gs_base_user = 0;
    4.86      }
    4.87  
    4.88      /* This can only be non-zero if selector is NULL. */
    4.89 @@ -650,7 +660,9 @@ static void load_segments(struct vcpu *p
    4.90  
    4.91  static void save_segments(struct vcpu *v)
    4.92  {
    4.93 -    struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
    4.94 +    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
    4.95 +    struct cpu_user_regs      *regs = &ctxt->user_regs;
    4.96 +    unsigned int dirty_segment_mask = 0;
    4.97  
    4.98      if ( VMX_DOMAIN(v) )
    4.99          rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
   4.100 @@ -659,18 +671,34 @@ static void save_segments(struct vcpu *v
   4.101      __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
   4.102      __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
   4.103      __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
   4.104 -}
   4.105  
   4.106 -static void clear_segments(void)
   4.107 -{
   4.108 -    __asm__ __volatile__ (
   4.109 -        " movl %0,%%ds; "
   4.110 -        " movl %0,%%es; "
   4.111 -        " movl %0,%%fs; "
   4.112 -        " movl %0,%%gs; "
   4.113 -        ""safe_swapgs"  "
   4.114 -        " movl %0,%%gs"
   4.115 -        : : "r" (0) );
   4.116 +    if ( regs->ds )
   4.117 +        dirty_segment_mask |= DIRTY_DS;
   4.118 +
   4.119 +    if ( regs->es )
   4.120 +        dirty_segment_mask |= DIRTY_ES;
   4.121 +
   4.122 +    if ( regs->fs )
   4.123 +    {
   4.124 +        dirty_segment_mask |= DIRTY_FS;
   4.125 +        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
   4.126 +    }
   4.127 +    else if ( ctxt->fs_base )
   4.128 +    {
   4.129 +        dirty_segment_mask |= DIRTY_FS_BASE;
   4.130 +    }
   4.131 +
   4.132 +    if ( regs->gs )
   4.133 +    {
   4.134 +        dirty_segment_mask |= DIRTY_GS;
   4.135 +        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
   4.136 +    }
   4.137 +    else if ( ctxt->gs_base_user )
   4.138 +    {
   4.139 +        dirty_segment_mask |= DIRTY_GS_BASE_USER;
   4.140 +    }
   4.141 +
   4.142 +    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
   4.143  }
   4.144  
   4.145  long do_switch_to_user(void)
   4.146 @@ -706,10 +734,9 @@ long do_switch_to_user(void)
   4.147  
   4.148  #elif defined(__i386__)
   4.149  
   4.150 -#define load_segments(_p, _n) ((void)0)
   4.151 -#define load_msrs(_p, _n)     ((void)0)
   4.152 -#define save_segments(_p)     ((void)0)
   4.153 -#define clear_segments()      ((void)0)
   4.154 +#define load_segments(n) ((void)0)
   4.155 +#define load_msrs(n)     ((void)0)
   4.156 +#define save_segments(p) ((void)0)
   4.157  
   4.158  static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
   4.159  {
   4.160 @@ -726,9 +753,9 @@ static inline void switch_kernel_stack(s
   4.161  static void __context_switch(void)
   4.162  {
   4.163      struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
   4.164 -    unsigned int         cpu = smp_processor_id();
   4.165 -    struct vcpu  *p = percpu_ctxt[cpu].curr_vcpu;
   4.166 -    struct vcpu  *n = current;
   4.167 +    unsigned int          cpu = smp_processor_id();
   4.168 +    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
   4.169 +    struct vcpu          *n = current;
   4.170  
   4.171      if ( !is_idle_task(p->domain) )
   4.172      {
   4.173 @@ -786,23 +813,31 @@ static void __context_switch(void)
   4.174  
   4.175  void context_switch(struct vcpu *prev, struct vcpu *next)
   4.176  {
   4.177 -    struct vcpu *realprev;
   4.178 +    unsigned int cpu = smp_processor_id();
   4.179  
   4.180 -    local_irq_disable();
   4.181 +    ASSERT(!local_irq_is_enabled());
   4.182  
   4.183      set_current(next);
   4.184  
   4.185 -    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) || 
   4.186 -         is_idle_task(next->domain) )
   4.187 -    {
   4.188 -        local_irq_enable();
   4.189 -    }
   4.190 -    else
   4.191 +    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
   4.192      {
   4.193          __context_switch();
   4.194 +        percpu_ctxt[cpu].context_not_finalised = 1;
   4.195 +    }
   4.196 +}
   4.197  
   4.198 -        local_irq_enable();
   4.199 -        
   4.200 +void context_switch_finalise(struct vcpu *next)
   4.201 +{
   4.202 +    unsigned int cpu = smp_processor_id();
   4.203 +
   4.204 +    ASSERT(local_irq_is_enabled());
   4.205 +
   4.206 +    if ( percpu_ctxt[cpu].context_not_finalised )
   4.207 +    {
   4.208 +        percpu_ctxt[cpu].context_not_finalised = 0;
   4.209 +
   4.210 +        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
   4.211 +
   4.212          if ( VMX_DOMAIN(next) )
   4.213          {
   4.214              vmx_restore_msrs(next);
   4.215 @@ -810,19 +845,11 @@ void context_switch(struct vcpu *prev, s
   4.216          else
   4.217          {
   4.218              load_LDT(next);
   4.219 -            load_segments(realprev, next);
   4.220 -            load_msrs(realprev, next);
   4.221 +            load_segments(next);
   4.222 +            load_msrs(next);
   4.223          }
   4.224      }
   4.225  
   4.226 -    /*
   4.227 -     * We do this late on because it doesn't need to be protected by the
   4.228 -     * schedule_lock, and because we want this to be the very last use of
   4.229 -     * 'prev' (after this point, a dying domain's info structure may be freed
   4.230 -     * without warning). 
   4.231 -     */
   4.232 -    clear_bit(_VCPUF_running, &prev->vcpu_flags);
   4.233 -
   4.234      schedule_tail(next);
   4.235      BUG();
   4.236  }
   4.237 @@ -835,12 +862,19 @@ void continue_running(struct vcpu *same)
   4.238  
   4.239  int __sync_lazy_execstate(void)
   4.240  {
   4.241 -    if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
   4.242 -        return 0;
   4.243 -    __context_switch();
   4.244 -    load_LDT(current);
   4.245 -    clear_segments();
   4.246 -    return 1;
   4.247 +    unsigned long flags;
   4.248 +    int switch_required;
   4.249 +
   4.250 +    local_irq_save(flags);
   4.251 +
   4.252 +    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
   4.253 +
   4.254 +    if ( switch_required )
   4.255 +        __context_switch();
   4.256 +
   4.257 +    local_irq_restore(flags);
   4.258 +
   4.259 +    return switch_required;
   4.260  }
   4.261  
   4.262  void sync_lazy_execstate_cpu(unsigned int cpu)
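
For illustration (not part of the changeset): the save_segments()/load_segments() rework above drops the old compare-against-prev logic in favour of a per-CPU dirty mask, so a segment reload (and the associated MSR write) only happens when either the outgoing state was dirty or the incoming selector is non-zero. The following is a simplified, self-contained sketch of that bookkeeping for the %fs case only; the function names and flattened types are illustrative, and the real code is the domain.c hunk above.

    #include <stdio.h>

    /* Dirty bits recorded by the save path and consumed by the load path. */
    #define DIRTY_FS       0x04
    #define DIRTY_FS_BASE  0x10

    static unsigned int dirty_segment_mask;    /* per-CPU in the real code */

    /* Note whether %fs (or a stale fs_base) will need attention at load time. */
    static void save_fs_sketch(unsigned short fs_sel, unsigned long *fs_base)
    {
        if (fs_sel) {
            dirty_segment_mask |= DIRTY_FS;
            *fs_base = 0;                      /* != 0 selector kills fs_base */
        } else if (*fs_base) {
            dirty_segment_mask |= DIRTY_FS_BASE;
        }
    }

    /* Reload only if the outgoing state was dirty or the incoming selector is
     * non-zero; otherwise the expensive segment load can be skipped. */
    static int fs_reload_needed(unsigned short next_fs_sel)
    {
        unsigned int mask = dirty_segment_mask;
        dirty_segment_mask = 0;                /* load side clears the mask */
        return ((mask & (DIRTY_FS | DIRTY_FS_BASE)) | next_fs_sel) != 0;
    }

    int main(void)
    {
        unsigned long prev_fs_base = 0x1234;   /* non-zero base, NULL selector */
        save_fs_sketch(0, &prev_fs_base);
        printf("reload needed: %d\n", fs_reload_needed(0));   /* prints 1 */
        return 0;
    }

Even with both selectors NULL, the non-zero fs_base marks the state dirty, so the reload still happens and the stale base gets reset.
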
     5.1 --- a/xen/arch/x86/vmx.c	Wed Aug 17 08:26:58 2005 +0000
     5.2 +++ b/xen/arch/x86/vmx.c	Wed Aug 17 08:27:16 2005 +0000
     5.3 @@ -65,7 +65,7 @@ static u32 msr_data_index[VMX_MSR_COUNT]
     5.4   * are not modified once set for generic domains, we don't save them, 
     5.5   * but simply reset them to the values set at percpu_traps_init().
     5.6   */
     5.7 -void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
     5.8 +void vmx_load_msrs(struct vcpu *n)
     5.9  {
    5.10      struct msr_state *host_state;
    5.11      host_state = &percpu_msr[smp_processor_id()];
     6.1 --- a/xen/common/schedule.c	Wed Aug 17 08:26:58 2005 +0000
     6.2 +++ b/xen/common/schedule.c	Wed Aug 17 08:27:16 2005 +0000
     6.3 @@ -474,13 +474,14 @@ static void __enter_scheduler(void)
     6.4  
     6.5      set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
     6.6  
     6.7 -    /* Must be protected by the schedule_lock! */
     6.8 -    set_bit(_VCPUF_running, &next->vcpu_flags);
     6.9 +    if ( unlikely(prev == next) )
    6.10 +    {
    6.11 +        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    6.12 +        return continue_running(prev);
    6.13 +    }
    6.14  
    6.15 -    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    6.16 -
    6.17 -    if ( unlikely(prev == next) )
    6.18 -        return continue_running(prev);
    6.19 +    clear_bit(_VCPUF_running, &prev->vcpu_flags);
    6.20 +    set_bit(_VCPUF_running, &next->vcpu_flags);
    6.21  
    6.22      perfc_incrc(sched_ctx);
    6.23  
    6.24 @@ -517,6 +518,10 @@ static void __enter_scheduler(void)
    6.25               next->domain->domain_id, next->vcpu_id);
    6.26  
    6.27      context_switch(prev, next);
    6.28 +
    6.29 +    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    6.30 +
    6.31 +    context_switch_finalise(next);
    6.32  }
    6.33  
    6.34  /* No locking needed -- pointer comparison is safe :-) */
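
For illustration (not part of the changeset): with the hunk above, __enter_scheduler() hands over the _VCPUF_running flags while still holding the lock and splits the switch into two phases, only the second of which runs with interrupts enabled. A condensed paraphrase of the resulting sequence, using identifiers from the schedule.c and domain.c hunks (not the literal code):

    /* Phase 1: still under schedule_data[cpu].schedule_lock, IRQs off. */
    clear_bit(_VCPUF_running, &prev->vcpu_flags);
    set_bit(_VCPUF_running, &next->vcpu_flags);
    context_switch(prev, next);        /* x86: may call __context_switch()
                                          and mark context_not_finalised   */

    spin_unlock_irq(&schedule_data[cpu].schedule_lock);

    /* Phase 2: IRQs back on; safe to reload per-CPU segment/MSR state. */
    context_switch_finalise(next);     /* x86: e.g. load_LDT(),
                                          load_segments(), load_msrs()     */
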
     7.1 --- a/xen/include/asm-x86/vmx_vmcs.h	Wed Aug 17 08:26:58 2005 +0000
     7.2 +++ b/xen/include/asm-x86/vmx_vmcs.h	Wed Aug 17 08:27:16 2005 +0000
     7.3 @@ -28,10 +28,10 @@ extern int start_vmx(void);
     7.4  extern void stop_vmx(void);
     7.5  
     7.6  #if defined (__x86_64__)
     7.7 -extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
     7.8 +extern void vmx_load_msrs(struct vcpu *n);
     7.9  void vmx_restore_msrs(struct vcpu *d);
    7.10  #else
    7.11 -#define vmx_load_msrs(_p, _n)      ((void)0)
    7.12 +#define vmx_load_msrs(_n)          ((void)0)
    7.13  #define vmx_restore_msrs(_v)       ((void)0)
    7.14  #endif
    7.15  
     8.1 --- a/xen/include/xen/sched.h	Wed Aug 17 08:26:58 2005 +0000
     8.2 +++ b/xen/include/xen/sched.h	Wed Aug 17 08:27:16 2005 +0000
     8.3 @@ -258,12 +258,32 @@ extern void sync_lazy_execstate_mask(cpu
     8.4  extern void sync_lazy_execstate_all(void);
     8.5  extern int __sync_lazy_execstate(void);
     8.6  
     8.7 -/* Called by the scheduler to switch to another vcpu. */
     8.8 +/*
     8.9 + * Called by the scheduler to switch to another VCPU. On entry, although
    8.10 + * VCPUF_running is no longer asserted for @prev, its context is still running
    8.11 + * on the local CPU and is not committed to memory. The local scheduler lock
    8.12 + * is therefore still held, and interrupts are disabled, because the local CPU
    8.13 + * is in an inconsistent state.
    8.14 + * 
    8.15 + * The callee must ensure that the local CPU is no longer running in @prev's
    8.16 + * context, and that the context is saved to memory, before returning.
    8.17 + * Alternatively, if implementing lazy context switching, it suffices to ensure
    8.18 + * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
    8.19 + */
    8.20  extern void context_switch(
    8.21      struct vcpu *prev, 
    8.22      struct vcpu *next);
    8.23  
    8.24 -/* Called by the scheduler to continue running the current vcpu. */
    8.25 +/*
    8.26 + * On some architectures (notably x86) it is not possible to entirely load
    8.27 + * @next's context with interrupts disabled. These may implement a function to
    8.28 + * finalise loading the new context after interrupts are re-enabled. This
    8.29 + * function is not given @prev and is not permitted to access it.
    8.30 + */
    8.31 +extern void context_switch_finalise(
    8.32 +    struct vcpu *next);
    8.33 +
    8.34 +/* Called by the scheduler to continue running the current VCPU. */
    8.35  extern void continue_running(
    8.36      struct vcpu *same);
    8.37
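
For illustration (not part of the changeset): the comment block added above defines the contract for the new hook. On an architecture that can complete the whole switch with interrupts disabled, context_switch_finalise() may simply be empty, which is what the ia64 hunk earlier in this changeset does. A minimal pair along those lines (sketch only, details elided):

    void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        /* Save prev's state and move the CPU onto next's stack/registers. */
        context_switch_count++;
        switch_to(prev, next, prev);
    }

    void context_switch_finalise(struct vcpu *next)
    {
        /* Nothing to do: the switch completed with interrupts disabled. */
    }
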