ia64/xen-unstable

changeset 6219:3b0ce44f7b7a

merge?
author cl349@firebug.cl.cam.ac.uk
date Wed Aug 17 08:27:16 2005 +0000 (2005-08-17)
parents 7c1d66d4e51e 7a92f129e117
children a4b22c7fbfa1
files linux-2.6-xen-sparse/arch/xen/kernel/reboot.c tools/examples/network-bridge tools/python/xen/xm/create.py xen/arch/ia64/xenmisc.c xen/arch/x86/domain.c xen/arch/x86/vmx.c xen/common/schedule.c xen/include/asm-x86/vmx_vmcs.h xen/include/xen/sched.h
line diff
     2.1 --- a/tools/examples/network-bridge	Wed Aug 17 08:26:58 2005 +0000
     2.2 +++ b/tools/examples/network-bridge	Wed Aug 17 08:27:16 2005 +0000
     2.3 @@ -187,6 +187,7 @@ op_start () {
     2.4  			ifconfig ${netdev} 0.0.0.0 down
     2.5  		fi
     2.6  	fi
     2.7 +	del_addrs ${netdev}
     2.8  	ip link set ${netdev} name p${netdev}
     2.9  	ip link set veth0 name ${netdev}
    2.10  	ifconfig p${netdev} -arp down
     3.1 --- a/tools/python/xen/xm/create.py	Wed Aug 17 08:26:58 2005 +0000
     3.2 +++ b/tools/python/xen/xm/create.py	Wed Aug 17 08:27:16 2005 +0000
     3.3 @@ -380,7 +380,6 @@ def randomMAC():
     3.4  
     3.5      @return: MAC address string
     3.6      """
     3.7 -    random.seed()
     3.8      mac = [ 0xaa, 0x00, 0x00,
     3.9              random.randint(0x00, 0x7f),
    3.10              random.randint(0x00, 0xff),
    3.11 @@ -689,6 +688,7 @@ def balloon_out(dom0_min_mem, opts):
    3.12      del xc
    3.13  
    3.14  def main(argv):
    3.15 +    random.seed()
    3.16      opts = gopts
    3.17      args = opts.parse(argv)
    3.18      if opts.vals.help:
     4.1 --- a/xen/arch/ia64/xenmisc.c	Wed Aug 17 08:26:58 2005 +0000
     4.2 +++ b/xen/arch/ia64/xenmisc.c	Wed Aug 17 08:27:16 2005 +0000
     4.3 @@ -280,7 +280,6 @@ void cs01foo(void) {}
     4.4  
     4.5  unsigned long context_switch_count = 0;
     4.6  
     4.7 -// context_switch
     4.8  void context_switch(struct vcpu *prev, struct vcpu *next)
     4.9  {
    4.10  //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
    4.11 @@ -290,22 +289,14 @@ void context_switch(struct vcpu *prev, s
    4.12  //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
    4.13  //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
    4.14  #ifdef CONFIG_VTI
    4.15 -	unsigned long psr;
    4.16 -	/* Interrupt is enabled after next task is chosen.
    4.17 -	 * So we have to disable it for stack switch.
    4.18 -	 */
    4.19 -	local_irq_save(psr);
    4.20  	vtm_domain_out(prev);
    4.21 -	/* Housekeeping for prev domain */
    4.22 -#endif // CONFIG_VTI
    4.23 -
    4.24 +#endif
    4.25  	context_switch_count++;
    4.26  	switch_to(prev,next,prev);
    4.27  #ifdef CONFIG_VTI
    4.28 -	/* Post-setup for new domain */
    4.29  	 vtm_domain_in(current);
    4.30 -	local_irq_restore(psr);
    4.31 -#endif // CONFIG_VTI
    4.32 +#endif
    4.33 +
    4.34  // leave this debug for now: it acts as a heartbeat when more than
    4.35  // one domain is active
    4.36  {
    4.37 @@ -315,25 +306,27 @@ int id = ((struct vcpu *)current)->domai
    4.38  if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
    4.39  if (!i--) { printk("+",id); i = 1000000; }
    4.40  }
    4.41 -	clear_bit(_VCPUF_running, &prev->vcpu_flags);
    4.42 -	//if (!is_idle_task(next->domain) )
    4.43 -		//send_guest_virq(next, VIRQ_TIMER);
    4.44 +
    4.45  #ifdef CONFIG_VTI
    4.46  	if (VMX_DOMAIN(current))
    4.47  		vmx_load_all_rr(current);
    4.48 -	return;
    4.49 -#else // CONFIG_VTI
    4.50 +#else
    4.51  	if (!is_idle_task(current->domain)) {
    4.52  		load_region_regs(current);
    4.53  		if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
    4.54  	}
    4.55  	if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
    4.56 -#endif // CONFIG_VTI
    4.57 +#endif
    4.58 +}
    4.59 +
    4.60 +void context_switch_finalise(struct vcpu *next)
    4.61 +{
    4.62 +	/* nothing to do */
    4.63  }
    4.64  
    4.65  void continue_running(struct vcpu *same)
    4.66  {
    4.67 -    /* nothing to do */
    4.68 +	/* nothing to do */
    4.69  }
    4.70  
    4.71  void panic_domain(struct pt_regs *regs, const char *fmt, ...)
     5.1 --- a/xen/arch/x86/domain.c	Wed Aug 17 08:26:58 2005 +0000
     5.2 +++ b/xen/arch/x86/domain.c	Wed Aug 17 08:27:16 2005 +0000
     5.3 @@ -48,6 +48,8 @@ boolean_param("noreboot", opt_noreboot);
     5.4  
     5.5  struct percpu_ctxt {
     5.6      struct vcpu *curr_vcpu;
     5.7 +    unsigned int context_not_finalised;
     5.8 +    unsigned int dirty_segment_mask;
     5.9  } __cacheline_aligned;
    5.10  static struct percpu_ctxt percpu_ctxt[NR_CPUS];
    5.11  
    5.12 @@ -541,51 +543,59 @@ void toggle_guest_mode(struct vcpu *v)
    5.13      __r; })
    5.14  
    5.15  #if CONFIG_VMX
    5.16 -#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
    5.17 +#define load_msrs(n)     if (vmx_switch_on) vmx_load_msrs(n)
    5.18  #else
    5.19 -#define load_msrs(_p, _n)     ((void)0)
    5.20 +#define load_msrs(n)     ((void)0)
    5.21  #endif 
    5.22  
    5.23 -static void load_segments(struct vcpu *p, struct vcpu *n)
    5.24 +/*
    5.25 + * save_segments() writes a mask of segments which are dirty (non-zero),
    5.26 + * allowing load_segments() to avoid some expensive segment loads and
    5.27 + * MSR writes.
    5.28 + */
    5.29 +#define DIRTY_DS           0x01
    5.30 +#define DIRTY_ES           0x02
    5.31 +#define DIRTY_FS           0x04
    5.32 +#define DIRTY_GS           0x08
    5.33 +#define DIRTY_FS_BASE      0x10
    5.34 +#define DIRTY_GS_BASE_USER 0x20
    5.35 +
    5.36 +static void load_segments(struct vcpu *n)
    5.37  {
    5.38 -    struct vcpu_guest_context *pctxt = &p->arch.guest_context;
    5.39      struct vcpu_guest_context *nctxt = &n->arch.guest_context;
    5.40      int all_segs_okay = 1;
    5.41 +    unsigned int dirty_segment_mask, cpu = smp_processor_id();
    5.42 +
    5.43 +    /* Load and clear the dirty segment mask. */
    5.44 +    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
    5.45 +    percpu_ctxt[cpu].dirty_segment_mask = 0;
    5.46  
    5.47      /* Either selector != 0 ==> reload. */
    5.48 -    if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
    5.49 +    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
    5.50          all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
    5.51  
    5.52      /* Either selector != 0 ==> reload. */
    5.53 -    if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
    5.54 +    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
    5.55          all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
    5.56  
    5.57      /*
    5.58       * Either selector != 0 ==> reload.
    5.59       * Also reload to reset FS_BASE if it was non-zero.
    5.60       */
    5.61 -    if ( unlikely(pctxt->user_regs.fs |
    5.62 -                  pctxt->fs_base |
    5.63 +    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
    5.64                    nctxt->user_regs.fs) )
    5.65 -    {
    5.66          all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
    5.67 -        if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
    5.68 -            pctxt->fs_base = 0;
    5.69 -    }
    5.70  
    5.71      /*
    5.72       * Either selector != 0 ==> reload.
    5.73       * Also reload to reset GS_BASE if it was non-zero.
    5.74       */
    5.75 -    if ( unlikely(pctxt->user_regs.gs |
    5.76 -                  pctxt->gs_base_user |
    5.77 +    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
    5.78                    nctxt->user_regs.gs) )
    5.79      {
    5.80          /* Reset GS_BASE with user %gs? */
    5.81 -        if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
    5.82 +        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
    5.83              all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
    5.84 -        if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
    5.85 -            pctxt->gs_base_user = 0;
    5.86      }
    5.87  
    5.88      /* This can only be non-zero if selector is NULL. */
    5.89 @@ -650,7 +660,9 @@ static void load_segments(struct vcpu *p
    5.90  
    5.91  static void save_segments(struct vcpu *v)
    5.92  {
    5.93 -    struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
    5.94 +    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
    5.95 +    struct cpu_user_regs      *regs = &ctxt->user_regs;
    5.96 +    unsigned int dirty_segment_mask = 0;
    5.97  
    5.98      if ( VMX_DOMAIN(v) )
    5.99          rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
   5.100 @@ -659,18 +671,34 @@ static void save_segments(struct vcpu *v
   5.101      __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
   5.102      __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
   5.103      __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
   5.104 -}
   5.105 +
   5.106 +    if ( regs->ds )
   5.107 +        dirty_segment_mask |= DIRTY_DS;
   5.108 +
   5.109 +    if ( regs->es )
   5.110 +        dirty_segment_mask |= DIRTY_ES;
   5.111  
   5.112 -static void clear_segments(void)
   5.113 -{
   5.114 -    __asm__ __volatile__ (
   5.115 -        " movl %0,%%ds; "
   5.116 -        " movl %0,%%es; "
   5.117 -        " movl %0,%%fs; "
   5.118 -        " movl %0,%%gs; "
   5.119 -        ""safe_swapgs"  "
   5.120 -        " movl %0,%%gs"
   5.121 -        : : "r" (0) );
   5.122 +    if ( regs->fs )
   5.123 +    {
   5.124 +        dirty_segment_mask |= DIRTY_FS;
   5.125 +        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
   5.126 +    }
   5.127 +    else if ( ctxt->fs_base )
   5.128 +    {
   5.129 +        dirty_segment_mask |= DIRTY_FS_BASE;
   5.130 +    }
   5.131 +
   5.132 +    if ( regs->gs )
   5.133 +    {
   5.134 +        dirty_segment_mask |= DIRTY_GS;
   5.135 +        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
   5.136 +    }
   5.137 +    else if ( ctxt->gs_base_user )
   5.138 +    {
   5.139 +        dirty_segment_mask |= DIRTY_GS_BASE_USER;
   5.140 +    }
   5.141 +
   5.142 +    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
   5.143  }
   5.144  
   5.145  long do_switch_to_user(void)
   5.146 @@ -706,10 +734,9 @@ long do_switch_to_user(void)
   5.147  
   5.148  #elif defined(__i386__)
   5.149  
   5.150 -#define load_segments(_p, _n) ((void)0)
   5.151 -#define load_msrs(_p, _n)     ((void)0)
   5.152 -#define save_segments(_p)     ((void)0)
   5.153 -#define clear_segments()      ((void)0)
   5.154 +#define load_segments(n) ((void)0)
   5.155 +#define load_msrs(n)     ((void)0)
   5.156 +#define save_segments(p) ((void)0)
   5.157  
   5.158  static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
   5.159  {
   5.160 @@ -726,9 +753,9 @@ static inline void switch_kernel_stack(s
   5.161  static void __context_switch(void)
   5.162  {
   5.163      struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
   5.164 -    unsigned int         cpu = smp_processor_id();
   5.165 -    struct vcpu  *p = percpu_ctxt[cpu].curr_vcpu;
   5.166 -    struct vcpu  *n = current;
   5.167 +    unsigned int          cpu = smp_processor_id();
   5.168 +    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
   5.169 +    struct vcpu          *n = current;
   5.170  
   5.171      if ( !is_idle_task(p->domain) )
   5.172      {
   5.173 @@ -786,23 +813,31 @@ static void __context_switch(void)
   5.174  
   5.175  void context_switch(struct vcpu *prev, struct vcpu *next)
   5.176  {
   5.177 -    struct vcpu *realprev;
   5.178 +    unsigned int cpu = smp_processor_id();
   5.179  
   5.180 -    local_irq_disable();
   5.181 +    ASSERT(!local_irq_is_enabled());
   5.182  
   5.183      set_current(next);
   5.184  
   5.185 -    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) || 
   5.186 -         is_idle_task(next->domain) )
   5.187 -    {
   5.188 -        local_irq_enable();
   5.189 -    }
   5.190 -    else
   5.191 +    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
   5.192      {
   5.193          __context_switch();
   5.194 +        percpu_ctxt[cpu].context_not_finalised = 1;
   5.195 +    }
   5.196 +}
   5.197  
   5.198 -        local_irq_enable();
   5.199 -        
   5.200 +void context_switch_finalise(struct vcpu *next)
   5.201 +{
   5.202 +    unsigned int cpu = smp_processor_id();
   5.203 +
   5.204 +    ASSERT(local_irq_is_enabled());
   5.205 +
   5.206 +    if ( percpu_ctxt[cpu].context_not_finalised )
   5.207 +    {
   5.208 +        percpu_ctxt[cpu].context_not_finalised = 0;
   5.209 +
   5.210 +        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
   5.211 +
   5.212          if ( VMX_DOMAIN(next) )
   5.213          {
   5.214              vmx_restore_msrs(next);
   5.215 @@ -810,19 +845,11 @@ void context_switch(struct vcpu *prev, s
   5.216          else
   5.217          {
   5.218              load_LDT(next);
   5.219 -            load_segments(realprev, next);
   5.220 -            load_msrs(realprev, next);
   5.221 +            load_segments(next);
   5.222 +            load_msrs(next);
   5.223          }
   5.224      }
   5.225  
   5.226 -    /*
   5.227 -     * We do this late on because it doesn't need to be protected by the
   5.228 -     * schedule_lock, and because we want this to be the very last use of
   5.229 -     * 'prev' (after this point, a dying domain's info structure may be freed
   5.230 -     * without warning). 
   5.231 -     */
   5.232 -    clear_bit(_VCPUF_running, &prev->vcpu_flags);
   5.233 -
   5.234      schedule_tail(next);
   5.235      BUG();
   5.236  }
   5.237 @@ -835,12 +862,19 @@ void continue_running(struct vcpu *same)
   5.238  
   5.239  int __sync_lazy_execstate(void)
   5.240  {
   5.241 -    if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
   5.242 -        return 0;
   5.243 -    __context_switch();
   5.244 -    load_LDT(current);
   5.245 -    clear_segments();
   5.246 -    return 1;
   5.247 +    unsigned long flags;
   5.248 +    int switch_required;
   5.249 +
   5.250 +    local_irq_save(flags);
   5.251 +
   5.252 +    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
   5.253 +
   5.254 +    if ( switch_required )
   5.255 +        __context_switch();
   5.256 +
   5.257 +    local_irq_restore(flags);
   5.258 +
   5.259 +    return switch_required;
   5.260  }
   5.261  
   5.262  void sync_lazy_execstate_cpu(unsigned int cpu)
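
The x86_64 hunk above replaces the old prev/next selector comparisons in load_segments() with a per-CPU dirty mask written by save_segments(). A minimal standalone sketch of that idea follows, assuming simplified stand-ins: guest_ctx, save_ctx() and load_ctx() are invented substitutes for Xen's vcpu_guest_context and the real save_segments()/load_segments(), the printf calls stand in for selector loads and MSR writes, and the %gs handling is simplified relative to the actual patch.

    /*
     * Minimal standalone model of the dirty-segment-mask idea; not Xen code.
     * save_ctx() records which selectors/bases were live when the previous
     * vCPU left the CPU; load_ctx() only touches segments that are dirty on
     * this CPU or non-zero in the incoming context.
     */
    #include <stdio.h>

    #define DIRTY_DS           0x01
    #define DIRTY_ES           0x02
    #define DIRTY_FS           0x04
    #define DIRTY_GS           0x08
    #define DIRTY_FS_BASE      0x10
    #define DIRTY_GS_BASE_USER 0x20

    struct guest_ctx {
        unsigned short ds, es, fs, gs;
        unsigned long  fs_base, gs_base_user;
    };

    /* Models the new percpu_ctxt[cpu].dirty_segment_mask field. */
    static unsigned int dirty_segment_mask;

    static void save_ctx(struct guest_ctx *p)
    {
        unsigned int mask = 0;

        if (p->ds) mask |= DIRTY_DS;
        if (p->es) mask |= DIRTY_ES;

        if (p->fs) {
            mask |= DIRTY_FS;
            p->fs_base = 0;          /* != 0 selector kills fs_base */
        } else if (p->fs_base) {
            mask |= DIRTY_FS_BASE;
        }

        if (p->gs) {
            mask |= DIRTY_GS;
            p->gs_base_user = 0;     /* != 0 selector kills gs_base_user */
        } else if (p->gs_base_user) {
            mask |= DIRTY_GS_BASE_USER;
        }

        dirty_segment_mask = mask;
    }

    static void load_ctx(const struct guest_ctx *n)
    {
        /* Load and clear the dirty mask, as load_segments() now does. */
        unsigned int mask = dirty_segment_mask;
        dirty_segment_mask = 0;

        if ((mask & DIRTY_DS) | n->ds)
            printf("reload %%ds = %#x\n", n->ds);
        if ((mask & DIRTY_ES) | n->es)
            printf("reload %%es = %#x\n", n->es);
        if ((mask & (DIRTY_FS | DIRTY_FS_BASE)) | n->fs)
            printf("reload %%fs = %#x (resets FS_BASE)\n", n->fs);
        if ((mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) | n->gs)
            printf("reload %%gs = %#x (resets GS_BASE)\n", n->gs);
    }

    int main(void)
    {
        struct guest_ctx prev = { .fs = 0x33, .gs_base_user = 0x7fff0000UL };
        struct guest_ctx next = { 0 };   /* flat 64-bit guest: all selectors null */

        save_ctx(&prev);                 /* prev dirtied %fs and user GS_BASE */
        load_ctx(&next);                 /* only %fs and %gs need attention   */
        return 0;
    }

Because the mask lives in per-CPU state rather than in prev's context, load_segments() can run without touching prev at all, which is what allows its new single-argument signature.
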
     6.1 --- a/xen/arch/x86/vmx.c	Wed Aug 17 08:26:58 2005 +0000
     6.2 +++ b/xen/arch/x86/vmx.c	Wed Aug 17 08:27:16 2005 +0000
     6.3 @@ -65,7 +65,7 @@ static u32 msr_data_index[VMX_MSR_COUNT]
     6.4   * are not modified once set for generic domains, we don't save them, 
     6.5   * but simply reset them to the values set at percpu_traps_init().
     6.6   */
     6.7 -void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
     6.8 +void vmx_load_msrs(struct vcpu *n)
     6.9  {
    6.10      struct msr_state *host_state;
    6.11      host_state = &percpu_msr[smp_processor_id()];
     7.1 --- a/xen/common/schedule.c	Wed Aug 17 08:26:58 2005 +0000
     7.2 +++ b/xen/common/schedule.c	Wed Aug 17 08:27:16 2005 +0000
     7.3 @@ -474,13 +474,14 @@ static void __enter_scheduler(void)
     7.4  
     7.5      set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
     7.6  
     7.7 -    /* Must be protected by the schedule_lock! */
     7.8 -    set_bit(_VCPUF_running, &next->vcpu_flags);
     7.9 +    if ( unlikely(prev == next) )
    7.10 +    {
    7.11 +        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    7.12 +        return continue_running(prev);
    7.13 +    }
    7.14  
    7.15 -    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    7.16 -
    7.17 -    if ( unlikely(prev == next) )
    7.18 -        return continue_running(prev);
    7.19 +    clear_bit(_VCPUF_running, &prev->vcpu_flags);
    7.20 +    set_bit(_VCPUF_running, &next->vcpu_flags);
    7.21  
    7.22      perfc_incrc(sched_ctx);
    7.23  
    7.24 @@ -517,6 +518,10 @@ static void __enter_scheduler(void)
    7.25               next->domain->domain_id, next->vcpu_id);
    7.26  
    7.27      context_switch(prev, next);
    7.28 +
    7.29 +    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    7.30 +
    7.31 +    context_switch_finalise(next);
    7.32  }
    7.33  
    7.34  /* No locking needed -- pointer comparison is safe :-) */
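
The schedule.c hunk moves both _VCPUF_running updates under the schedule lock and splits the arch switch into context_switch() (lock held, interrupts off) followed by context_switch_finalise() after the unlock. A sketch of that ordering is below, assuming stub locks and vCPUs: enter_scheduler_tail, the lock booleans and the printouts are illustrative stand-ins, and context_switch() here simply returns, whereas the real x86 version ends in schedule_tail()/BUG().

    /* Standalone model of the reworked __enter_scheduler() tail; not Xen code. */
    #include <stdio.h>
    #include <stdbool.h>

    struct vcpu { int id; bool running; };

    static bool lock_held, irqs_on = true;

    static void spin_lock_irq(void)   { irqs_on = false; lock_held = true;  }
    static void spin_unlock_irq(void) { lock_held = false; irqs_on = true;  }

    /* Arch hook: commit prev's state; runs with the lock held, IRQs off. */
    static void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        printf("context_switch %d->%d  lock=%d irqs=%d\n",
               prev->id, next->id, lock_held, irqs_on);
    }

    /* Arch hook: finish loading next's state once IRQs are enabled again. */
    static void context_switch_finalise(struct vcpu *next)
    {
        printf("finalise %d           lock=%d irqs=%d\n",
               next->id, lock_held, irqs_on);
    }

    static void enter_scheduler_tail(struct vcpu *prev, struct vcpu *next)
    {
        if (prev == next) {              /* continue_running() path */
            spin_unlock_irq();
            return;
        }

        /* Both running-flag updates now happen under the schedule lock. */
        prev->running = false;
        next->running = true;

        context_switch(prev, next);      /* still locked, IRQs masked */
        spin_unlock_irq();
        context_switch_finalise(next);   /* unlocked, IRQs back on    */
    }

    int main(void)
    {
        struct vcpu d0 = { .id = 0, .running = true }, d1 = { .id = 1 };

        spin_lock_irq();                 /* scheduler holds the lock on entry */
        enter_scheduler_tail(&d0, &d1);
        return 0;
    }

Note how the prev == next fast path drops the lock and leaves via continue_running() before either flag is touched, matching the reordered hunk above.
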
     8.1 --- a/xen/include/asm-x86/vmx_vmcs.h	Wed Aug 17 08:26:58 2005 +0000
     8.2 +++ b/xen/include/asm-x86/vmx_vmcs.h	Wed Aug 17 08:27:16 2005 +0000
     8.3 @@ -28,10 +28,10 @@ extern int start_vmx(void);
     8.4  extern void stop_vmx(void);
     8.5  
     8.6  #if defined (__x86_64__)
     8.7 -extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
     8.8 +extern void vmx_load_msrs(struct vcpu *n);
     8.9  void vmx_restore_msrs(struct vcpu *d);
    8.10  #else
    8.11 -#define vmx_load_msrs(_p, _n)      ((void)0)
    8.12 +#define vmx_load_msrs(_n)          ((void)0)
    8.13  #define vmx_restore_msrs(_v)       ((void)0)
    8.14  #endif
    8.15  
     9.1 --- a/xen/include/xen/sched.h	Wed Aug 17 08:26:58 2005 +0000
     9.2 +++ b/xen/include/xen/sched.h	Wed Aug 17 08:27:16 2005 +0000
     9.3 @@ -258,12 +258,32 @@ extern void sync_lazy_execstate_mask(cpu
     9.4  extern void sync_lazy_execstate_all(void);
     9.5  extern int __sync_lazy_execstate(void);
     9.6  
     9.7 -/* Called by the scheduler to switch to another vcpu. */
     9.8 +/*
     9.9 + * Called by the scheduler to switch to another VCPU. On entry, although
    9.10 + * VCPUF_running is no longer asserted for @prev, its context is still running
    9.11 + * on the local CPU and is not committed to memory. The local scheduler lock
    9.12 + * is therefore still held, and interrupts are disabled, because the local CPU
    9.13 + * is in an inconsistent state.
    9.14 + * 
    9.15 + * The callee must ensure that the local CPU is no longer running in @prev's
    9.16 + * context, and that the context is saved to memory, before returning.
    9.17 + * Alternatively, if implementing lazy context switching, it suffices to ensure
    9.18 + * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
    9.19 + */
    9.20  extern void context_switch(
    9.21      struct vcpu *prev, 
    9.22      struct vcpu *next);
    9.23  
    9.24 -/* Called by the scheduler to continue running the current vcpu. */
    9.25 +/*
    9.26 + * On some architectures (notably x86) it is not possible to entirely load
    9.27 + * @next's context with interrupts disabled. These may implement a function to
    9.28 + * finalise loading the new context after interrupts are re-enabled. This
    9.29 + * function is not given @prev and is not permitted to access it.
    9.30 + */
    9.31 +extern void context_switch_finalise(
    9.32 +    struct vcpu *next);
    9.33 +
    9.34 +/* Called by the scheduler to continue running the current VCPU. */
    9.35  extern void continue_running(
    9.36      struct vcpu *same);
    9.37
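
The lazy-switching contract documented above is easiest to see alongside the per-CPU curr_vcpu pointer that the x86 code keeps in percpu_ctxt[]. Below is a minimal standalone sketch under simplified assumptions: commit_state(), sync_lazy_execstate() and the single-CPU globals are illustrative stand-ins, and the interrupt masking from the patch is reduced to comments.

    /*
     * Standalone model of the lazy-switch contract; not Xen code.
     * curr_vcpu says whose state is physically on the CPU, current_vcpu
     * says who the scheduler last switched to.  Switching to the idle
     * vCPU leaves prev's state in place; sync_lazy_execstate() commits
     * it later, as the reworked x86 __sync_lazy_execstate() does with
     * interrupts masked.
     */
    #include <stdio.h>
    #include <stdbool.h>

    struct vcpu { int id; bool is_idle; };

    static struct vcpu *curr_vcpu;       /* state resident on the CPU       */
    static struct vcpu *current_vcpu;    /* scheduler's notion of "current" */

    static void commit_state(struct vcpu *p, struct vcpu *n)
    {
        printf("committing vcpu %d, loading vcpu %d\n", p->id, n->id);
        curr_vcpu = n;
    }

    /* context_switch(): skip the expensive commit when next is idle. */
    static void context_switch(struct vcpu *next)
    {
        current_vcpu = next;
        if (curr_vcpu != next && !next->is_idle)
            commit_state(curr_vcpu, next);
    }

    /* Returns nonzero if a deferred commit was still pending. */
    static int sync_lazy_execstate(void)
    {
        int switch_required;

        /* local_irq_save(flags); */
        switch_required = (curr_vcpu != current_vcpu);
        if (switch_required)
            commit_state(curr_vcpu, current_vcpu);
        /* local_irq_restore(flags); */

        return switch_required;
    }

    int main(void)
    {
        struct vcpu guest = { .id = 1 }, idle = { .id = 0, .is_idle = true };

        curr_vcpu = current_vcpu = &guest;

        context_switch(&idle);           /* lazy: guest's state stays live */
        printf("commit pending? %d\n", curr_vcpu != current_vcpu);

        sync_lazy_execstate();           /* commit on demand               */
        printf("commit pending? %d\n", curr_vcpu != current_vcpu);
        return 0;
    }

Running the model prints one deferred commit, mirroring __sync_lazy_execstate()'s nonzero return when a lazy switch was still outstanding.
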