ia64/xen-unstable

changeset 12260:e793dad2114d

[XEN] Re-jig VCPU initialisation -- VMX init requires generic VCPU
fields to already be allocated. This has led to a general cleanup of
domain and vcpu initialisation and destruction.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Nov 06 11:03:04 2006 +0000 (2006-11-06)
parents 31985f348d2e
children 29bfe8852dce
files xen/arch/ia64/xen/domain.c xen/arch/powerpc/domain.c xen/arch/x86/domain.c xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/common/domain.c xen/common/sched_credit.c xen/common/sched_sedf.c xen/common/schedule.c xen/include/xen/domain.h xen/include/xen/sched-if.h xen/include/xen/sched.h
line diff
     1.1 --- a/xen/arch/ia64/xen/domain.c	Mon Nov 06 09:46:13 2006 +0000
     1.2 +++ b/xen/arch/ia64/xen/domain.c	Mon Nov 06 11:03:04 2006 +0000
     1.3 @@ -275,40 +275,61 @@ void hlt_timer_fn(void *data)
     1.4  	vcpu_unblock(v);
     1.5  }
     1.6  
     1.7 -struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
     1.8 +void relinquish_vcpu_resources(struct vcpu *v)
     1.9 +{
    1.10 +    if (HAS_PERVCPU_VHPT(v->domain))
    1.11 +        pervcpu_vhpt_free(v);
    1.12 +    if (v->arch.privregs != NULL) {
    1.13 +        free_xenheap_pages(v->arch.privregs,
    1.14 +                           get_order_from_shift(XMAPPEDREGS_SHIFT));
    1.15 +        v->arch.privregs = NULL;
    1.16 +    }
    1.17 +    kill_timer(&v->arch.hlt_timer);
    1.18 +}
    1.19 +
    1.20 +struct vcpu *alloc_vcpu_struct(void)
    1.21  {
    1.22  	struct vcpu *v;
    1.23  	struct thread_info *ti;
    1.24 +	static int first_allocation = 1;
    1.25  
    1.26 -	/* Still keep idle vcpu0 static allocated at compilation, due
    1.27 -	 * to some code from Linux still requires it in early phase.
    1.28 -	 */
    1.29 -	if (is_idle_domain(d) && !vcpu_id)
    1.30 -	    v = idle_vcpu[0];
    1.31 -	else {
    1.32 -	    if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
    1.33 +	if (first_allocation) {
    1.34 +		first_allocation = 0;
     1.35 +		/* Keep idle vcpu0 statically allocated at compile time, as
     1.36 +		 * some early-phase code inherited from Linux still requires it.
     1.37 +		 */
    1.38 +		return idle_vcpu[0];
    1.39 +	}
    1.40 +
    1.41 +	if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
    1.42  		return NULL;
    1.43 -	    memset(v, 0, sizeof(*v)); 
    1.44 +	memset(v, 0, sizeof(*v)); 
    1.45 +
    1.46 +	ti = alloc_thread_info(v);
     1.47 +	/* Zero the thread_info to reset important fields such as
     1.48 +	 * preempt_count.
     1.49 +	 */
    1.50 +	memset(ti, 0, sizeof(struct thread_info));
    1.51 +	init_switch_stack(v);
    1.52  
    1.53 -	    ti = alloc_thread_info(v);
    1.54 -	    /* Clear thread_info to clear some important fields, like
    1.55 -	     * preempt_count
    1.56 -	     */
    1.57 -	    memset(ti, 0, sizeof(struct thread_info));
    1.58 -	    init_switch_stack(v);
    1.59 -	}
    1.60 +	return v;
    1.61 +}
    1.62 +
    1.63 +void free_vcpu_struct(struct vcpu *v)
    1.64 +{
    1.65 +	free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
    1.66 +}
    1.67 +
    1.68 +int vcpu_initialise(struct vcpu *v)
    1.69 +{
    1.70 +	struct domain *d = v->domain;
    1.71 +	int rc, order, i;
    1.72  
    1.73  	if (!is_idle_domain(d)) {
    1.74  	    if (!d->arch.is_vti) {
    1.75 -		int order;
    1.76 -		int i;
    1.77 -		// vti domain has its own vhpt policy.
    1.78 -		if (HAS_PERVCPU_VHPT(d)) {
    1.79 -			if (pervcpu_vhpt_alloc(v) < 0) {
    1.80 -				free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
    1.81 -				return NULL;
    1.82 -			}
    1.83 -		}
    1.84 +		if (HAS_PERVCPU_VHPT(d))
    1.85 +			if ((rc = pervcpu_vhpt_alloc(v)) != 0)
    1.86 +				return rc;
    1.87  
    1.88  		/* Create privregs page only if not VTi. */
    1.89  		order = get_order_from_shift(XMAPPEDREGS_SHIFT);
    1.90 @@ -344,34 +365,20 @@ struct vcpu *alloc_vcpu_struct(struct do
    1.91  	    v->arch.breakimm = d->arch.breakimm;
    1.92  	    v->arch.last_processor = INVALID_PROCESSOR;
    1.93  	}
    1.94 -	if (!VMX_DOMAIN(v)){
    1.95 +
    1.96 +	if (!VMX_DOMAIN(v))
    1.97  		init_timer(&v->arch.hlt_timer, hlt_timer_fn, v,
    1.98  		           first_cpu(cpu_online_map));
    1.99 -	}
   1.100  
   1.101 -	return v;
   1.102 +	return 0;
   1.103  }
   1.104  
   1.105 -void relinquish_vcpu_resources(struct vcpu *v)
   1.106 -{
   1.107 -    if (HAS_PERVCPU_VHPT(v->domain))
   1.108 -        pervcpu_vhpt_free(v);
   1.109 -    if (v->arch.privregs != NULL) {
   1.110 -        free_xenheap_pages(v->arch.privregs,
   1.111 -                           get_order_from_shift(XMAPPEDREGS_SHIFT));
   1.112 -        v->arch.privregs = NULL;
   1.113 -    }
   1.114 -    kill_timer(&v->arch.hlt_timer);
   1.115 -}
   1.116 -
   1.117 -void free_vcpu_struct(struct vcpu *v)
   1.118 +void vcpu_destroy(struct vcpu *v)
   1.119  {
   1.120  	if (v->domain->arch.is_vti)
   1.121  		vmx_relinquish_vcpu_resources(v);
   1.122  	else
   1.123  		relinquish_vcpu_resources(v);
   1.124 -
   1.125 -	free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
   1.126  }
   1.127  
   1.128  static void init_switch_stack(struct vcpu *v)
     2.1 --- a/xen/arch/powerpc/domain.c	Mon Nov 06 09:46:13 2006 +0000
     2.2 +++ b/xen/arch/powerpc/domain.c	Mon Nov 06 11:03:04 2006 +0000
     2.3 @@ -109,27 +109,28 @@ void machine_restart(char * __unused)
     2.4      while(1);
     2.5  }
     2.6  
     2.7 -struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
     2.8 +struct vcpu *alloc_vcpu_struct(void)
     2.9  {
    2.10      struct vcpu *v;
    2.11 -
    2.12 -    if ( (v = xmalloc(struct vcpu)) == NULL )
    2.13 -        return NULL;
    2.14 -
    2.15 -    memset(v, 0, sizeof(*v));
    2.16 -    v->vcpu_id = vcpu_id;
    2.17 -
    2.18 +    if ( (v = xmalloc(struct vcpu)) != NULL )
    2.19 +        memset(v, 0, sizeof(*v));
    2.20      return v;
    2.21  }
    2.22  
    2.23  void free_vcpu_struct(struct vcpu *v)
    2.24  {
    2.25 -    BUG_ON(v->next_in_list != NULL);
    2.26 -    if ( v->vcpu_id != 0 )
    2.27 -        v->domain->vcpu[v->vcpu_id - 1]->next_in_list = NULL;
    2.28      xfree(v);
    2.29  }
    2.30  
    2.31 +int vcpu_initialise(struct vcpu *v)
    2.32 +{
    2.33 +    return 0;
    2.34 +}
    2.35 +
    2.36 +void vcpu_destroy(struct vcpu *v)
    2.37 +{
    2.38 +}
    2.39 +
    2.40  int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
    2.41  { 
    2.42      memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
     3.1 --- a/xen/arch/x86/domain.c	Mon Nov 06 09:46:13 2006 +0000
     3.2 +++ b/xen/arch/x86/domain.c	Mon Nov 06 11:03:04 2006 +0000
     3.3 @@ -114,27 +114,30 @@ void dump_pageframe_info(struct domain *
     3.4      }
     3.5  }
     3.6  
     3.7 -struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
     3.8 +struct vcpu *alloc_vcpu_struct(void)
     3.9  {
    3.10      struct vcpu *v;
    3.11 -
    3.12 -    if ( (v = xmalloc(struct vcpu)) == NULL )
    3.13 -        return NULL;
    3.14 +    if ( (v = xmalloc(struct vcpu)) != NULL )
    3.15 +        memset(v, 0, sizeof(*v));
    3.16 +    return v;
    3.17 +}
    3.18  
    3.19 -    memset(v, 0, sizeof(*v));
    3.20 +void free_vcpu_struct(struct vcpu *v)
    3.21 +{
    3.22 +    xfree(v);
    3.23 +}
    3.24  
    3.25 -    v->vcpu_id = vcpu_id;
    3.26 -    v->domain  = d;
    3.27 +int vcpu_initialise(struct vcpu *v)
    3.28 +{
    3.29 +    struct domain *d = v->domain;
    3.30 +    int rc;
    3.31  
    3.32      v->arch.flags = TF_kernel_mode;
    3.33  
    3.34      if ( is_hvm_domain(d) )
    3.35      {
    3.36 -        if ( hvm_vcpu_initialise(v) != 0 )
    3.37 -        {
    3.38 -            xfree(v);
    3.39 -            return NULL;
    3.40 -        }
    3.41 +        if ( (rc = hvm_vcpu_initialise(v)) != 0 )
    3.42 +            return rc;
    3.43      }
    3.44      else
    3.45      {
    3.46 @@ -150,16 +153,15 @@ struct vcpu *alloc_vcpu_struct(struct do
    3.47      }
    3.48  
    3.49      v->arch.perdomain_ptes =
    3.50 -        d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT);
    3.51 +        d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
    3.52  
    3.53      pae_l3_cache_init(&v->arch.pae_l3_cache);
    3.54  
    3.55 -    return v;
    3.56 +    return 0;
    3.57  }
    3.58  
    3.59 -void free_vcpu_struct(struct vcpu *v)
    3.60 +void vcpu_destroy(struct vcpu *v)
    3.61  {
    3.62 -    xfree(v);
    3.63  }
    3.64  
    3.65  int arch_domain_create(struct domain *d)
     4.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon Nov 06 09:46:13 2006 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Mon Nov 06 11:03:04 2006 +0000
     4.3 @@ -193,11 +193,9 @@ void vmx_vmcs_enter(struct vcpu *v)
     4.4  {
     4.5      /*
     4.6       * NB. We must *always* run an HVM VCPU on its own VMCS, except for
     4.7 -     * vmx_vmcs_enter/exit critical regions. This leads to some XXX TODOs XXX:
     4.8 -     *  1. Move construct_vmcs() much earlier, to domain creation or
     4.9 -     *     context initialisation.
    4.10 -     *  2. VMPTRLD as soon as we context-switch to a HVM VCPU.
    4.11 -     *  3. VMCS destruction needs to happen later (from domain_destroy()).
    4.12 +     * vmx_vmcs_enter/exit critical regions. This leads to some TODOs:
    4.13 +     *  1. VMPTRLD as soon as we context-switch to a HVM VCPU.
    4.14 +     *  2. VMCS destruction needs to happen later (from domain_destroy()).
    4.15       * We can relax this a bit if a paused VCPU always commits its
    4.16       * architectural state to a software structure.
    4.17       */
    4.18 @@ -235,17 +233,6 @@ void vmx_free_host_vmcs(struct vmcs_stru
    4.19      vmx_free_vmcs(vmcs);
    4.20  }
    4.21  
    4.22 -static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx)
    4.23 -{
    4.24 -    int error = 0;
    4.25 -
    4.26 -    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    4.27 -    error |= __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
    4.28 -    error |= __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
    4.29 -
    4.30 -    return error;
    4.31 -}
    4.32 -
    4.33  #define GUEST_LAUNCH_DS         0x08
    4.34  #define GUEST_LAUNCH_CS         0x10
    4.35  #define GUEST_SEGMENT_LIMIT     0xffffffff
    4.36 @@ -366,17 +353,40 @@ static void vmx_do_launch(struct vcpu *v
    4.37      v->arch.schedule_tail = arch_vmx_do_resume;
    4.38  }
    4.39  
    4.40 -/*
    4.41 - * Initially set the same environement as host.
    4.42 - */
    4.43 -static inline int construct_init_vmcs_guest(cpu_user_regs_t *regs)
    4.44 +static int construct_vmcs(struct vcpu *v, cpu_user_regs_t *regs)
    4.45  {
    4.46      int error = 0;
    4.47 +    unsigned long tmp, eflags;
    4.48      union vmcs_arbytes arbytes;
    4.49 -    unsigned long dr7;
    4.50 -    unsigned long eflags;
    4.51 +
    4.52 +    /* VMCS controls. */
    4.53 +    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    4.54 +    error |= __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
    4.55 +    error |= __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
    4.56  
    4.57 -    /* MSR */
    4.58 +    /* Host data selectors. */
    4.59 +    error |= __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
    4.60 +    error |= __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
    4.61 +    error |= __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
    4.62 +#if defined(__i386__)
    4.63 +    error |= __vmwrite(HOST_FS_SELECTOR, __HYPERVISOR_DS);
    4.64 +    error |= __vmwrite(HOST_GS_SELECTOR, __HYPERVISOR_DS);
    4.65 +    error |= __vmwrite(HOST_FS_BASE, 0);
    4.66 +    error |= __vmwrite(HOST_GS_BASE, 0);
    4.67 +#elif defined(__x86_64__)
    4.68 +    rdmsrl(MSR_FS_BASE, tmp); error |= __vmwrite(HOST_FS_BASE, tmp);
    4.69 +    rdmsrl(MSR_GS_BASE, tmp); error |= __vmwrite(HOST_GS_BASE, tmp);
    4.70 +#endif
    4.71 +
    4.72 +    /* Host control registers. */
    4.73 +    error |= __vmwrite(HOST_CR0, read_cr0());
    4.74 +    error |= __vmwrite(HOST_CR4, read_cr4());
    4.75 +
    4.76 +    /* Host CS:RIP. */
    4.77 +    error |= __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
    4.78 +    error |= __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
    4.79 +
    4.80 +    /* MSR intercepts. */
    4.81      error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    4.82      error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
    4.83      error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    4.84 @@ -395,7 +405,7 @@ static inline int construct_init_vmcs_gu
    4.85  
    4.86      error |= __vmwrite(GUEST_ACTIVITY_STATE, 0);
    4.87  
    4.88 -    /* Guest Selectors */
    4.89 +    /* Guest selectors. */
    4.90      error |= __vmwrite(GUEST_ES_SELECTOR, GUEST_LAUNCH_DS);
    4.91      error |= __vmwrite(GUEST_SS_SELECTOR, GUEST_LAUNCH_DS);
    4.92      error |= __vmwrite(GUEST_DS_SELECTOR, GUEST_LAUNCH_DS);
    4.93 @@ -403,7 +413,7 @@ static inline int construct_init_vmcs_gu
    4.94      error |= __vmwrite(GUEST_GS_SELECTOR, GUEST_LAUNCH_DS);
    4.95      error |= __vmwrite(GUEST_CS_SELECTOR, GUEST_LAUNCH_CS);
    4.96  
    4.97 -    /* Guest segment bases */
    4.98 +    /* Guest segment bases. */
    4.99      error |= __vmwrite(GUEST_ES_BASE, 0);
   4.100      error |= __vmwrite(GUEST_SS_BASE, 0);
   4.101      error |= __vmwrite(GUEST_DS_BASE, 0);
   4.102 @@ -411,7 +421,7 @@ static inline int construct_init_vmcs_gu
   4.103      error |= __vmwrite(GUEST_GS_BASE, 0);
   4.104      error |= __vmwrite(GUEST_CS_BASE, 0);
   4.105  
   4.106 -    /* Guest segment Limits */
   4.107 +    /* Guest segment limits. */
   4.108      error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
   4.109      error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
   4.110      error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
   4.111 @@ -419,7 +429,7 @@ static inline int construct_init_vmcs_gu
   4.112      error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);
   4.113      error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
   4.114  
   4.115 -    /* Guest segment AR bytes */
   4.116 +    /* Guest segment AR bytes. */
   4.117      arbytes.bytes = 0;
   4.118      arbytes.fields.seg_type = 0x3;          /* type = 3 */
   4.119      arbytes.fields.s = 1;                   /* code or data, i.e. not system */
   4.120 @@ -428,131 +438,54 @@ static inline int construct_init_vmcs_gu
   4.121      arbytes.fields.default_ops_size = 1;    /* 32-bit */
   4.122      arbytes.fields.g = 1;
   4.123      arbytes.fields.null_bit = 0;            /* not null */
   4.124 -
   4.125      error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
   4.126      error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
   4.127      error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
   4.128      error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
   4.129      error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);
   4.130 -
   4.131      arbytes.fields.seg_type = 0xb;          /* type = 0xb */
   4.132      error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
   4.133  
   4.134 -    /* Guest GDT */
   4.135 +    /* Guest GDT. */
   4.136      error |= __vmwrite(GUEST_GDTR_BASE, 0);
   4.137      error |= __vmwrite(GUEST_GDTR_LIMIT, 0);
   4.138  
   4.139 -    /* Guest IDT */
   4.140 +    /* Guest IDT. */
   4.141      error |= __vmwrite(GUEST_IDTR_BASE, 0);
   4.142      error |= __vmwrite(GUEST_IDTR_LIMIT, 0);
   4.143  
   4.144 -    /* Guest LDT & TSS */
   4.145 +    /* Guest LDT and TSS. */
   4.146      arbytes.fields.s = 0;                   /* not code or data segement */
   4.147      arbytes.fields.seg_type = 0x2;          /* LTD */
   4.148      arbytes.fields.default_ops_size = 0;    /* 16-bit */
   4.149      arbytes.fields.g = 0;
   4.150      error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);
   4.151 -
   4.152      arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
   4.153      error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
   4.154 -    /* CR3 is set in vmx_final_setup_guest */
   4.155  
   4.156      error |= __vmwrite(GUEST_RSP, 0);
   4.157      error |= __vmwrite(GUEST_RIP, regs->eip);
   4.158  
   4.159 -    /* Guest EFLAGS */
   4.160 +    /* Guest EFLAGS. */
   4.161      eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
   4.162      eflags |= HVM_EFLAGS_RESERVED_1; /* set 1s */
   4.163      error |= __vmwrite(GUEST_RFLAGS, eflags);
   4.164  
   4.165      error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
   4.166 -    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
   4.167 -    error |= __vmwrite(GUEST_DR7, dr7);
   4.168 +    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (tmp));
   4.169 +    error |= __vmwrite(GUEST_DR7, tmp);
   4.170      error |= __vmwrite(VMCS_LINK_POINTER, ~0UL);
   4.171  #if defined(__i386__)
   4.172      error |= __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
   4.173  #endif
   4.174  
   4.175 -    return error;
   4.176 -}
   4.177 -
   4.178 -static inline int construct_vmcs_host(void)
   4.179 -{
   4.180 -    int error = 0;
   4.181 -#ifdef __x86_64__
   4.182 -    unsigned long fs_base;
   4.183 -    unsigned long gs_base;
   4.184 -#endif
   4.185 -    unsigned long crn;
   4.186 -
   4.187 -    /* Host Selectors */
   4.188 -    error |= __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
   4.189 -    error |= __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
   4.190 -    error |= __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
   4.191 -#if defined(__i386__)
   4.192 -    error |= __vmwrite(HOST_FS_SELECTOR, __HYPERVISOR_DS);
   4.193 -    error |= __vmwrite(HOST_GS_SELECTOR, __HYPERVISOR_DS);
   4.194 -    error |= __vmwrite(HOST_FS_BASE, 0);
   4.195 -    error |= __vmwrite(HOST_GS_BASE, 0);
   4.196 -
   4.197 -#else
   4.198 -    rdmsrl(MSR_FS_BASE, fs_base);
   4.199 -    rdmsrl(MSR_GS_BASE, gs_base);
   4.200 -    error |= __vmwrite(HOST_FS_BASE, fs_base);
   4.201 -    error |= __vmwrite(HOST_GS_BASE, gs_base);
   4.202 -
   4.203 -#endif
   4.204 -    error |= __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
   4.205 -
   4.206 -    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
   4.207 -    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
   4.208 -
   4.209 -    /* CR3 is set in vmx_final_setup_hostos */
   4.210 -    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
   4.211 -    error |= __vmwrite(HOST_CR4, crn);
   4.212 -
   4.213 -    error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
   4.214 -
   4.215 -    return error;
   4.216 -}
   4.217 -
   4.218 -/*
   4.219 - * the working VMCS pointer has been set properly
   4.220 - * just before entering this function.
   4.221 - */
   4.222 -static int construct_vmcs(struct vcpu *v,
   4.223 -                          cpu_user_regs_t *regs)
   4.224 -{
   4.225 -    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
   4.226 -    int error;
   4.227 -
   4.228 -    if ( (error = construct_vmcs_controls(arch_vmx)) ) {
   4.229 -        printk("construct_vmcs: construct_vmcs_controls failed.\n");
   4.230 -        return error;
   4.231 -    }
   4.232 -
   4.233 -    /* host selectors */
   4.234 -    if ( (error = construct_vmcs_host()) ) {
   4.235 -        printk("construct_vmcs: construct_vmcs_host failed.\n");
   4.236 -        return error;
   4.237 -    }
   4.238 -
   4.239 -    /* guest selectors */
   4.240 -    if ( (error = construct_init_vmcs_guest(regs)) ) {
   4.241 -        printk("construct_vmcs: construct_vmcs_guest failed.\n");
   4.242 -        return error;
   4.243 -    }
   4.244 -
   4.245 -    if ( (error = __vmwrite(EXCEPTION_BITMAP,
   4.246 -                            MONITOR_DEFAULT_EXCEPTION_BITMAP)) ) {
   4.247 -        printk("construct_vmcs: setting exception bitmap failed.\n");
   4.248 -        return error;
   4.249 -    }
   4.250 +    error |= __vmwrite(EXCEPTION_BITMAP,
   4.251 +                       MONITOR_DEFAULT_EXCEPTION_BITMAP);
   4.252  
   4.253      if ( regs->eflags & EF_TF )
   4.254 -        error = __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
   4.255 +        error |= __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
   4.256      else
   4.257 -        error = __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
   4.258 +        error |= __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
   4.259  
   4.260      return error;
   4.261  }
     5.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Nov 06 09:46:13 2006 +0000
     5.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Nov 06 11:03:04 2006 +0000
     5.3 @@ -566,8 +566,6 @@ static unsigned long vmx_get_ctrl_reg(st
     5.4      return 0;                   /* dummy */
     5.5  }
     5.6  
     5.7 -
     5.8 -
     5.9  /* Make sure that xen intercepts any FP accesses from current */
    5.10  static void vmx_stts(struct vcpu *v)
    5.11  {
    5.12 @@ -591,20 +589,16 @@ static void vmx_stts(struct vcpu *v)
    5.13      }
    5.14  }
    5.15  
    5.16 -
    5.17  static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
    5.18  {
    5.19 -    /* VMX depends on operating on the current vcpu */
    5.20 -    ASSERT(v == current);
    5.21 -
    5.22 +    vmx_vmcs_enter(v);
    5.23      __vmwrite(TSC_OFFSET, offset);
    5.24  #if defined (__i386__)
    5.25      __vmwrite(TSC_OFFSET_HIGH, offset >> 32);
    5.26  #endif
    5.27 +    vmx_vmcs_exit(v);
    5.28  }
    5.29  
    5.30 -
    5.31 -
    5.32  /* SMP VMX guest support */
    5.33  static void vmx_init_ap_context(struct vcpu_guest_context *ctxt,
    5.34                           int vcpuid, int trampoline_vector)
     6.1 --- a/xen/common/domain.c	Mon Nov 06 09:46:13 2006 +0000
     6.2 +++ b/xen/common/domain.c	Mon Nov 06 11:03:04 2006 +0000
     6.3 @@ -64,12 +64,16 @@ void free_domain(struct domain *d)
     6.4      struct vcpu *v;
     6.5      int i;
     6.6  
     6.7 -    sched_destroy_domain(d);
     6.8 +    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
     6.9 +    {
    6.10 +        if ( (v = d->vcpu[i]) == NULL )
    6.11 +            continue;
    6.12 +        vcpu_destroy(v);
    6.13 +        sched_destroy_vcpu(v);
    6.14 +        free_vcpu_struct(v);
    6.15 +    }
    6.16  
    6.17 -    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
    6.18 -        if ( (v = d->vcpu[i]) != NULL )
    6.19 -            free_vcpu_struct(v);
    6.20 -
    6.21 +    sched_destroy_domain(d);
    6.22      xfree(d);
    6.23  }
    6.24  
    6.25 @@ -80,7 +84,7 @@ struct vcpu *alloc_vcpu(
    6.26  
    6.27      BUG_ON(d->vcpu[vcpu_id] != NULL);
    6.28  
    6.29 -    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
    6.30 +    if ( (v = alloc_vcpu_struct()) == NULL )
    6.31          return NULL;
    6.32  
    6.33      v->domain = d;
    6.34 @@ -94,12 +98,19 @@ struct vcpu *alloc_vcpu(
    6.35      if ( (vcpu_id != 0) && !is_idle_domain(d) )
    6.36          set_bit(_VCPUF_down, &v->vcpu_flags);
    6.37  
    6.38 -    if ( sched_init_vcpu(v, cpu_id) < 0 )
    6.39 +    if ( sched_init_vcpu(v, cpu_id) != 0 )
    6.40      {
    6.41          free_vcpu_struct(v);
    6.42          return NULL;
    6.43      }
    6.44  
    6.45 +    if ( vcpu_initialise(v) != 0 )
    6.46 +    {
    6.47 +        sched_destroy_vcpu(v);
    6.48 +        free_vcpu_struct(v);
    6.49 +        return NULL;
    6.50 +    }
    6.51 +
    6.52      d->vcpu[vcpu_id] = v;
    6.53      if ( vcpu_id != 0 )
    6.54          d->vcpu[v->vcpu_id-1]->next_in_list = v;
    6.55 @@ -153,6 +164,9 @@ struct domain *domain_create(domid_t dom
    6.56      if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
    6.57          goto fail4;
    6.58  
    6.59 +    if ( sched_init_domain(d) != 0 )
    6.60 +        goto fail4;
    6.61 +
    6.62      if ( !is_idle_domain(d) )
    6.63      {
    6.64          write_lock(&domlist_lock);
     7.1 --- a/xen/common/sched_credit.c	Mon Nov 06 09:46:13 2006 +0000
     7.2 +++ b/xen/common/sched_credit.c	Mon Nov 06 11:03:04 2006 +0000
     7.3 @@ -115,8 +115,10 @@
     7.4      _MACRO(steal_peer_idle)                 \
     7.5      _MACRO(steal_peer_running)              \
     7.6      _MACRO(steal_peer_pinned)               \
     7.7 +    _MACRO(dom_init)                        \
     7.8 +    _MACRO(dom_destroy)                     \
     7.9      _MACRO(vcpu_init)                       \
    7.10 -    _MACRO(dom_destroy)
    7.11 +    _MACRO(vcpu_destroy)
    7.12  
    7.13  #ifndef NDEBUG
    7.14  #define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
    7.15 @@ -454,43 +456,14 @@ static int
    7.16  csched_vcpu_init(struct vcpu *vc)
    7.17  {
    7.18      struct domain * const dom = vc->domain;
    7.19 -    struct csched_dom *sdom;
    7.20 +    struct csched_dom *sdom = CSCHED_DOM(dom);
    7.21      struct csched_vcpu *svc;
    7.22 -    int16_t pri;
    7.23  
    7.24      CSCHED_STAT_CRANK(vcpu_init);
    7.25  
    7.26 -    /* Allocate, if appropriate, per-domain info */
    7.27 -    if ( is_idle_vcpu(vc) )
    7.28 -    {
    7.29 -        sdom = NULL;
    7.30 -        pri = CSCHED_PRI_IDLE;
    7.31 -    }
    7.32 -    else if ( CSCHED_DOM(dom) )
    7.33 -    {
    7.34 -        sdom = CSCHED_DOM(dom);
    7.35 -        pri = CSCHED_PRI_TS_UNDER;
    7.36 -    }
    7.37 -    else 
    7.38 -    {
    7.39 -        sdom = xmalloc(struct csched_dom);
    7.40 -        if ( !sdom )
    7.41 -            return -1;
    7.42 -
    7.43 -        /* Initialize credit and weight */
    7.44 -        INIT_LIST_HEAD(&sdom->active_vcpu);
    7.45 -        sdom->active_vcpu_count = 0;
    7.46 -        INIT_LIST_HEAD(&sdom->active_sdom_elem);
    7.47 -        sdom->dom = dom;
    7.48 -        sdom->weight = CSCHED_DEFAULT_WEIGHT;
    7.49 -        sdom->cap = 0U;
    7.50 -        dom->sched_priv = sdom;
    7.51 -        pri = CSCHED_PRI_TS_UNDER;
    7.52 -    }
    7.53 -
    7.54      /* Allocate per-VCPU info */
    7.55      svc = xmalloc(struct csched_vcpu);
    7.56 -    if ( !svc )
    7.57 +    if ( svc == NULL )
    7.58          return -1;
    7.59  
    7.60      INIT_LIST_HEAD(&svc->runq_elem);
    7.61 @@ -498,7 +471,7 @@ csched_vcpu_init(struct vcpu *vc)
    7.62      svc->sdom = sdom;
    7.63      svc->vcpu = vc;
    7.64      atomic_set(&svc->credit, 0);
    7.65 -    svc->pri = pri;
    7.66 +    svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
    7.67      memset(&svc->stats, 0, sizeof(svc->stats));
    7.68      vc->sched_priv = svc;
    7.69  
    7.70 @@ -521,12 +494,14 @@ csched_vcpu_init(struct vcpu *vc)
    7.71  }
    7.72  
    7.73  static void
    7.74 -csched_vcpu_free(struct vcpu *vc)
    7.75 +csched_vcpu_destroy(struct vcpu *vc)
    7.76  {
    7.77      struct csched_vcpu * const svc = CSCHED_VCPU(vc);
    7.78      struct csched_dom * const sdom = svc->sdom;
    7.79      unsigned long flags;
    7.80  
    7.81 +    CSCHED_STAT_CRANK(vcpu_destroy);
    7.82 +
    7.83      BUG_ON( sdom == NULL );
    7.84      BUG_ON( !list_empty(&svc->runq_elem) );
    7.85  
    7.86 @@ -641,20 +616,39 @@ csched_dom_cntl(
    7.87      return 0;
    7.88  }
    7.89  
    7.90 +static int
    7.91 +csched_dom_init(struct domain *dom)
    7.92 +{
    7.93 +    struct csched_dom *sdom;
    7.94 +
    7.95 +    CSCHED_STAT_CRANK(dom_init);
    7.96 +
    7.97 +    if ( is_idle_domain(dom) )
    7.98 +        return 0;
    7.99 +
   7.100 +    sdom = xmalloc(struct csched_dom);
   7.101 +    if ( sdom == NULL )
   7.102 +        return -ENOMEM;
   7.103 +
   7.104 +    /* Initialize credit and weight */
   7.105 +    INIT_LIST_HEAD(&sdom->active_vcpu);
   7.106 +    sdom->active_vcpu_count = 0;
   7.107 +    INIT_LIST_HEAD(&sdom->active_sdom_elem);
   7.108 +    sdom->dom = dom;
   7.109 +    sdom->weight = CSCHED_DEFAULT_WEIGHT;
   7.110 +    sdom->cap = 0U;
   7.111 +    dom->sched_priv = sdom;
   7.112 +
   7.113 +    return 0;
   7.114 +}
   7.115 +
   7.116  static void
   7.117  csched_dom_destroy(struct domain *dom)
   7.118  {
   7.119      struct csched_dom * const sdom = CSCHED_DOM(dom);
   7.120 -    int i;
   7.121  
   7.122      CSCHED_STAT_CRANK(dom_destroy);
   7.123  
   7.124 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   7.125 -    {
   7.126 -        if ( dom->vcpu[i] )
   7.127 -            csched_vcpu_free(dom->vcpu[i]);
   7.128 -    }
   7.129 -
   7.130      xfree(sdom);
   7.131  }
   7.132  
   7.133 @@ -1226,8 +1220,11 @@ struct scheduler sched_credit_def = {
   7.134      .opt_name       = "credit",
   7.135      .sched_id       = XEN_SCHEDULER_CREDIT,
   7.136  
   7.137 +    .init_domain    = csched_dom_init,
   7.138 +    .destroy_domain = csched_dom_destroy,
   7.139 +
   7.140      .init_vcpu      = csched_vcpu_init,
   7.141 -    .destroy_domain = csched_dom_destroy,
   7.142 +    .destroy_vcpu   = csched_vcpu_destroy,
   7.143  
   7.144      .sleep          = csched_vcpu_sleep,
   7.145      .wake           = csched_vcpu_wake,
     8.1 --- a/xen/common/sched_sedf.c	Mon Nov 06 09:46:13 2006 +0000
     8.2 +++ b/xen/common/sched_sedf.c	Mon Nov 06 11:03:04 2006 +0000
     8.3 @@ -333,14 +333,6 @@ static int sedf_init_vcpu(struct vcpu *v
     8.4  {
     8.5      struct sedf_vcpu_info *inf;
     8.6  
     8.7 -    if ( v->domain->sched_priv == NULL )
     8.8 -    {
     8.9 -        v->domain->sched_priv = xmalloc(struct sedf_dom_info);
    8.10 -        if ( v->domain->sched_priv == NULL )
    8.11 -            return -1;
    8.12 -        memset(v->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
    8.13 -    }
    8.14 -
    8.15      if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
    8.16          return -1;
    8.17      memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
    8.18 @@ -398,15 +390,25 @@ static int sedf_init_vcpu(struct vcpu *v
    8.19      return 0;
    8.20  }
    8.21  
    8.22 +static void sedf_destroy_vcpu(struct vcpu *v)
    8.23 +{
    8.24 +    xfree(v->sched_priv);
    8.25 +}
    8.26 +
    8.27 +static int sedf_init_domain(struct domain *d)
    8.28 +{
    8.29 +    d->sched_priv = xmalloc(struct sedf_dom_info);
    8.30 +    if ( d->sched_priv == NULL )
    8.31 +        return -ENOMEM;
    8.32 +
    8.33 +    memset(d->sched_priv, 0, sizeof(struct sedf_dom_info));
    8.34 +
    8.35 +    return 0;
    8.36 +}
    8.37 +
    8.38  static void sedf_destroy_domain(struct domain *d)
    8.39  {
    8.40 -    int i;
    8.41 -
    8.42      xfree(d->sched_priv);
    8.43 - 
    8.44 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
    8.45 -        if ( d->vcpu[i] )
    8.46 -            xfree(d->vcpu[i]->sched_priv);
    8.47  }
    8.48  
    8.49  /*
    8.50 @@ -1427,8 +1429,11 @@ struct scheduler sched_sedf_def = {
    8.51      .opt_name = "sedf",
    8.52      .sched_id = XEN_SCHEDULER_SEDF,
    8.53      
    8.54 +    .init_domain    = sedf_init_domain,
    8.55 +    .destroy_domain = sedf_destroy_domain,
    8.56 +
    8.57      .init_vcpu      = sedf_init_vcpu,
    8.58 -    .destroy_domain = sedf_destroy_domain,
    8.59 +    .destroy_vcpu   = sedf_destroy_vcpu,
    8.60  
    8.61      .do_schedule    = sedf_do_schedule,
    8.62      .dump_cpu_state = sedf_dump_cpu_state,
     9.1 --- a/xen/common/schedule.c	Mon Nov 06 09:46:13 2006 +0000
     9.2 +++ b/xen/common/schedule.c	Mon Nov 06 11:03:04 2006 +0000
     9.3 @@ -132,17 +132,20 @@ int sched_init_vcpu(struct vcpu *v, unsi
     9.4      return SCHED_OP(init_vcpu, v);
     9.5  }
     9.6  
     9.7 +void sched_destroy_vcpu(struct vcpu *v)
     9.8 +{
     9.9 +    kill_timer(&v->timer);
    9.10 +    kill_timer(&v->poll_timer);
    9.11 +    SCHED_OP(destroy_vcpu, v);
    9.12 +}
    9.13 +
    9.14 +int sched_init_domain(struct domain *d)
    9.15 +{
    9.16 +    return SCHED_OP(init_domain, d);
    9.17 +}
    9.18 +
    9.19  void sched_destroy_domain(struct domain *d)
    9.20  {
    9.21 -    struct vcpu *v;
    9.22 -
    9.23 -    for_each_vcpu ( d, v )
    9.24 -    {
    9.25 -        kill_timer(&v->timer);
    9.26 -        kill_timer(&v->poll_timer);
    9.27 -        TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
    9.28 -    }
    9.29 -
    9.30      SCHED_OP(destroy_domain, d);
    9.31  }
    9.32  
    10.1 --- a/xen/include/xen/domain.h	Mon Nov 06 09:46:13 2006 +0000
    10.2 +++ b/xen/include/xen/domain.h	Mon Nov 06 11:03:04 2006 +0000
    10.3 @@ -15,9 +15,19 @@ void free_domain(struct domain *d);
    10.4   * Arch-specifics.
    10.5   */
    10.6  
    10.7 -struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id);
    10.8 +/* Allocate/free a VCPU structure. */
    10.9 +struct vcpu *alloc_vcpu_struct(void);
   10.10 +void free_vcpu_struct(struct vcpu *v);
   10.11  
   10.12 -void free_vcpu_struct(struct vcpu *v);
   10.13 +/*
   10.14 + * Initialise/destroy arch-specific details of a VCPU.
   10.15 + *  - vcpu_initialise() is called after the basic generic fields of the
   10.16 + *    VCPU structure are initialised. Many operations can be applied to the
   10.17 + *    VCPU at this point (e.g., vcpu_pause()).
   10.18 + *  - vcpu_destroy() is called only if vcpu_initialise() previously succeeded.
   10.19 + */
   10.20 +int  vcpu_initialise(struct vcpu *v);
   10.21 +void vcpu_destroy(struct vcpu *v);
   10.22  
   10.23  int arch_domain_create(struct domain *d);
   10.24  
    11.1 --- a/xen/include/xen/sched-if.h	Mon Nov 06 09:46:13 2006 +0000
    11.2 +++ b/xen/include/xen/sched-if.h	Mon Nov 06 11:03:04 2006 +0000
    11.3 @@ -63,8 +63,11 @@ struct scheduler {
    11.4      void         (*init)           (void);
    11.5      void         (*tick)           (unsigned int cpu);
    11.6  
    11.7 +    int          (*init_domain)    (struct domain *);
    11.8 +    void         (*destroy_domain) (struct domain *);
    11.9 +
   11.10      int          (*init_vcpu)      (struct vcpu *);
   11.11 -    void         (*destroy_domain) (struct domain *);
   11.12 +    void         (*destroy_vcpu)   (struct vcpu *);
   11.13  
   11.14      void         (*sleep)          (struct vcpu *);
   11.15      void         (*wake)           (struct vcpu *);
    12.1 --- a/xen/include/xen/sched.h	Mon Nov 06 09:46:13 2006 +0000
    12.2 +++ b/xen/include/xen/sched.h	Mon Nov 06 11:03:04 2006 +0000
    12.3 @@ -291,7 +291,9 @@ void new_thread(struct vcpu *d,
    12.4  void scheduler_init(void);
    12.5  void schedulers_start(void);
    12.6  int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
    12.7 -void sched_destroy_domain(struct domain *);
    12.8 +void sched_destroy_vcpu(struct vcpu *v);
    12.9 +int  sched_init_domain(struct domain *d);
   12.10 +void sched_destroy_domain(struct domain *d);
   12.11  long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
   12.12  int  sched_id(void);
   12.13  void vcpu_wake(struct vcpu *d);