From: Zhang Xiantao
Date: Tue, 15 Jan 2013 10:17:01 +0000 (+0100)
Subject: EPT: Make ept data structure or operations neutral
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=90805dcb16a1f4132b9dec0726cc56de4d771cc5;p=people%2Faperard%2Fxen-arm.git

EPT: Make ept data structure or operations neutral

Share the current EPT logic with the nested EPT case, making the
related data structures and operations neutral to both common EPT and
nested EPT.

Signed-off-by: Zhang Xiantao
Acked-by: Tim Deegan
Acked-by: Jun Nakajima
Acked-by: Eddie Dong
Committed-by: Jan Beulich
---
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 9adc7a453a..de22e032df 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -942,7 +942,13 @@ static int construct_vmcs(struct vcpu *v)
     }
 
     if ( paging_mode_hap(d) )
-        __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp);
+    {
+        struct p2m_domain *p2m = p2m_get_hostp2m(d);
+        struct ept_data *ept = &p2m->ept;
+
+        ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m));
+        __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+    }
 
     if ( cpu_has_vmx_pat && paging_mode_hap(d) )
     {
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 4abfa90f1c..d74aae0855 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -74,38 +74,19 @@ static void vmx_fpu_dirty_intercept(void);
 static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
 static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
-static void __ept_sync_domain(void *info);
 
 static int vmx_domain_initialise(struct domain *d)
 {
     int rc;
 
-    /* Set the memory type used when accessing EPT paging structures. */
-    d->arch.hvm_domain.vmx.ept_control.ept_mt = EPT_DEFAULT_MT;
-
-    /* set EPT page-walk length, now it's actual walk length - 1, i.e. 3 */
-    d->arch.hvm_domain.vmx.ept_control.ept_wl = 3;
-
-    d->arch.hvm_domain.vmx.ept_control.asr =
-        pagetable_get_pfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
-
-    if ( !zalloc_cpumask_var(&d->arch.hvm_domain.vmx.ept_synced) )
-        return -ENOMEM;
-
     if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
-    {
-        free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced);
         return rc;
-    }
 
     return 0;
 }
 
 static void vmx_domain_destroy(struct domain *d)
 {
-    if ( paging_mode_hap(d) )
-        on_each_cpu(__ept_sync_domain, d, 1);
-    free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced);
     vmx_free_vlapic_mapping(d);
 }
@@ -641,6 +622,7 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
 {
     struct domain *d = v->domain;
     unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
+    struct ept_data *ept_data = &p2m_get_hostp2m(d)->ept;
 
     /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
     if ( old_cr4 != new_cr4 )
@@ -650,10 +632,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
     {
         unsigned int cpu = smp_processor_id();
         /* Test-and-test-and-set this CPU in the EPT-is-synced mask.
*/ - if ( !cpumask_test_cpu(cpu, d->arch.hvm_domain.vmx.ept_synced) && + if ( !cpumask_test_cpu(cpu, ept_get_synced_mask(ept_data)) && !cpumask_test_and_set_cpu(cpu, - d->arch.hvm_domain.vmx.ept_synced) ) - __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0); + ept_get_synced_mask(ept_data)) ) + __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0); } vmx_restore_guest_msrs(v); @@ -1216,33 +1198,6 @@ static void vmx_update_guest_efer(struct vcpu *v) (v->arch.hvm_vcpu.guest_efer & EFER_SCE)); } -static void __ept_sync_domain(void *info) -{ - struct domain *d = info; - __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0); -} - -void ept_sync_domain(struct domain *d) -{ - /* Only if using EPT and this domain has some VCPUs to dirty. */ - if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) - return; - - ASSERT(local_irq_is_enabled()); - - /* - * Flush active cpus synchronously. Flush others the next time this domain - * is scheduled onto them. We accept the race of other CPUs adding to - * the ept_synced mask before on_selected_cpus() reads it, resulting in - * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack. - */ - cpumask_and(d->arch.hvm_domain.vmx.ept_synced, - d->domain_dirty_cpumask, &cpu_online_map); - - on_selected_cpus(d->arch.hvm_domain.vmx.ept_synced, - __ept_sync_domain, d, 1); -} - void nvmx_enqueue_n2_exceptions(struct vcpu *v, unsigned long intr_fields, int error_code) { diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index c964f542b4..db3e3d45d3 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -291,9 +291,11 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, int need_modify_vtd_table = 1; int vtd_pte_present = 0; int needs_sync = 1; - struct domain *d = p2m->domain; ept_entry_t old_entry = { .epte = 0 }; + struct ept_data *ept = &p2m->ept; + struct domain *d = p2m->domain; + ASSERT(ept); /* * the caller must make sure: * 1. passing valid gfn and mfn at order boundary. @@ -301,17 +303,17 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, * 3. passing a valid order. 
*/ if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) || - ((u64)gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) || + ((u64)gfn >> ((ept_get_wl(ept) + 1) * EPT_TABLE_ORDER)) || (order % EPT_TABLE_ORDER) ) return 0; - ASSERT((target == 2 && hvm_hap_has_1gb(d)) || - (target == 1 && hvm_hap_has_2mb(d)) || + ASSERT((target == 2 && hvm_hap_has_1gb()) || + (target == 1 && hvm_hap_has_2mb()) || (target == 0)); - table = map_domain_page(ept_get_asr(d)); + table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); - for ( i = ept_get_wl(d); i > target; i-- ) + for ( i = ept_get_wl(ept); i > target; i-- ) { ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i); if ( !ret ) @@ -439,9 +441,11 @@ out: unmap_domain_page(table); if ( needs_sync ) - ept_sync_domain(p2m->domain); + ept_sync_domain(p2m); - if ( rv && iommu_enabled && need_iommu(p2m->domain) && need_modify_vtd_table ) + /* For non-nested p2m, may need to change VT-d page table.*/ + if ( rv && !p2m_is_nestedp2m(p2m) && iommu_enabled && + need_iommu(p2m->domain) && need_modify_vtd_table ) { if ( iommu_hap_pt_share ) iommu_pte_flush(d, gfn, (u64*)ept_entry, order, vtd_pte_present); @@ -488,14 +492,14 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t* a, p2m_query_t q, unsigned int *page_order) { - struct domain *d = p2m->domain; - ept_entry_t *table = map_domain_page(ept_get_asr(d)); + ept_entry_t *table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; u32 index; int i; int ret = 0; mfn_t mfn = _mfn(INVALID_MFN); + struct ept_data *ept = &p2m->ept; *t = p2m_mmio_dm; *a = p2m_access_n; @@ -506,7 +510,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m, /* Should check if gfn obeys GAW here. 
*/ - for ( i = ept_get_wl(d); i > 0; i-- ) + for ( i = ept_get_wl(ept); i > 0; i-- ) { retry: ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); @@ -588,19 +592,20 @@ out: static ept_entry_t ept_get_entry_content(struct p2m_domain *p2m, unsigned long gfn, int *level) { - ept_entry_t *table = map_domain_page(ept_get_asr(p2m->domain)); + ept_entry_t *table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; ept_entry_t content = { .epte = 0 }; u32 index; int i; int ret=0; + struct ept_data *ept = &p2m->ept; /* This pfn is higher than the highest the p2m map currently holds */ if ( gfn > p2m->max_mapped_pfn ) goto out; - for ( i = ept_get_wl(p2m->domain); i > 0; i-- ) + for ( i = ept_get_wl(ept); i > 0; i-- ) { ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( !ret || ret == GUEST_TABLE_POD_PAGE ) @@ -622,7 +627,8 @@ static ept_entry_t ept_get_entry_content(struct p2m_domain *p2m, void ept_walk_table(struct domain *d, unsigned long gfn) { struct p2m_domain *p2m = p2m_get_hostp2m(d); - ept_entry_t *table = map_domain_page(ept_get_asr(d)); + struct ept_data *ept = &p2m->ept; + ept_entry_t *table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); unsigned long gfn_remainder = gfn; int i; @@ -638,7 +644,7 @@ void ept_walk_table(struct domain *d, unsigned long gfn) goto out; } - for ( i = ept_get_wl(d); i >= 0; i-- ) + for ( i = ept_get_wl(ept); i >= 0; i-- ) { ept_entry_t *ept_entry, *next; u32 index; @@ -778,24 +784,76 @@ static void ept_change_entry_type_page(mfn_t ept_page_mfn, int ept_page_level, static void ept_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt) { - struct domain *d = p2m->domain; - if ( ept_get_asr(d) == 0 ) + struct ept_data *ept = &p2m->ept; + if ( ept_get_asr(ept) == 0 ) return; BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct)); - ept_change_entry_type_page(_mfn(ept_get_asr(d)), ept_get_wl(d), ot, nt); + ept_change_entry_type_page(_mfn(ept_get_asr(ept)), + ept_get_wl(ept), ot, nt); + + ept_sync_domain(p2m); +} + +static void __ept_sync_domain(void *info) +{ + struct ept_data *ept = &((struct p2m_domain *)info)->ept; - ept_sync_domain(d); + __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0); } -void ept_p2m_init(struct p2m_domain *p2m) +void ept_sync_domain(struct p2m_domain *p2m) { + struct domain *d = p2m->domain; + struct ept_data *ept = &p2m->ept; + /* Only if using EPT and this domain has some VCPUs to dirty. */ + if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) + return; + + ASSERT(local_irq_is_enabled()); + + /* + * Flush active cpus synchronously. Flush others the next time this domain + * is scheduled onto them. We accept the race of other CPUs adding to + * the ept_synced mask before on_selected_cpus() reads it, resulting in + * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack. + */ + cpumask_and(ept_get_synced_mask(ept), + d->domain_dirty_cpumask, &cpu_online_map); + + on_selected_cpus(ept_get_synced_mask(ept), + __ept_sync_domain, p2m, 1); +} + +int ept_p2m_init(struct p2m_domain *p2m) +{ + struct ept_data *ept = &p2m->ept; + p2m->set_entry = ept_set_entry; p2m->get_entry = ept_get_entry; p2m->change_entry_type_global = ept_change_entry_type_global; p2m->audit_p2m = NULL; + + /* Set the memory type used when accessing EPT paging structures. 
*/ + ept->ept_mt = EPT_DEFAULT_MT; + + /* set EPT page-walk length, now it's actual walk length - 1, i.e. 3 */ + ept->ept_wl = 3; + + if ( !zalloc_cpumask_var(&ept->synced_mask) ) + return -ENOMEM; + + on_each_cpu(__ept_sync_domain, p2m, 1); + + return 0; +} + +void ept_p2m_uninit(struct p2m_domain *p2m) +{ + struct ept_data *ept = &p2m->ept; + free_cpumask_var(ept->synced_mask); } static void ept_dump_p2m_table(unsigned char key) @@ -811,6 +869,7 @@ static void ept_dump_p2m_table(unsigned char key) unsigned long gfn, gfn_remainder; unsigned long record_counter = 0; struct p2m_domain *p2m; + struct ept_data *ept; for_each_domain(d) { @@ -818,15 +877,16 @@ static void ept_dump_p2m_table(unsigned char key) continue; p2m = p2m_get_hostp2m(d); + ept = &p2m->ept; printk("\ndomain%d EPT p2m table: \n", d->domain_id); for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += (1 << order) ) { gfn_remainder = gfn; mfn = _mfn(INVALID_MFN); - table = map_domain_page(ept_get_asr(d)); + table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); - for ( i = ept_get_wl(d); i > 0; i-- ) + for ( i = ept_get_wl(ept); i > 0; i-- ) { ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( ret != GUEST_TABLE_NORMAL_PAGE ) diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 41a461b55b..de1dd822ea 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -57,8 +57,10 @@ boolean_param("hap_2mb", opt_hap_2mb); /* Init the datastructures for later use by the p2m code */ -static void p2m_initialise(struct domain *d, struct p2m_domain *p2m) +static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) { + int ret = 0; + mm_rwlock_init(&p2m->lock); mm_lock_init(&p2m->pod.lock); INIT_LIST_HEAD(&p2m->np2m_list); @@ -72,27 +74,82 @@ static void p2m_initialise(struct domain *d, struct p2m_domain *p2m) p2m->np2m_base = P2M_BASE_EADDR; if ( hap_enabled(d) && cpu_has_vmx ) - ept_p2m_init(p2m); + ret = ept_p2m_init(p2m); else p2m_pt_init(p2m); - return; + return ret; +} + +static struct p2m_domain *p2m_init_one(struct domain *d) +{ + struct p2m_domain *p2m = xzalloc(struct p2m_domain); + + if ( !p2m ) + return NULL; + + if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) ) + goto free_p2m; + + if ( p2m_initialise(d, p2m) ) + goto free_cpumask; + return p2m; + +free_cpumask: + free_cpumask_var(p2m->dirty_cpumask); +free_p2m: + xfree(p2m); + return NULL; } -static int -p2m_init_nestedp2m(struct domain *d) +static void p2m_free_one(struct p2m_domain *p2m) +{ + if ( hap_enabled(p2m->domain) && cpu_has_vmx ) + ept_p2m_uninit(p2m); + free_cpumask_var(p2m->dirty_cpumask); + xfree(p2m); +} + +static int p2m_init_hostp2m(struct domain *d) +{ + struct p2m_domain *p2m = p2m_init_one(d); + + if ( p2m ) + { + d->arch.p2m = p2m; + return 0; + } + return -ENOMEM; +} + +static void p2m_teardown_hostp2m(struct domain *d) +{ + /* Iterate over all p2m tables per domain */ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + + if ( p2m ) + { + p2m_free_one(p2m); + d->arch.p2m = NULL; + } +} + +static void p2m_teardown_nestedp2m(struct domain *d); + +static int p2m_init_nestedp2m(struct domain *d) { uint8_t i; struct p2m_domain *p2m; mm_lock_init(&d->arch.nested_p2m_lock); - for (i = 0; i < MAX_NESTEDP2M; i++) { - d->arch.nested_p2m[i] = p2m = xzalloc(struct p2m_domain); - if (p2m == NULL) - return -ENOMEM; - if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) ) + for (i = 0; i < MAX_NESTEDP2M; i++) + { + d->arch.nested_p2m[i] = p2m = p2m_init_one(d); + if ( p2m == NULL ) + { + p2m_teardown_nestedp2m(d); return -ENOMEM; - 
p2m_initialise(d, p2m); + } p2m->write_p2m_entry = nestedp2m_write_p2m_entry; list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list); } @@ -100,27 +157,37 @@ p2m_init_nestedp2m(struct domain *d) return 0; } -int p2m_init(struct domain *d) +static void p2m_teardown_nestedp2m(struct domain *d) { + uint8_t i; struct p2m_domain *p2m; - int rc; - p2m_get_hostp2m(d) = p2m = xzalloc(struct p2m_domain); - if ( p2m == NULL ) - return -ENOMEM; - if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) ) + for (i = 0; i < MAX_NESTEDP2M; i++) { - xfree(p2m); - return -ENOMEM; + if ( !d->arch.nested_p2m[i] ) + continue; + p2m = d->arch.nested_p2m[i]; + list_del(&p2m->np2m_list); + p2m_free_one(p2m); + d->arch.nested_p2m[i] = NULL; } - p2m_initialise(d, p2m); +} + +int p2m_init(struct domain *d) +{ + int rc; + + rc = p2m_init_hostp2m(d); + if ( rc ) + return rc; /* Must initialise nestedp2m unconditionally * since nestedhvm_enabled(d) returns false here. * (p2m_init runs too early for HVM_PARAM_* options) */ rc = p2m_init_nestedp2m(d); - if ( rc ) - p2m_final_teardown(d); + if ( rc ) + p2m_teardown_hostp2m(d); + return rc; } @@ -421,28 +488,12 @@ void p2m_teardown(struct p2m_domain *p2m) p2m_unlock(p2m); } -static void p2m_teardown_nestedp2m(struct domain *d) -{ - uint8_t i; - - for (i = 0; i < MAX_NESTEDP2M; i++) { - if ( !d->arch.nested_p2m[i] ) - continue; - free_cpumask_var(d->arch.nested_p2m[i]->dirty_cpumask); - xfree(d->arch.nested_p2m[i]); - d->arch.nested_p2m[i] = NULL; - } -} - void p2m_final_teardown(struct domain *d) { /* Iterate over all p2m tables per domain */ - if ( d->arch.p2m ) - { - free_cpumask_var(d->arch.p2m->dirty_cpumask); - xfree(d->arch.p2m); - d->arch.p2m = NULL; - } + struct p2m_domain *p2m = p2m_get_hostp2m(d); + if ( p2m ) + p2m_teardown_hostp2m(d); /* We must teardown unconditionally because * we initialise them unconditionally. diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 4e183c4358..51df81e4f8 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -56,26 +56,27 @@ struct vmx_msr_state { #define EPT_DEFAULT_MT MTRR_TYPE_WRBACK -struct vmx_domain { - unsigned long apic_access_mfn; +struct ept_data { union { - struct { + struct { u64 ept_mt :3, ept_wl :3, rsvd :6, asr :52; }; u64 eptp; - } ept_control; - cpumask_var_t ept_synced; + }; + cpumask_var_t synced_mask; +}; + +struct vmx_domain { + unsigned long apic_access_mfn; }; -#define ept_get_wl(d) \ - ((d)->arch.hvm_domain.vmx.ept_control.ept_wl) -#define ept_get_asr(d) \ - ((d)->arch.hvm_domain.vmx.ept_control.asr) -#define ept_get_eptp(d) \ - ((d)->arch.hvm_domain.vmx.ept_control.eptp) +#define ept_get_wl(ept) ((ept)->ept_wl) +#define ept_get_asr(ept) ((ept)->asr) +#define ept_get_eptp(ept) ((ept)->eptp) +#define ept_get_synced_mask(ept) ((ept)->synced_mask) struct arch_vmx_struct { /* Virtual address of VMCS. 
*/ diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h index c73946f99f..d4d6feb461 100644 --- a/xen/include/asm-x86/hvm/vmx/vmx.h +++ b/xen/include/asm-x86/hvm/vmx/vmx.h @@ -363,7 +363,7 @@ static inline void ept_sync_all(void) __invept(INVEPT_ALL_CONTEXT, 0, 0); } -void ept_sync_domain(struct domain *d); +void ept_sync_domain(struct p2m_domain *p2m); static inline void vpid_sync_vcpu_gva(struct vcpu *v, unsigned long gva) { @@ -425,12 +425,18 @@ void vmx_get_segment_register(struct vcpu *, enum x86_segment, void vmx_inject_extint(int trap); void vmx_inject_nmi(void); -void ept_p2m_init(struct p2m_domain *p2m); +int ept_p2m_init(struct p2m_domain *p2m); +void ept_p2m_uninit(struct p2m_domain *p2m); + void ept_walk_table(struct domain *d, unsigned long gfn); void setup_ept_dump(void); void update_guest_eip(void); +int alloc_p2m_hap_data(struct p2m_domain *p2m); +void free_p2m_hap_data(struct p2m_domain *p2m); +void p2m_init_hap_data(struct p2m_domain *p2m); + /* EPT violation qualifications definitions */ #define _EPT_READ_VIOLATION 0 #define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION) diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 4205a13764..538c9e55ed 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -277,6 +277,10 @@ struct p2m_domain { mm_lock_t lock; /* Locking of private pod structs, * * not relying on the p2m lock. */ } pod; + union { + struct ept_data ept; + /* NPT-equivalent structure could be added here. */ + }; }; /* get host p2m table */
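
For reference, a condensed sketch (not part of the patch) of the per-p2m EPT
state introduced above and of how construct_vmcs() now consumes it. The
declarations follow the vmcs.h hunk in this patch; spacing and the explanatory
comments are reconstructed rather than verbatim.

    /* Per-p2m EPT state, embedded in struct p2m_domain as p2m->ept. */
    struct ept_data {
        union {
            struct {
                u64 ept_mt :3,   /* memory type used when accessing EPT
                                    paging structures */
                    ept_wl :3,   /* page-walk length - 1, i.e. 3 */
                    rsvd   :6,
                    asr    :52;  /* root of this p2m's EPT page tables */
            };
            u64 eptp;            /* the composed EPT pointer value */
        };
        cpumask_var_t synced_mask;   /* CPUs already INVEPT-flushed for
                                        this p2m's EPT context */
    };

    #define ept_get_wl(ept)          ((ept)->ept_wl)
    #define ept_get_asr(ept)         ((ept)->asr)
    #define ept_get_eptp(ept)        ((ept)->eptp)
    #define ept_get_synced_mask(ept) ((ept)->synced_mask)

    /* As in the construct_vmcs() hunk: the EPT pointer is now derived from
     * the host p2m instead of from per-domain VMX state. */
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    struct ept_data *ept = &p2m->ept;

    ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m));
    __vmwrite(EPT_POINTER, ept_get_eptp(ept));

Moving this state from struct vmx_domain into struct p2m_domain is what lets
the same ept_sync_domain()/__ept_sync_domain() path operate on either the host
p2m or a nested p2m, since each p2m now carries its own EPTP and synced mask.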