ia64/xen-unstable
changeset 16017:f71b7d6ad5d8
merge with xen-unstable.hg
author | Alex Williamson <alex.williamson@hp.com> |
---|---|
date | Mon Oct 01 13:59:37 2007 -0600 (2007-10-01) |
parents | d3665dc74a41 5c7afb32df99 |
children | 9fbbba4c23fb |
files |
line diff
```diff
--- a/tools/python/xen/xm/main.py  Mon Oct 01 09:59:24 2007 -0600
+++ b/tools/python/xen/xm/main.py  Mon Oct 01 13:59:37 2007 -0600
@@ -55,6 +55,9 @@ from xen.util.acmpolicy import ACM_LABEL
 
 import XenAPI
 
+import inspect
+from xen.xend import XendOptions
+xoptions = XendOptions.instance()
 
 # getopt.gnu_getopt is better, but only exists in Python 2.3+. Use
 # getopt.getopt if gnu_getopt is not available. This will mean that options
@@ -1595,7 +1598,31 @@ def xm_sched_credit(args):
         err(str(result))
 
 def xm_info(args):
-    arg_check(args, "info", 0)
+    arg_check(args, "info", 0, 1)
+
+    try:
+        (options, params) = getopt.gnu_getopt(args, 'c', ['config'])
+    except getopt.GetoptError, opterr:
+        err(opterr)
+        usage('info')
+
+    show_xend_config = 0
+    for (k, v) in options:
+        if k in ['-c', '--config']:
+            show_xend_config = 1
+
+    if show_xend_config:
+        for name, obj in inspect.getmembers(xoptions):
+            if not inspect.ismethod(obj):
+                if name == "config":
+                    for x in obj[1:]:
+                        if len(x) < 2:
+                            print "%-38s: (none)" % x[0]
+                        else:
+                            print "%-38s:" % x[0], x[1]
+                else:
+                    print "%-38s:" % name, obj
+        return
 
     if serverType == SERVER_XEN_API:
```
```diff
--- a/xen/arch/x86/hvm/hvm.c  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c  Mon Oct 01 13:59:37 2007 -0600
@@ -49,6 +49,10 @@
 #include <public/version.h>
 #include <public/memory.h>
 
+/* Xen command-line option to disable hardware-assisted paging */
+static int opt_hap_disabled;
+invbool_param("hap", opt_hap_disabled);
+
 int hvm_enabled __read_mostly;
 
 unsigned int opt_hvm_debug_level __read_mostly;
@@ -74,6 +78,14 @@ void hvm_enable(struct hvm_function_tabl
 
     hvm_funcs = *fns;
     hvm_enabled = 1;
+
+    if ( hvm_funcs.hap_supported )
+    {
+        if ( opt_hap_disabled )
+            hvm_funcs.hap_supported = 0;
+        printk("HVM: Hardware Assisted Paging %sabled\n",
+               hvm_funcs.hap_supported ? "en" : "dis");
+    }
 }
 
 void hvm_set_guest_time(struct vcpu *v, u64 gtime)
@@ -325,6 +337,34 @@ static int hvm_load_cpu_ctxt(struct doma
     if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
         return -EINVAL;
 
+    /* Sanity check some control registers. */
+    if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
+         !(ctxt.cr0 & X86_CR0_ET) ||
+         ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
+    {
+        gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
+                 ctxt.msr_efer);
+        return -EINVAL;
+    }
+
+    if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
+    {
+        gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
+                 ctxt.msr_efer);
+        return -EINVAL;
+    }
+
+    if ( (ctxt.msr_efer & ~(EFER_LME | EFER_NX | EFER_SCE)) ||
+         ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
+         (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
+         (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
+         ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
+    {
+        gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
+                 ctxt.msr_efer);
+        return -EINVAL;
+    }
+
     /* Architecture-specific vmcs/vmcb bits */
     if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
         return -EINVAL;
@@ -520,6 +560,39 @@ void hvm_triple_fault(void)
     domain_shutdown(v->domain, SHUTDOWN_reboot);
 }
 
+int hvm_set_efer(uint64_t value)
+{
+    struct vcpu *v = current;
+
+    value &= ~EFER_LMA;
+
+    if ( (value & ~(EFER_LME | EFER_NX | EFER_SCE)) ||
+         ((sizeof(long) != 8) && (value & EFER_LME)) ||
+         (!cpu_has_nx && (value & EFER_NX)) ||
+         (!cpu_has_syscall && (value & EFER_SCE)) )
+    {
+        gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
+                 "EFER: %"PRIx64"\n", value);
+        hvm_inject_exception(TRAP_gp_fault, 0, 0);
+        return 0;
+    }
+
+    if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
+         hvm_paging_enabled(v) )
+    {
+        gdprintk(XENLOG_WARNING,
+                 "Trying to change EFER.LME with paging enabled\n");
+        hvm_inject_exception(TRAP_gp_fault, 0, 0);
+        return 0;
+    }
+
+    value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
+    v->arch.hvm_vcpu.guest_efer = value;
+    hvm_update_guest_efer(v);
+
+    return 1;
+}
+
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
```
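The new `hvm_set_efer()` above becomes the single place where guest EFER writes are validated, and `hvm_load_cpu_ctxt()` applies essentially the same rules to a restored image. As a reading aid only — the helper below is hypothetical and not part of this changeset — those checks condense to:

```c
/*
 * Hypothetical condensation (not in the changeset) of the checks that
 * hvm_set_efer() above applies to a guest EFER write; the restore path
 * in hvm_load_cpu_ctxt() enforces essentially the same rules.
 */
static int guest_efer_write_ok(const struct vcpu *v, uint64_t value)
{
    value &= ~EFER_LMA;                  /* LMA is managed by Xen itself */

    if ( value & ~(EFER_LME | EFER_NX | EFER_SCE) )
        return 0;                        /* reserved bit set */
    if ( (sizeof(long) != 8) && (value & EFER_LME) )
        return 0;                        /* no long mode on a 32-bit Xen */
    if ( !cpu_has_nx && (value & EFER_NX) )
        return 0;                        /* host lacks NX */
    if ( !cpu_has_syscall && (value & EFER_SCE) )
        return 0;                        /* host lacks SYSCALL/SYSRET */
    if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
         hvm_paging_enabled(v) )
        return 0;                        /* LME may not change while paging is on */

    return 1;
}
```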
```diff
--- a/xen/arch/x86/hvm/svm/svm.c  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c  Mon Oct 01 13:59:37 2007 -0600
@@ -69,8 +69,7 @@ static void *hsa[NR_CPUS] __read_mostly;
 /* vmcb used for extended host state */
 static void *root_vmcb[NR_CPUS] __read_mostly;
 
-/* hardware assisted paging bits */
-extern int opt_hap_enabled;
+static void svm_update_guest_efer(struct vcpu *v);
 
 static void inline __update_guest_eip(
     struct cpu_user_regs *regs, int inst_len)
@@ -106,22 +105,10 @@ static void svm_cpu_down(void)
     write_efer(read_efer() & ~EFER_SVME);
 }
 
-static int svm_lme_is_set(struct vcpu *v)
-{
-#ifdef __x86_64__
-    u64 guest_efer = v->arch.hvm_vcpu.guest_efer;
-    return guest_efer & EFER_LME;
-#else
-    return 0;
-#endif
-}
-
 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
 {
     u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
     u32 ecx = regs->ecx;
-    struct vcpu *v = current;
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
                 ecx, msr_content);
@@ -129,47 +116,8 @@ static enum handler_return long_mode_do_
     switch ( ecx )
     {
     case MSR_EFER:
-        /* Offending reserved bit will cause #GP. */
-#ifdef __x86_64__
-        if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
-#else
-        if ( (msr_content & ~(EFER_NX | EFER_SCE)) ||
-#endif
-             (!cpu_has_nx && (msr_content & EFER_NX)) ||
-             (!cpu_has_syscall && (msr_content & EFER_SCE)) )
-        {
-            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
-                     "EFER: %"PRIx64"\n", msr_content);
-            goto gp_fault;
-        }
-
-        if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
-        {
-            /* EFER.LME transition from 0 to 1. */
-            if ( hvm_paging_enabled(v) ||
-                 !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
-            {
-                gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
-                         "in paging mode or PAE bit is not set\n");
-                goto gp_fault;
-            }
-        }
-        else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
-        {
-            /* EFER.LME transistion from 1 to 0. */
-            if ( hvm_paging_enabled(v) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Trying to clear EFER.LME while paging enabled\n");
-                goto gp_fault;
-            }
-        }
-
-        v->arch.hvm_vcpu.guest_efer = msr_content;
-        vmcb->efer = msr_content | EFER_SVME;
-        if ( !hvm_paging_enabled(v) )
-            vmcb->efer &= ~(EFER_LME | EFER_LMA);
-
+        if ( !hvm_set_efer(msr_content) )
+            return HNDL_exception_raised;
         break;
 
     case MSR_K8_MC4_MISC: /* Threshold register */
@@ -185,10 +133,6 @@ static enum handler_return long_mode_do_
     }
 
     return HNDL_done;
-
- gp_fault:
-    svm_inject_exception(v, TRAP_gp_fault, 1, 0);
-    return HNDL_exception_raised;
 }
 
 
@@ -452,11 +396,7 @@ static void svm_load_cpu_state(struct vc
     vmcb->cstar = data->msr_cstar;
     vmcb->sfmask = data->msr_syscall_mask;
     v->arch.hvm_vcpu.guest_efer = data->msr_efer;
-    vmcb->efer = data->msr_efer | EFER_SVME;
-    /* VMCB's EFER.LME isn't set unless we're actually in long mode
-     * (see long_mode_do_msr_write()) */
-    if ( !(vmcb->efer & EFER_LMA) )
-        vmcb->efer &= ~EFER_LME;
+    svm_update_guest_efer(v);
 
     hvm_set_guest_time(v, data->tsc);
 }
@@ -546,14 +486,11 @@ static void svm_update_guest_cr(struct v
 
 static void svm_update_guest_efer(struct vcpu *v)
 {
-#ifdef __x86_64__
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
-        vmcb->efer |= EFER_LME | EFER_LMA;
-    else
-        vmcb->efer &= ~(EFER_LME | EFER_LMA);
-#endif
+    vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
+    if ( vmcb->efer & EFER_LMA )
+        vmcb->efer |= EFER_LME;
 }
 
 static void svm_flush_guest_tlbs(void)
@@ -936,18 +873,14 @@ static struct hvm_function_table svm_fun
     .event_pending = svm_event_pending
 };
 
-static void svm_npt_detect(void)
+static int svm_npt_detect(void)
 {
     u32 eax, ebx, ecx, edx;
 
     /* Check CPUID for nested paging support. */
     cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
 
-    if ( !(edx & 1) && opt_hap_enabled )
-    {
-        printk("SVM: Nested paging is not supported by this CPU.\n");
-        opt_hap_enabled = 0;
-    }
+    return (edx & 1);
 }
 
 int start_svm(struct cpuinfo_x86 *c)
@@ -978,8 +911,6 @@ int start_svm(struct cpuinfo_x86 *c)
 
     write_efer(read_efer() | EFER_SVME);
 
-    svm_npt_detect();
-
     /* Initialize the HSA for this core. */
     phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
     phys_hsa_lo = (u32) phys_hsa;
@@ -994,11 +925,10 @@ int start_svm(struct cpuinfo_x86 *c)
 
     setup_vmcb_dump();
 
+    svm_function_table.hap_supported = svm_npt_detect();
+
     hvm_enable(&svm_function_table);
 
-    if ( opt_hap_enabled )
-        printk("SVM: Nested paging enabled.\n");
-
     return 1;
 }
```
```diff
--- a/xen/arch/x86/hvm/vmx/vmx.c  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c  Mon Oct 01 13:59:37 2007 -0600
@@ -199,42 +199,8 @@ static enum handler_return long_mode_do_
     switch ( ecx )
     {
     case MSR_EFER:
-        /* offending reserved bit will cause #GP */
-        if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
-             (!cpu_has_nx && (msr_content & EFER_NX)) ||
-             (!cpu_has_syscall && (msr_content & EFER_SCE)) )
-        {
-            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
-                     "EFER: %"PRIx64"\n", msr_content);
-            goto gp_fault;
-        }
-
-        if ( (msr_content & EFER_LME)
-             && !(v->arch.hvm_vcpu.guest_efer & EFER_LME) )
-        {
-            if ( unlikely(hvm_paging_enabled(v)) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Trying to set EFER.LME with paging enabled\n");
-                goto gp_fault;
-            }
-        }
-        else if ( !(msr_content & EFER_LME)
-                  && (v->arch.hvm_vcpu.guest_efer & EFER_LME) )
-        {
-            if ( unlikely(hvm_paging_enabled(v)) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Trying to clear EFER.LME with paging enabled\n");
-                goto gp_fault;
-            }
-        }
-
-        if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & (EFER_NX|EFER_SCE) )
-            write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
-                       (msr_content & (EFER_NX|EFER_SCE)));
-
-        v->arch.hvm_vcpu.guest_efer = msr_content;
+        if ( !hvm_set_efer(msr_content) )
+            goto exception_raised;
         break;
 
     case MSR_FS_BASE:
@@ -285,6 +251,7 @@ static enum handler_return long_mode_do_
     HVM_DBG_LOG(DBG_LEVEL_0, "Not cano address of msr write %x", ecx);
  gp_fault:
     vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ exception_raised:
     return HNDL_exception_raised;
 }
 
@@ -380,7 +347,8 @@ static enum handler_return long_mode_do_
     u64 msr_content = 0;
     struct vcpu *v = current;
 
-    switch ( regs->ecx ) {
+    switch ( regs->ecx )
+    {
     case MSR_EFER:
         msr_content = v->arch.hvm_vcpu.guest_efer;
         break;
@@ -398,25 +366,12 @@ static enum handler_return long_mode_do_
 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
 {
     u64 msr_content = regs->eax | ((u64)regs->edx << 32);
-    struct vcpu *v = current;
 
     switch ( regs->ecx )
     {
     case MSR_EFER:
-        /* offending reserved bit will cause #GP */
-        if ( (msr_content & ~EFER_NX) ||
-             (!cpu_has_nx && (msr_content & EFER_NX)) )
-        {
-            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
-                     "EFER: %"PRIx64"\n", msr_content);
-            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+        if ( !hvm_set_efer(msr_content) )
             return HNDL_exception_raised;
-        }
-
-        if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & EFER_NX )
-            write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
-
-        v->arch.hvm_vcpu.guest_efer = msr_content;
         break;
 
     default:
@@ -1096,6 +1051,10 @@ static void vmx_update_guest_efer(struct
 
     vmx_vmcs_exit(v);
 #endif
+
+    if ( v == current )
+        write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
+                   (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));
 }
 
 static void vmx_flush_guest_tlbs(void)
```
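One detail worth noting in the VMX hunk above: `hvm_set_efer()` ends by calling `hvm_update_guest_efer()`, and the VMX hook now also refreshes the physical EFER, but only when the vCPU being updated is the one running on this CPU. The sketch below is illustration only (the helper name is hypothetical, not part of the changeset):

```c
/*
 * Illustration only, not part of the changeset: write_efer() programs the
 * EFER of the physical CPU executing this code, so the guest-controlled
 * NX/SCE bits are folded in only for the currently running vCPU; other
 * vCPUs are brought up to date when they are next context-switched in.
 */
static void vmx_sync_host_efer(const struct vcpu *v)
{
    if ( v == current )
        write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) |
                   (v->arch.hvm_vcpu.guest_efer & (EFER_NX | EFER_SCE)));
}
```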
```diff
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c  Mon Oct 01 13:59:37 2007 -0600
@@ -134,7 +134,7 @@ static int device_context_mapped(struct
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
-static struct dma_pte *addr_to_dma_pte(struct domain *domain, u64 addr)
+static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
@@ -144,6 +144,8 @@ static struct dma_pte *addr_to_dma_pte(s
     int level = agaw_to_level(hd->agaw);
     int offset;
     unsigned long flags;
+    struct page_info *pg = NULL;
+    u64 *vaddr = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -153,79 +155,105 @@ static struct dma_pte *addr_to_dma_pte(s
     if ( !hd->pgd )
     {
         pgd = (struct dma_pte *)alloc_xenheap_page();
-        if ( !pgd && !hd->pgd )
+        if ( !pgd )
         {
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return NULL;
         }
-        memset((u8*)pgd, 0, PAGE_SIZE);
-        if ( !hd->pgd )
-            hd->pgd = pgd;
-        else /* somebody is fast */
-            free_xenheap_page((void *) pgd);
+        memset(pgd, 0, PAGE_SIZE);
+        hd->pgd = pgd;
     }
+
     parent = hd->pgd;
-    while ( level > 0 )
+    while ( level > 1 )
     {
-        u8 *tmp;
         offset = address_level_offset(addr, level);
         pte = &parent[offset];
-        if ( level == 1 )
-            break;
+
         if ( dma_pte_addr(*pte) == 0 )
         {
-            tmp = alloc_xenheap_page();
-            memset(tmp, 0, PAGE_SIZE);
-            iommu_flush_cache_page(iommu, tmp);
-
-            if ( !tmp && dma_pte_addr(*pte) == 0 )
+            pg = alloc_domheap_page(NULL);
+            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
+            if ( !vaddr )
             {
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
                 return NULL;
             }
-            if ( dma_pte_addr(*pte) == 0 )
+            memset(vaddr, 0, PAGE_SIZE);
+            iommu_flush_cache_page(iommu, vaddr);
+
+            dma_set_pte_addr(*pte, page_to_maddr(pg));
+
+            /*
+             * high level table always sets r/w, last level
+             * page table control read/write
+             */
+            dma_set_pte_readable(*pte);
+            dma_set_pte_writable(*pte);
+            iommu_flush_cache_entry(iommu, pte);
+        }
+        else
+        {
+            pg = maddr_to_page(pte->val);
+            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
+            if ( !vaddr )
             {
-                dma_set_pte_addr(*pte,
-                                 virt_to_maddr(tmp));
-                /*
-                 * high level table always sets r/w, last level
-                 * page table control read/write
-                 */
-                dma_set_pte_readable(*pte);
-                dma_set_pte_writable(*pte);
-                iommu_flush_cache_entry(iommu, pte);
-            } else /* somebody is fast */
-                free_xenheap_page(tmp);
+                spin_unlock_irqrestore(&hd->mapping_lock, flags);
+                return NULL;
+            }
         }
-        parent = maddr_to_virt(dma_pte_addr(*pte));
+
+        if ( parent != hd->pgd )
+            unmap_domain_page(parent);
+
+        if ( level == 2 && vaddr )
+        {
+            unmap_domain_page(vaddr);
+            break;
+        }
+
+        parent = (struct dma_pte *)vaddr;
+        vaddr = NULL;
         level--;
     }
+
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
-    return pte;
+    return pg;
 }
 
-/* return address's pte at specific level */
-static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr,
-                                          int level)
+/* return address's page at specific level */
+static struct page_info *dma_addr_level_page(struct domain *domain,
+                                             u64 addr, int level)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct dma_pte *parent, *pte = NULL;
     int total = agaw_to_level(hd->agaw);
     int offset;
+    struct page_info *pg = NULL;
 
     parent = hd->pgd;
     while ( level <= total )
     {
         offset = address_level_offset(addr, total);
         pte = &parent[offset];
-        if ( level == total )
-            return pte;
+        if ( dma_pte_addr(*pte) == 0 )
+        {
+            if ( parent != hd->pgd )
+                unmap_domain_page(parent);
+            break;
+        }
 
-        if ( dma_pte_addr(*pte) == 0 )
-            break;
-        parent = maddr_to_virt(dma_pte_addr(*pte));
+        pg = maddr_to_page(pte->val);
+        if ( parent != hd->pgd )
+            unmap_domain_page(parent);
+
+        if ( level == total )
+            return pg;
+
+        parent = map_domain_page(mfn_x(page_to_mfn(pg)));
         total--;
     }
+
     return NULL;
 }
 
@@ -506,12 +534,16 @@ static void dma_pte_clear_one(struct dom
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct dma_pte *pte = NULL;
+    struct page_info *pg = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 
     /* get last level pte */
-    pte = dma_addr_level_pte(domain, addr, 1);
-
+    pg = dma_addr_level_page(domain, addr, 1);
+    if ( !pg )
+        return;
+    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+    pte += address_level_offset(addr, 1);
     if ( pte )
     {
         dma_clear_pte(*pte);
@@ -559,6 +591,7 @@ void dma_pte_free_pagetable(struct domai
     int total = agaw_to_level(hd->agaw);
     int level;
     u32 tmp;
+    struct page_info *pg = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -576,13 +609,16 @@ void dma_pte_free_pagetable(struct domai
 
         while ( tmp < end )
         {
-            pte = dma_addr_level_pte(domain, tmp, level);
-            if ( pte )
-            {
-                free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte)));
-                dma_clear_pte(*pte);
-                iommu_flush_cache_entry(iommu, pte);
-            }
+            pg = dma_addr_level_page(domain, tmp, level);
+            if ( !pg )
+                return;
+            pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+            pte += address_level_offset(tmp, level);
+            dma_clear_pte(*pte);
+            iommu_flush_cache_entry(iommu, pte);
+            unmap_domain_page(pte);
+            free_domheap_page(pg);
+
             tmp += level_size(level);
         }
         level++;
@@ -1445,6 +1481,7 @@ int iommu_map_page(struct domain *d, pad
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct dma_pte *pte = NULL;
+    struct page_info *pg = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1453,12 +1490,15 @@ int iommu_map_page(struct domain *d, pad
     if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
 
-    pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K);
-    if ( !pte )
+    pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
+    if ( !pg )
         return -ENOMEM;
+    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+    pte += mfn & LEVEL_MASK;
     dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
     iommu_flush_cache_entry(iommu, pte);
+    unmap_domain_page(pte);
 
     for_each_drhd_unit ( drhd )
     {
@@ -1477,7 +1517,6 @@ int iommu_unmap_page(struct domain *d, d
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    struct dma_pte *pte = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1486,10 +1525,8 @@ int iommu_unmap_page(struct domain *d, d
     if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
 
-    /* get last level pte */
-    pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1);
     dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
-
+
     return 0;
 }
 
@@ -1501,6 +1538,7 @@ int iommu_page_mapping(struct domain *do
     unsigned long start_pfn, end_pfn;
     struct dma_pte *pte = NULL;
     int index;
+    struct page_info *pg = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1513,12 +1551,15 @@ int iommu_page_mapping(struct domain *do
     index = 0;
     while ( start_pfn < end_pfn )
     {
-        pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
-        if ( !pte )
+        pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
+        if ( !pg )
            return -ENOMEM;
+        pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+        pte += start_pfn & LEVEL_MASK;
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
         iommu_flush_cache_entry(iommu, pte);
+        unmap_domain_page(pte);
         start_pfn++;
         index++;
     }
@@ -1537,12 +1578,8 @@ int iommu_page_mapping(struct domain *do
 
 int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
 {
-    struct dma_pte *pte = NULL;
+    dma_pte_clear_range(domain, addr, addr + size);
 
-    /* get last level pte */
-    pte = dma_addr_level_pte(domain, addr, 1);
-    dma_pte_clear_range(domain, addr, addr + size);
-
     return 0;
 }
```
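The VT-d changes above move the DMA page-table pages from the xenheap (which has a permanent virtual mapping) to domheap pages, so every PTE access now has to be bracketed by `map_domain_page()` / `unmap_domain_page()`. A sketch of that recurring pattern, for illustration only (the helper name and the `table_pg`/`idx` parameters are hypothetical, not part of the changeset):

```c
/*
 * Illustrative sketch, not part of the changeset: the map/modify/unmap
 * discipline the VT-d code above adopts once page-table pages come from
 * the domheap.  Unlike xenheap pages they have no permanent virtual
 * mapping, so the mapping is created just for the update and torn down
 * again immediately afterwards.
 */
static void write_dma_pte(struct iommu *iommu, struct page_info *table_pg,
                          unsigned int idx, u64 maddr)
{
    struct dma_pte *table = map_domain_page(mfn_x(page_to_mfn(table_pg)));

    dma_set_pte_addr(table[idx], maddr);
    dma_set_pte_readable(table[idx]);
    dma_set_pte_writable(table[idx]);
    iommu_flush_cache_entry(iommu, &table[idx]);

    unmap_domain_page(table);           /* the mapping is transient */
}
```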
```diff
--- a/xen/arch/x86/mm/paging.c  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/arch/x86/mm/paging.c  Mon Oct 01 13:59:37 2007 -0600
@@ -28,9 +28,7 @@
 #include <asm/guest_access.h>
 #include <xsm/xsm.h>
 
-/* Xen command-line option to enable hardware-assisted paging */
-int opt_hap_enabled;
-boolean_param("hap", opt_hap_enabled);
+#define hap_enabled(d) (hvm_funcs.hap_supported && is_hvm_domain(d))
 
 /* Printouts */
 #define PAGING_PRINTK(_f, _a...)                                     \
@@ -363,14 +361,14 @@ void paging_domain_init(struct domain *d
     shadow_domain_init(d);
 
     /* ... but we will use hardware assistance if it's available. */
-    if ( opt_hap_enabled && is_hvm_domain(d) )
+    if ( hap_enabled(d) )
         hap_domain_init(d);
 }
 
 /* vcpu paging struct initialization goes here */
 void paging_vcpu_init(struct vcpu *v)
 {
-    if ( opt_hap_enabled && is_hvm_vcpu(v) )
+    if ( hap_enabled(v->domain) )
         hap_vcpu_init(v);
     else
         shadow_vcpu_init(v);
@@ -434,7 +432,7 @@ int paging_domctl(struct domain *d, xen_
     }
 
     /* Here, dispatch domctl to the appropriate paging code */
-    if ( opt_hap_enabled && is_hvm_domain(d) )
+    if ( hap_enabled(d) )
         return hap_domctl(d, sc, u_domctl);
     else
         return shadow_domctl(d, sc, u_domctl);
@@ -443,7 +441,7 @@ int paging_domctl(struct domain *d, xen_
 /* Call when destroying a domain */
 void paging_teardown(struct domain *d)
 {
-    if ( opt_hap_enabled && is_hvm_domain(d) )
+    if ( hap_enabled(d) )
         hap_teardown(d);
     else
         shadow_teardown(d);
@@ -455,7 +453,7 @@ void paging_teardown(struct domain *d)
 /* Call once all of the references to the domain have gone away */
 void paging_final_teardown(struct domain *d)
 {
-    if ( opt_hap_enabled && is_hvm_domain(d) )
+    if ( hap_enabled(d) )
         hap_final_teardown(d);
     else
         shadow_final_teardown(d);
@@ -465,7 +463,7 @@ void paging_final_teardown(struct domain
  * creation. */
 int paging_enable(struct domain *d, u32 mode)
 {
-    if ( opt_hap_enabled && is_hvm_domain(d) )
+    if ( hap_enabled(d) )
         return hap_enable(d, mode | PG_HAP_enable);
     else
         return shadow_enable(d, mode | PG_SH_enable);
```
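With the global `opt_hap_enabled` flag gone, the paging code above keys everything off the new `hap_enabled(d)` macro. A reading aid only, not part of the changeset, summarising how the pieces across this merge fit together:

```c
/*
 * Reading aid only, not part of the changeset: whether a domain actually
 * uses HAP now follows from three things wired up elsewhere in this merge:
 *   1. the vendor code reported support (SVM sets hap_supported from
 *      svm_npt_detect(); a backend that never sets it falls back to shadow),
 *   2. the admin did not disable it with the new "hap" boolean option on
 *      the Xen command line (hvm_enable() then clears hap_supported), and
 *   3. the domain is an HVM domain.
 */
#define hap_enabled(d) (hvm_funcs.hap_supported && is_hvm_domain(d))
```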
```diff
--- a/xen/include/asm-x86/hvm/hvm.h  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h  Mon Oct 01 13:59:37 2007 -0600
@@ -72,6 +72,9 @@ enum hvm_intack {
 struct hvm_function_table {
     char *name;
 
+    /* Support Hardware-Assisted Paging? */
+    int hap_supported;
+
     /*
      * Initialise/destroy HVM domain/vcpu resources
      */
```
```diff
--- a/xen/include/asm-x86/hvm/support.h  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h  Mon Oct 01 13:59:37 2007 -0600
@@ -234,6 +234,7 @@ int hvm_do_hypercall(struct cpu_user_reg
 void hvm_hlt(unsigned long rflags);
 void hvm_triple_fault(void);
 
+int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value);
 int hvm_set_cr3(unsigned long value);
 int hvm_set_cr4(unsigned long value);
```
```diff
--- a/xen/include/xsm/acm/acm_core.h  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/include/xsm/acm/acm_core.h  Mon Oct 01 13:59:37 2007 -0600
@@ -154,7 +154,7 @@ static inline int acm_array_append_tuple
 
 /* protos */
 int acm_init_domain_ssid(struct domain *, ssidref_t ssidref);
-void acm_free_domain_ssid(struct acm_ssid_domain *ssid);
+void acm_free_domain_ssid(struct domain *);
 int acm_init_binary_policy(u32 policy_code);
 int acm_set_policy(XEN_GUEST_HANDLE_64(void) buf, u32 buf_size);
 int do_acm_set_policy(void *buf, u32 buf_size, int is_bootpolicy,
```
```diff
--- a/xen/include/xsm/acm/acm_hooks.h  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/include/xsm/acm/acm_hooks.h  Mon Oct 01 13:59:37 2007 -0600
@@ -258,7 +258,7 @@ static inline void acm_domain_destroy(st
             acm_secondary_ops->domain_destroy(ssid, d);
         /* free security ssid for the destroyed domain (also if null policy */
         acm_domain_ssid_off_list(ssid);
-        acm_free_domain_ssid((struct acm_ssid_domain *)(ssid));
+        acm_free_domain_ssid(d);
     }
 }
 
@@ -294,7 +294,7 @@ static inline int acm_domain_create(stru
         {
             acm_domain_ssid_onto_list(d->ssid);
         } else {
-            acm_free_domain_ssid(d->ssid);
+            acm_free_domain_ssid(d);
         }
 
 error_out:
```
```diff
--- a/xen/xsm/acm/acm_core.c  Mon Oct 01 09:59:24 2007 -0600
+++ b/xen/xsm/acm/acm_core.c  Mon Oct 01 13:59:37 2007 -0600
@@ -361,7 +361,7 @@ int acm_init_domain_ssid(struct domain *
     {
         printk("%s: ERROR instantiating individual ssids for domain 0x%02x.\n",
                __func__, subj->domain_id);
-        acm_free_domain_ssid(ssid);
+        acm_free_domain_ssid(subj);
         return ACM_INIT_SSID_ERROR;
     }
 
@@ -372,8 +372,10 @@ int acm_init_domain_ssid(struct domain *
 
 
 void
-acm_free_domain_ssid(struct acm_ssid_domain *ssid)
+acm_free_domain_ssid(struct domain *d)
 {
+    struct acm_ssid_domain *ssid = d->ssid;
+
     /* domain is already gone, just ssid is left */
     if (ssid == NULL)
         return;
@@ -387,6 +389,8 @@ acm_free_domain_ssid(struct acm_ssid_dom
     ssid->secondary_ssid = NULL;
 
     xfree(ssid);
+    d->ssid = NULL;
+
     printkd("%s: Freed individual domain ssid (domain=%02x).\n",
             __func__, id);
 }
```
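For completeness, a hypothetical caller-side view of the ACM interface change above (the function below is illustrative only, not part of the changeset): `acm_free_domain_ssid()` now takes the domain itself and clears `d->ssid` before returning, so callers cannot be left holding a freed pointer.

```c
/*
 * Hypothetical illustration, not part of the changeset: how a teardown
 * path sees the reworked acm_free_domain_ssid() interface.
 */
static void example_acm_teardown(struct domain *d)
{
    acm_domain_ssid_off_list(d->ssid);  /* as in acm_domain_destroy() */
    acm_free_domain_ssid(d);            /* frees the ssid and NULLs d->ssid */
    ASSERT(d->ssid == NULL);
}
```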