direct-io.hg
changeset 14283:a7f6392ea850
[HVM] Add support for hardware-assisted paging
as the second implementation of the generic paging-assistance interface.
Signed-off-by: Wei Huang <wei.huang2@amd.com>
author | Tim Deegan <Tim.Deegan@xensource.com>
date | Thu Mar 08 10:54:56 2007 +0000 (2007-03-08)
parents | 8117f6684991
children | 18cf0c56226d
files | xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/mm/Makefile xen/arch/x86/mm/hap/Makefile xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/hap/private.h xen/arch/x86/mm/hap/support.c xen/arch/x86/mm/page-guest32.h xen/arch/x86/mm/paging.c xen/arch/x86/mm/shadow/page-guest32.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/domain.h xen/include/asm-x86/hap.h
line diff
1.1 --- a/xen/arch/x86/hvm/hvm.c Wed Mar 07 16:21:21 2007 +0000 1.2 +++ b/xen/arch/x86/hvm/hvm.c Thu Mar 08 10:54:56 2007 +0000 1.3 @@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain 1.4 spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); 1.5 spin_lock_init(&d->arch.hvm_domain.irq_lock); 1.6 1.7 - rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external); 1.8 + /* paging support will be determined inside paging.c */ 1.9 + rc = paging_enable(d, PG_refcounts|PG_translate|PG_external); 1.10 if ( rc != 0 ) 1.11 return rc; 1.12
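The hunk above stops hard-coding PG_SH_enable at the HVM call site, leaving the choice of paging assistance to paging.c. A minimal stand-alone sketch of the resulting dispatch, mirroring the xen/arch/x86/mm/paging.c hunk further down; the PG_* values and the stub functions here are illustrative, not the real Xen definitions:

```c
#include <stdio.h>

/* Illustrative flag values only; the real PG_* constants live in
 * xen/include/asm-x86/domain.h, which is not shown in this changeset. */
enum {
    PG_refcounts  = 1 << 1,
    PG_translate  = 1 << 2,
    PG_external   = 1 << 3,
    PG_SH_enable  = 1 << 4,
    PG_HAP_enable = 1 << 5,
};

static int opt_hap_enabled = 1;    /* "hap" boot option */
static int hap_capable_system = 1; /* set by the CPUID probe in svm.c */

/* Stand-ins for shadow_enable()/hap_enable(). */
static int shadow_enable(unsigned int mode) { printf("shadow, mode %#x\n", mode); return 0; }
static int hap_enable(unsigned int mode)    { printf("hap, mode %#x\n", mode); return 0; }

/* paging_enable() now adds the implementation-selecting bit itself. */
static int paging_enable(int is_hvm, unsigned int mode)
{
    if ( opt_hap_enabled && hap_capable_system && is_hvm )
        return hap_enable(mode | PG_HAP_enable);
    return shadow_enable(mode | PG_SH_enable);
}

int main(void)
{
    /* Same flag set hvm_domain_initialise() now passes. */
    return paging_enable(1, PG_refcounts | PG_translate | PG_external);
}
```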
2.1 --- a/xen/arch/x86/hvm/svm/svm.c Wed Mar 07 16:21:21 2007 +0000 2.2 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Mar 08 10:54:56 2007 +0000 2.3 @@ -49,6 +49,7 @@ 2.4 #include <public/sched.h> 2.5 #include <asm/hvm/vpt.h> 2.6 #include <asm/hvm/trace.h> 2.7 +#include <asm/hap.h> 2.8 2.9 #define SVM_EXTRA_DEBUG 2.10 2.11 @@ -76,6 +77,10 @@ static void *root_vmcb[NR_CPUS] __read_m 2.12 /* physical address of above for host VMSAVE/VMLOAD */ 2.13 u64 root_vmcb_pa[NR_CPUS] __read_mostly; 2.14 2.15 +/* hardware assisted paging bits */ 2.16 +extern int opt_hap_enabled; 2.17 +extern int hap_capable_system; 2.18 + 2.19 static inline void svm_inject_exception(struct vcpu *v, int trap, 2.20 int ev, int error_code) 2.21 { 2.22 @@ -240,7 +245,9 @@ static inline int long_mode_do_msr_write 2.23 /* 2.24 * Check for EFER.LME transitions from 0->1 or 1->0. Do the 2.25 * sanity checks and then make sure that both EFER.LME and 2.26 - * EFER.LMA are cleared. 2.27 + * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb 2.28 + * until the guest also sets CR0.PG, since even if the guest has 2.29 + * paging "disabled", the vmcb's CR0 always has PG set.) 2.30 */ 2.31 if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) ) 2.32 { 2.33 @@ -269,10 +276,12 @@ static inline int long_mode_do_msr_write 2.34 2.35 vmcb->efer &= ~(EFER_LME | EFER_LMA); 2.36 } 2.37 + 2.38 #endif /* __x86_64__ */ 2.39 2.40 /* update the guest EFER's shadow with the intended value */ 2.41 v->arch.hvm_svm.cpu_shadow_efer = msr_content; 2.42 + 2.43 break; 2.44 2.45 #ifdef __x86_64__ 2.46 @@ -902,6 +911,10 @@ static void arch_svm_do_launch(struct vc 2.47 { 2.48 svm_do_launch(v); 2.49 2.50 + if ( paging_mode_hap(v->domain) ) { 2.51 + v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); 2.52 + } 2.53 + 2.54 if ( v->vcpu_id != 0 ) 2.55 { 2.56 cpu_user_regs_t *regs = ¤t->arch.guest_context.user_regs; 2.57 @@ -1008,6 +1021,21 @@ static struct hvm_function_table svm_fun 2.58 .event_injection_faulted = svm_event_injection_faulted 2.59 }; 2.60 2.61 +void svm_npt_detect(void) 2.62 +{ 2.63 + u32 eax, ebx, ecx, edx; 2.64 + 2.65 + /* check CPUID for nested paging support */ 2.66 + cpuid(0x8000000A, &eax, &ebx, &ecx, &edx); 2.67 + if ( edx & 0x01 ) { /* nested paging */ 2.68 + hap_capable_system = 1; 2.69 + } 2.70 + else if ( opt_hap_enabled ) { 2.71 + printk(" nested paging is not supported by this CPU.\n"); 2.72 + hap_capable_system = 0; /* no nested paging, we disable flag. */ 2.73 + } 2.74 +} 2.75 + 2.76 int start_svm(void) 2.77 { 2.78 u32 eax, ecx, edx; 2.79 @@ -1038,6 +1066,8 @@ int start_svm(void) 2.80 wrmsr(MSR_EFER, eax, edx); 2.81 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu ); 2.82 2.83 + svm_npt_detect(); 2.84 + 2.85 /* Initialize the HSA for this core */ 2.86 phys_hsa = (u64) virt_to_maddr(hsa[cpu]); 2.87 phys_hsa_lo = (u32) phys_hsa; 2.88 @@ -1074,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v) 2.89 } 2.90 } 2.91 2.92 +static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs) 2.93 +{ 2.94 + if (mmio_space(gpa)) { 2.95 + handle_mmio(gpa); 2.96 + return 1; 2.97 + } 2.98 + 2.99 + /* We should not reach here. 
Otherwise, P2M table is not correct.*/ 2.100 + return 0; 2.101 +} 2.102 + 2.103 + 2.104 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 2.105 { 2.106 HVM_DBG_LOG(DBG_LEVEL_VMMU, 2.107 @@ -1700,6 +1742,52 @@ static void svm_io_instruction(struct vc 2.108 } 2.109 } 2.110 2.111 +static int npt_set_cr0(unsigned long value) 2.112 +{ 2.113 + struct vcpu *v = current; 2.114 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 2.115 + 2.116 + ASSERT(vmcb); 2.117 + 2.118 + /* ET is reserved and should be always be 1*/ 2.119 + value |= X86_CR0_ET; 2.120 + 2.121 + /* Check whether the guest is about to turn on long mode. 2.122 + * If it is, set EFER.LME and EFER.LMA. Update the shadow EFER.LMA 2.123 + * bit too, so svm_long_mode_enabled() will work. 2.124 + */ 2.125 + if ( (value & X86_CR0_PG) && svm_lme_is_set(v) && 2.126 + (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) ) 2.127 + { 2.128 + v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA; 2.129 + vmcb->efer |= EFER_LMA | EFER_LME; 2.130 + } 2.131 + 2.132 + /* Whenever CR0.PG is cleared under long mode, LMA will be cleared 2.133 + * immediatly. We emulate this process for svm_long_mode_enabled(). 2.134 + */ 2.135 + if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) 2.136 + { 2.137 + if ( svm_long_mode_enabled(v) ) 2.138 + { 2.139 + v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA; 2.140 + } 2.141 + } 2.142 + 2.143 + vmcb->cr0 = value | X86_CR0_WP; 2.144 + v->arch.hvm_svm.cpu_shadow_cr0 = value; 2.145 + 2.146 + /* TS cleared? Then initialise FPU now. */ 2.147 + if ( !(value & X86_CR0_TS) ) { 2.148 + setup_fpu(v); 2.149 + vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM; 2.150 + } 2.151 + 2.152 + paging_update_paging_modes(v); 2.153 + 2.154 + return 1; 2.155 +} 2.156 + 2.157 static int svm_set_cr0(unsigned long value) 2.158 { 2.159 struct vcpu *v = current; 2.160 @@ -1797,6 +1885,85 @@ static int svm_set_cr0(unsigned long val 2.161 return 1; 2.162 } 2.163 2.164 +// 2.165 +// nested paging functions 2.166 +// 2.167 + 2.168 +static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs) 2.169 +{ 2.170 + unsigned long value; 2.171 + struct vcpu *v = current; 2.172 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 2.173 + struct vlapic *vlapic = vcpu_vlapic(v); 2.174 + 2.175 + ASSERT(vmcb); 2.176 + 2.177 + value = get_reg(gpreg, regs, vmcb); 2.178 + 2.179 + switch (cr) { 2.180 + case 0: 2.181 + return npt_set_cr0(value); 2.182 + 2.183 + case 3: 2.184 + vmcb->cr3 = value; 2.185 + v->arch.hvm_svm.cpu_cr3 = value; 2.186 + break; 2.187 + 2.188 + case 4: /* CR4 */ 2.189 + vmcb->cr4 = value; 2.190 + v->arch.hvm_svm.cpu_shadow_cr4 = value; 2.191 + paging_update_paging_modes(v); 2.192 + break; 2.193 + 2.194 + case 8: 2.195 + vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); 2.196 + vmcb->vintr.fields.tpr = value & 0x0F; 2.197 + break; 2.198 + 2.199 + default: 2.200 + gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); 2.201 + domain_crash(v->domain); 2.202 + return 0; 2.203 + } 2.204 + 2.205 + return 1; 2.206 +} 2.207 + 2.208 +static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) 2.209 +{ 2.210 + unsigned long value = 0; 2.211 + struct vcpu *v = current; 2.212 + struct vmcb_struct *vmcb; 2.213 + struct vlapic *vlapic = vcpu_vlapic(v); 2.214 + 2.215 + vmcb = v->arch.hvm_svm.vmcb; 2.216 + ASSERT(vmcb); 2.217 + 2.218 + switch(cr) { 2.219 + case 0: 2.220 + value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0; 2.221 + break; 2.222 + case 2: 2.223 + value = vmcb->cr2; 2.224 + break; 2.225 + case 3: 2.226 
+ value = (unsigned long) v->arch.hvm_svm.cpu_cr3; 2.227 + break; 2.228 + case 4: 2.229 + value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4; 2.230 + break; 2.231 + case 8: 2.232 + value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI); 2.233 + value = (value & 0xF0) >> 4; 2.234 + break; 2.235 + default: 2.236 + domain_crash(v->domain); 2.237 + return; 2.238 + } 2.239 + 2.240 + set_reg(gp, value, regs, vmcb); 2.241 +} 2.242 + 2.243 /* 2.244 * Read from control registers. CR0 and CR4 are read from the shadow. 2.245 */ 2.246 @@ -2043,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v, 2.247 { 2.248 case INSTR_MOV2CR: 2.249 gpreg = decode_src_reg(prefix, buffer[index+2]); 2.250 - result = mov_to_cr(gpreg, cr, regs); 2.251 + if ( paging_mode_hap(v->domain) ) 2.252 + result = npt_mov_to_cr(gpreg, cr, regs); 2.253 + else 2.254 + result = mov_to_cr(gpreg, cr, regs); 2.255 break; 2.256 2.257 case INSTR_MOVCR2: 2.258 gpreg = decode_src_reg(prefix, buffer[index+2]); 2.259 - mov_from_cr(cr, gpreg, regs); 2.260 + if ( paging_mode_hap(v->domain) ) 2.261 + npt_mov_from_cr(cr, gpreg, regs); 2.262 + else 2.263 + mov_from_cr(cr, gpreg, regs); 2.264 break; 2.265 2.266 case INSTR_CLTS: 2.267 @@ -2075,7 +2248,10 @@ static int svm_cr_access(struct vcpu *v, 2.268 if (svm_dbg_on) 2.269 printk("CR0-LMSW CR0 - New value=%lx\n", value); 2.270 2.271 - result = svm_set_cr0(value); 2.272 + if ( paging_mode_hap(v->domain) ) 2.273 + result = npt_set_cr0(value); 2.274 + else 2.275 + result = svm_set_cr0(value); 2.276 break; 2.277 2.278 case INSTR_SMSW: 2.279 @@ -2359,6 +2535,11 @@ static int svm_do_vmmcall_reset_to_realm 2.280 vmcb->cr4 = SVM_CR4_HOST_MASK; 2.281 v->arch.hvm_svm.cpu_shadow_cr4 = 0; 2.282 2.283 + if ( paging_mode_hap(v->domain) ) { 2.284 + vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; 2.285 + vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4; 2.286 + } 2.287 + 2.288 /* This will jump to ROMBIOS */ 2.289 vmcb->rip = 0xFFF0; 2.290 2.291 @@ -3042,6 +3223,15 @@ asmlinkage void svm_vmexit_handler(struc 2.292 hvm_triple_fault(); 2.293 break; 2.294 2.295 + case VMEXIT_NPF: 2.296 + { 2.297 + regs->error_code = vmcb->exitinfo1; 2.298 + if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) { 2.299 + domain_crash(v->domain); 2.300 + } 2.301 + break; 2.302 + } 2.303 + 2.304 default: 2.305 exit_and_crash: 2.306 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
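svm_npt_detect() above probes CPUID leaf 0x8000000A and tests EDX bit 0 for nested-paging support. A user-space analogue of that probe, using the GCC/Clang <cpuid.h> helper instead of Xen's cpuid():

```c
#include <stdio.h>
#include <cpuid.h>  /* GCC/Clang helper; stand-in for Xen's cpuid() */

int main(void)
{
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

    /* Leaf 0x8000000A: SVM revision and feature identification.
     * EDX bit 0 is the nested-paging bit svm_npt_detect() tests. */
    if ( !__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx) ) {
        puts("CPUID leaf 0x8000000A not available (no SVM?)");
        return 1;
    }
    printf("nested paging %ssupported by this CPU\n",
           (edx & 1) ? "" : "not ");
    return 0;
}
```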
3.1 --- a/xen/arch/x86/hvm/svm/vmcb.c Wed Mar 07 16:21:21 2007 +0000 3.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Mar 08 10:54:56 2007 +0000 3.3 @@ -201,6 +201,13 @@ static int construct_vmcb(struct vcpu *v 3.4 3.5 arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP; 3.6 3.7 + if ( paging_mode_hap(v->domain) ) { 3.8 + vmcb->cr0 = arch_svm->cpu_shadow_cr0; 3.9 + vmcb->np_enable = 1; /* enable nested paging */ 3.10 + vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ 3.11 + vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG; 3.12 + } 3.13 + 3.14 return 0; 3.15 } 3.16 3.17 @@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, str 3.18 printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 3.19 (unsigned long long) vmcb->kerngsbase, 3.20 (unsigned long long) vmcb->g_pat); 3.21 - 3.22 + printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3); 3.23 + 3.24 /* print out all the selectors */ 3.25 svm_dump_sel("CS", &vmcb->cs); 3.26 svm_dump_sel("DS", &vmcb->ds);
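construct_vmcb() programs the guest PAT with 0x0007040600070406 when nested paging is enabled; this is the architectural power-on default for IA32_PAT. A small sketch decoding that value byte by byte (each byte is one PAT entry's memory type):

```c
#include <stdio.h>
#include <stdint.h>

/* x86 PAT memory-type encodings. */
static const char *pat_type(uint8_t t)
{
    switch ( t ) {
    case 0x00: return "UC";   /* Uncacheable */
    case 0x01: return "WC";   /* Write-Combining */
    case 0x04: return "WT";   /* Write-Through */
    case 0x05: return "WP";   /* Write-Protected */
    case 0x06: return "WB";   /* Write-Back */
    case 0x07: return "UC-";  /* Uncached, overridable by MTRRs */
    default:   return "reserved";
    }
}

int main(void)
{
    uint64_t g_pat = 0x0007040600070406ULL;  /* value set in construct_vmcb() */

    for ( int i = 0; i < 8; i++ )
        printf("PA%d = %s\n", i, pat_type((g_pat >> (8 * i)) & 0xff));
    return 0;  /* prints WB, WT, UC-, UC for each half */
}
```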
4.1 --- a/xen/arch/x86/mm/Makefile Wed Mar 07 16:21:21 2007 +0000 4.2 +++ b/xen/arch/x86/mm/Makefile Thu Mar 08 10:54:56 2007 +0000 4.3 @@ -1,4 +1,5 @@ 4.4 subdir-y += shadow 4.5 +subdir-y += hap 4.6 4.7 obj-y += paging.o 4.8 obj-y += p2m.o
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/xen/arch/x86/mm/hap/Makefile Thu Mar 08 10:54:56 2007 +0000 5.3 @@ -0,0 +1,2 @@ 5.4 +obj-y += hap.o 5.5 +obj-y += support.o
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/xen/arch/x86/mm/hap/hap.c Thu Mar 08 10:54:56 2007 +0000 6.3 @@ -0,0 +1,708 @@ 6.4 +/****************************************************************************** 6.5 + * arch/x86/mm/hap/hap.c 6.6 + * 6.7 + * hardware assisted paging 6.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 6.9 + * Parts of this code are Copyright (c) 2007 by XenSource Inc. 6.10 + * 6.11 + * This program is free software; you can redistribute it and/or modify 6.12 + * it under the terms of the GNU General Public License as published by 6.13 + * the Free Software Foundation; either version 2 of the License, or 6.14 + * (at your option) any later version. 6.15 + * 6.16 + * This program is distributed in the hope that it will be useful, 6.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 6.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 6.19 + * GNU General Public License for more details. 6.20 + * 6.21 + * You should have received a copy of the GNU General Public License 6.22 + * along with this program; if not, write to the Free Software 6.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 6.24 + */ 6.25 + 6.26 +#include <xen/config.h> 6.27 +#include <xen/types.h> 6.28 +#include <xen/mm.h> 6.29 +#include <xen/trace.h> 6.30 +#include <xen/sched.h> 6.31 +#include <xen/perfc.h> 6.32 +#include <xen/irq.h> 6.33 +#include <xen/domain_page.h> 6.34 +#include <xen/guest_access.h> 6.35 +#include <xen/keyhandler.h> 6.36 +#include <asm/event.h> 6.37 +#include <asm/page.h> 6.38 +#include <asm/current.h> 6.39 +#include <asm/flushtlb.h> 6.40 +#include <asm/shared.h> 6.41 +#include <asm/hap.h> 6.42 +#include <asm/paging.h> 6.43 +#include <asm/domain.h> 6.44 + 6.45 +#include "private.h" 6.46 + 6.47 +/* Override macros from asm/page.h to make them work with mfn_t */ 6.48 +#undef mfn_to_page 6.49 +#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 6.50 +#undef mfn_valid 6.51 +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 6.52 +#undef page_to_mfn 6.53 +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 6.54 + 6.55 +/************************************************/ 6.56 +/* HAP SUPPORT FUNCTIONS */ 6.57 +/************************************************/ 6.58 +mfn_t hap_alloc(struct domain *d, unsigned long backpointer) 6.59 +{ 6.60 + struct page_info *sp = NULL; 6.61 + void *p; 6.62 + 6.63 + ASSERT(hap_locked_by_me(d)); 6.64 + 6.65 + sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list); 6.66 + list_del(&sp->list); 6.67 + d->arch.paging.hap.free_pages -= 1; 6.68 + 6.69 + /* Now safe to clear the page for reuse */ 6.70 + p = hap_map_domain_page(page_to_mfn(sp)); 6.71 + ASSERT(p != NULL); 6.72 + clear_page(p); 6.73 + hap_unmap_domain_page(p); 6.74 + 6.75 + return page_to_mfn(sp); 6.76 +} 6.77 + 6.78 +void hap_free(struct domain *d, mfn_t smfn) 6.79 +{ 6.80 + struct page_info *sp = mfn_to_page(smfn); 6.81 + 6.82 + ASSERT(hap_locked_by_me(d)); 6.83 + 6.84 + d->arch.paging.hap.free_pages += 1; 6.85 + list_add_tail(&sp->list, &d->arch.paging.hap.freelists); 6.86 +} 6.87 + 6.88 +static int hap_alloc_p2m_pages(struct domain *d) 6.89 +{ 6.90 + struct page_info *pg; 6.91 + 6.92 + ASSERT(hap_locked_by_me(d)); 6.93 + 6.94 + pg = mfn_to_page(hap_alloc(d, 0)); 6.95 + d->arch.paging.hap.p2m_pages += 1; 6.96 + d->arch.paging.hap.total_pages -= 1; 6.97 + 6.98 + page_set_owner(pg, d); 6.99 + pg->count_info = 1; 6.100 + list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist); 6.101 + 
6.102 + return 1; 6.103 +} 6.104 + 6.105 +struct page_info * hap_alloc_p2m_page(struct domain *d) 6.106 +{ 6.107 + struct list_head *entry; 6.108 + struct page_info *pg; 6.109 + mfn_t mfn; 6.110 + void *p; 6.111 + 6.112 + hap_lock(d); 6.113 + 6.114 + if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 6.115 + !hap_alloc_p2m_pages(d) ) { 6.116 + hap_unlock(d); 6.117 + return NULL; 6.118 + } 6.119 + entry = d->arch.paging.hap.p2m_freelist.next; 6.120 + list_del(entry); 6.121 + 6.122 + hap_unlock(d); 6.123 + 6.124 + pg = list_entry(entry, struct page_info, list); 6.125 + mfn = page_to_mfn(pg); 6.126 + p = hap_map_domain_page(mfn); 6.127 + clear_page(p); 6.128 + hap_unmap_domain_page(p); 6.129 + 6.130 + return pg; 6.131 +} 6.132 + 6.133 +void hap_free_p2m_page(struct domain *d, struct page_info *pg) 6.134 +{ 6.135 + ASSERT(page_get_owner(pg) == d); 6.136 + /* Should have just the one ref we gave it in alloc_p2m_page() */ 6.137 + if ( (pg->count_info & PGC_count_mask) != 1 ) { 6.138 + HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", 6.139 + pg->count_info, pg->u.inuse.type_info); 6.140 + } 6.141 + /* Free should not decrement domain's total allocation, since 6.142 + * these pages were allocated without an owner. */ 6.143 + page_set_owner(pg, NULL); 6.144 + free_domheap_pages(pg, 0); 6.145 + d->arch.paging.hap.p2m_pages--; 6.146 +} 6.147 + 6.148 +/* Return the size of the pool, rounded up to the nearest MB */ 6.149 +static unsigned int 6.150 +hap_get_allocation(struct domain *d) 6.151 +{ 6.152 + unsigned int pg = d->arch.paging.hap.total_pages; 6.153 + 6.154 + HERE_I_AM; 6.155 + return ((pg >> (20 - PAGE_SHIFT)) 6.156 + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); 6.157 +} 6.158 + 6.159 +/* Set the pool of pages to the required number of pages. 6.160 + * Returns 0 for success, non-zero for failure. 
*/ 6.161 +static unsigned int 6.162 +hap_set_allocation(struct domain *d, unsigned int pages, int *preempted) 6.163 +{ 6.164 + struct page_info *sp; 6.165 + 6.166 + ASSERT(hap_locked_by_me(d)); 6.167 + 6.168 + while ( d->arch.paging.hap.total_pages != pages ) { 6.169 + if ( d->arch.paging.hap.total_pages < pages ) { 6.170 + /* Need to allocate more memory from domheap */ 6.171 + sp = alloc_domheap_pages(NULL, 0, 0); 6.172 + if ( sp == NULL ) { 6.173 + HAP_PRINTK("failed to allocate hap pages.\n"); 6.174 + return -ENOMEM; 6.175 + } 6.176 + d->arch.paging.hap.free_pages += 1; 6.177 + d->arch.paging.hap.total_pages += 1; 6.178 + list_add_tail(&sp->list, &d->arch.paging.hap.freelists); 6.179 + } 6.180 + else if ( d->arch.paging.hap.total_pages > pages ) { 6.181 + /* Need to return memory to domheap */ 6.182 + ASSERT(!list_empty(&d->arch.paging.hap.freelists)); 6.183 + sp = list_entry(d->arch.paging.hap.freelists.next, 6.184 + struct page_info, list); 6.185 + list_del(&sp->list); 6.186 + d->arch.paging.hap.free_pages -= 1; 6.187 + d->arch.paging.hap.total_pages -= 1; 6.188 + free_domheap_pages(sp, 0); 6.189 + } 6.190 + 6.191 + /* Check to see if we need to yield and try again */ 6.192 + if ( preempted && hypercall_preempt_check() ) { 6.193 + *preempted = 1; 6.194 + return 0; 6.195 + } 6.196 + } 6.197 + 6.198 + return 0; 6.199 +} 6.200 + 6.201 +#if CONFIG_PAGING_LEVELS == 4 6.202 +void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn) 6.203 +{ 6.204 + struct domain *d = v->domain; 6.205 + l4_pgentry_t *sl4e; 6.206 + 6.207 + sl4e = hap_map_domain_page(sl4mfn); 6.208 + ASSERT(sl4e != NULL); 6.209 + 6.210 + /* Copy the common Xen mappings from the idle domain */ 6.211 + memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT], 6.212 + &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], 6.213 + ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t)); 6.214 + 6.215 + /* Install the per-domain mappings for this domain */ 6.216 + sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] = 6.217 + l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))), 6.218 + __PAGE_HYPERVISOR); 6.219 + 6.220 + sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] = 6.221 + l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR); 6.222 + 6.223 + /* install domain-specific P2M table */ 6.224 + sl4e[l4_table_offset(RO_MPT_VIRT_START)] = 6.225 + l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)), 6.226 + __PAGE_HYPERVISOR); 6.227 + 6.228 + hap_unmap_domain_page(sl4e); 6.229 +} 6.230 +#endif /* CONFIG_PAGING_LEVELS == 4 */ 6.231 + 6.232 +#if CONFIG_PAGING_LEVELS == 3 6.233 +void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn) 6.234 +{ 6.235 + struct domain *d = v->domain; 6.236 + l2_pgentry_t *sl2e; 6.237 + 6.238 + int i; 6.239 + 6.240 + sl2e = hap_map_domain_page(sl2hmfn); 6.241 + ASSERT(sl2e != NULL); 6.242 + 6.243 + /* Copy the common Xen mappings from the idle domain */ 6.244 + memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], 6.245 + &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], 6.246 + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); 6.247 + 6.248 + /* Install the per-domain mappings for this domain */ 6.249 + for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) 6.250 + sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = 6.251 + l2e_from_pfn( 6.252 + mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)), 6.253 + __PAGE_HYPERVISOR); 6.254 + 6.255 + for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ ) 6.256 + sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = 6.257 + l2e_empty(); 6.258 + 6.259 + if ( 
paging_mode_translate(d) ) 6.260 + { 6.261 + /* Install the domain-specific p2m table */ 6.262 + l3_pgentry_t *p2m; 6.263 + ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0); 6.264 + p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 6.265 + for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ ) 6.266 + { 6.267 + sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = 6.268 + (l3e_get_flags(p2m[i]) & _PAGE_PRESENT) 6.269 + ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))), 6.270 + __PAGE_HYPERVISOR) 6.271 + : l2e_empty(); 6.272 + } 6.273 + hap_unmap_domain_page(p2m); 6.274 + } 6.275 + 6.276 + hap_unmap_domain_page(sl2e); 6.277 +} 6.278 +#endif 6.279 + 6.280 +#if CONFIG_PAGING_LEVELS == 2 6.281 +void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn) 6.282 +{ 6.283 + struct domain *d = v->domain; 6.284 + l2_pgentry_t *sl2e; 6.285 + int i; 6.286 + 6.287 + sl2e = hap_map_domain_page(sl2mfn); 6.288 + ASSERT(sl2e != NULL); 6.289 + 6.290 + /* Copy the common Xen mappings from the idle domain */ 6.291 + memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT], 6.292 + &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT], 6.293 + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); 6.294 + 6.295 + /* Install the per-domain mappings for this domain */ 6.296 + for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) 6.297 + sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = 6.298 + l2e_from_pfn( 6.299 + mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)), 6.300 + __PAGE_HYPERVISOR); 6.301 + 6.302 + 6.303 + sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = 6.304 + l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR); 6.305 + 6.306 + /* install domain-specific P2M table */ 6.307 + sl2e[l2_table_offset(RO_MPT_VIRT_START)] = 6.308 + l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)), 6.309 + __PAGE_HYPERVISOR); 6.310 + 6.311 + hap_unmap_domain_page(sl2e); 6.312 +} 6.313 +#endif 6.314 + 6.315 +mfn_t hap_make_monitor_table(struct vcpu *v) 6.316 +{ 6.317 + struct domain *d = v->domain; 6.318 + 6.319 + ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); 6.320 + 6.321 +#if CONFIG_PAGING_LEVELS == 4 6.322 + { 6.323 + mfn_t m4mfn; 6.324 + m4mfn = hap_alloc(d, 0); 6.325 + hap_install_xen_entries_in_l4(v, m4mfn, m4mfn); 6.326 + return m4mfn; 6.327 + } 6.328 +#elif CONFIG_PAGING_LEVELS == 3 6.329 + { 6.330 + mfn_t m3mfn, m2mfn; 6.331 + l3_pgentry_t *l3e; 6.332 + l2_pgentry_t *l2e; 6.333 + int i; 6.334 + 6.335 + m3mfn = hap_alloc(d, 0); 6.336 + 6.337 + /* Install a monitor l2 table in slot 3 of the l3 table. 6.338 + * This is used for all Xen entries, including linear maps 6.339 + */ 6.340 + m2mfn = hap_alloc(d, 0); 6.341 + l3e = hap_map_domain_page(m3mfn); 6.342 + l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT); 6.343 + hap_install_xen_entries_in_l2h(v, m2mfn); 6.344 + /* Install the monitor's own linear map */ 6.345 + l2e = hap_map_domain_page(m2mfn); 6.346 + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) 6.347 + l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = 6.348 + (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) 6.349 + ? 
l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR) 6.350 + : l2e_empty(); 6.351 + hap_unmap_domain_page(l2e); 6.352 + hap_unmap_domain_page(l3e); 6.353 + 6.354 + HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn)); 6.355 + return m3mfn; 6.356 + } 6.357 +#else 6.358 + { 6.359 + mfn_t m2mfn; 6.360 + 6.361 + m2mfn = hap_alloc(d, 0); 6.362 + hap_install_xen_entries_in_l2(v, m2mfn, m2mfn); 6.363 + 6.364 + return m2mfn; 6.365 + } 6.366 +#endif 6.367 +} 6.368 + 6.369 +void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn) 6.370 +{ 6.371 + struct domain *d = v->domain; 6.372 + 6.373 +#if CONFIG_PAGING_LEVELS == 4 6.374 + /* Need to destroy the l3 monitor page in slot 0 too */ 6.375 + { 6.376 + mfn_t m3mfn; 6.377 + l4_pgentry_t *l4e = hap_map_domain_page(mmfn); 6.378 + ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT); 6.379 + m3mfn = _mfn(l4e_get_pfn(l4e[0])); 6.380 + hap_free(d, m3mfn); 6.381 + hap_unmap_domain_page(l4e); 6.382 + } 6.383 +#elif CONFIG_PAGING_LEVELS == 3 6.384 + /* Need to destroy the l2 monitor page in slot 4 too */ 6.385 + { 6.386 + l3_pgentry_t *l3e = hap_map_domain_page(mmfn); 6.387 + ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); 6.388 + hap_free(d, _mfn(l3e_get_pfn(l3e[3]))); 6.389 + hap_unmap_domain_page(l3e); 6.390 + } 6.391 +#endif 6.392 + 6.393 + /* Put the memory back in the pool */ 6.394 + hap_free(d, mmfn); 6.395 +} 6.396 + 6.397 +/************************************************/ 6.398 +/* HAP DOMAIN LEVEL FUNCTIONS */ 6.399 +/************************************************/ 6.400 +void hap_domain_init(struct domain *d) 6.401 +{ 6.402 + hap_lock_init(d); 6.403 + INIT_LIST_HEAD(&d->arch.paging.hap.freelists); 6.404 + INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist); 6.405 +} 6.406 + 6.407 +/* return 0 for success, -errno for failure */ 6.408 +int hap_enable(struct domain *d, u32 mode) 6.409 +{ 6.410 + unsigned int old_pages; 6.411 + int rv = 0; 6.412 + 6.413 + HERE_I_AM; 6.414 + 6.415 + domain_pause(d); 6.416 + /* error check */ 6.417 + if ( (d == current->domain) ) { 6.418 + rv = -EINVAL; 6.419 + goto out; 6.420 + } 6.421 + 6.422 + old_pages = d->arch.paging.hap.total_pages; 6.423 + if ( old_pages == 0 ) { 6.424 + unsigned int r; 6.425 + hap_lock(d); 6.426 + r = hap_set_allocation(d, 256, NULL); 6.427 + hap_unlock(d); 6.428 + if ( r != 0 ) { 6.429 + hap_set_allocation(d, 0, NULL); 6.430 + rv = -ENOMEM; 6.431 + goto out; 6.432 + } 6.433 + } 6.434 + 6.435 + /* allocate P2m table */ 6.436 + if ( mode & PG_translate ) { 6.437 + rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page); 6.438 + if ( rv != 0 ) 6.439 + goto out; 6.440 + } 6.441 + 6.442 + d->arch.paging.mode = mode | PG_SH_enable; 6.443 + 6.444 + out: 6.445 + domain_unpause(d); 6.446 + return rv; 6.447 +} 6.448 + 6.449 +void hap_final_teardown(struct domain *d) 6.450 +{ 6.451 + HERE_I_AM; 6.452 + 6.453 + if ( d->arch.paging.hap.total_pages != 0 ) 6.454 + hap_teardown(d); 6.455 + 6.456 + p2m_teardown(d); 6.457 +} 6.458 + 6.459 +void hap_teardown(struct domain *d) 6.460 +{ 6.461 + struct vcpu *v; 6.462 + mfn_t mfn; 6.463 + HERE_I_AM; 6.464 + 6.465 + ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); 6.466 + ASSERT(d != current->domain); 6.467 + 6.468 + if ( !hap_locked_by_me(d) ) 6.469 + hap_lock(d); /* Keep various asserts happy */ 6.470 + 6.471 + if ( paging_mode_enabled(d) ) { 6.472 + /* release the monitor table held by each vcpu */ 6.473 + for_each_vcpu(d, v) { 6.474 + if ( v->arch.paging.mode && paging_mode_external(d) ) { 6.475 + mfn = pagetable_get_mfn(v->arch.monitor_table); 6.476 + if ( 
mfn_valid(mfn) && (mfn_x(mfn) != 0) ) 6.477 + hap_destroy_monitor_table(v, mfn); 6.478 + v->arch.monitor_table = pagetable_null(); 6.479 + } 6.480 + } 6.481 + } 6.482 + 6.483 + if ( d->arch.paging.hap.total_pages != 0 ) { 6.484 + HAP_PRINTK("teardown of domain %u starts." 6.485 + " pages total = %u, free = %u, p2m=%u\n", 6.486 + d->domain_id, 6.487 + d->arch.paging.hap.total_pages, 6.488 + d->arch.paging.hap.free_pages, 6.489 + d->arch.paging.hap.p2m_pages); 6.490 + hap_set_allocation(d, 0, NULL); 6.491 + HAP_PRINTK("teardown done." 6.492 + " pages total = %u, free = %u, p2m=%u\n", 6.493 + d->arch.paging.hap.total_pages, 6.494 + d->arch.paging.hap.free_pages, 6.495 + d->arch.paging.hap.p2m_pages); 6.496 + ASSERT(d->arch.paging.hap.total_pages == 0); 6.497 + } 6.498 + 6.499 + d->arch.paging.mode &= ~PG_log_dirty; 6.500 + 6.501 + hap_unlock(d); 6.502 +} 6.503 + 6.504 +int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 6.505 + XEN_GUEST_HANDLE(void) u_domctl) 6.506 +{ 6.507 + int rc, preempted = 0; 6.508 + 6.509 + HERE_I_AM; 6.510 + 6.511 + if ( unlikely(d == current->domain) ) { 6.512 + gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n"); 6.513 + return -EINVAL; 6.514 + } 6.515 + 6.516 + switch ( sc->op ) { 6.517 + case XEN_DOMCTL_SHADOW_OP_OFF: 6.518 + case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST: 6.519 + case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: 6.520 + case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: 6.521 + case XEN_DOMCTL_SHADOW_OP_CLEAN: 6.522 + case XEN_DOMCTL_SHADOW_OP_PEEK: 6.523 + case XEN_DOMCTL_SHADOW_OP_ENABLE: 6.524 + HAP_ERROR("Bad hap domctl op %u\n", sc->op); 6.525 + domain_crash(d); 6.526 + return -EINVAL; 6.527 + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: 6.528 + hap_lock(d); 6.529 + rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); 6.530 + hap_unlock(d); 6.531 + if ( preempted ) 6.532 + /* Not finished. Set up to re-run the call. */ 6.533 + rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", 6.534 + u_domctl); 6.535 + else 6.536 + /* Finished. Return the new allocation */ 6.537 + sc->mb = hap_get_allocation(d); 6.538 + return rc; 6.539 + case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: 6.540 + sc->mb = hap_get_allocation(d); 6.541 + return 0; 6.542 + default: 6.543 + HAP_ERROR("Bad hap domctl op %u\n", sc->op); 6.544 + return -EINVAL; 6.545 + } 6.546 +} 6.547 + 6.548 +void hap_vcpu_init(struct vcpu *v) 6.549 +{ 6.550 + v->arch.paging.mode = &hap_paging_real_mode; 6.551 +} 6.552 +/************************************************/ 6.553 +/* HAP PAGING MODE FUNCTIONS */ 6.554 +/************************************************/ 6.555 +/* In theory, hap should not intercept guest page fault. This function can 6.556 + * be recycled to handle host/nested page fault, if needed. 6.557 + */ 6.558 +int hap_page_fault(struct vcpu *v, unsigned long va, 6.559 + struct cpu_user_regs *regs) 6.560 +{ 6.561 + HERE_I_AM; 6.562 + domain_crash(v->domain); 6.563 + return 0; 6.564 +} 6.565 + 6.566 +/* called when guest issues a invlpg request. 6.567 + * Return 1 if need to issue page invalidation on CPU; Return 0 if does not 6.568 + * need to do so. 
6.569 + */ 6.570 +int hap_invlpg(struct vcpu *v, unsigned long va) 6.571 +{ 6.572 + HERE_I_AM; 6.573 + return 0; 6.574 +} 6.575 + 6.576 +void hap_update_cr3(struct vcpu *v, int do_locking) 6.577 +{ 6.578 + struct domain *d = v->domain; 6.579 + mfn_t gmfn; 6.580 + 6.581 + HERE_I_AM; 6.582 + /* Don't do anything on an uninitialised vcpu */ 6.583 + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) { 6.584 + ASSERT(v->arch.cr3 == 0); 6.585 + return; 6.586 + } 6.587 + 6.588 + if ( do_locking ) 6.589 + hap_lock(v->domain); 6.590 + 6.591 + ASSERT(hap_locked_by_me(v->domain)); 6.592 + ASSERT(v->arch.paging.mode); 6.593 + 6.594 + gmfn = pagetable_get_mfn(v->arch.guest_table); 6.595 + 6.596 + make_cr3(v, pagetable_get_pfn(v->arch.monitor_table)); 6.597 + 6.598 + hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table)); 6.599 + 6.600 + HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n", 6.601 + d->domain_id, v->vcpu_id, 6.602 + (unsigned long)pagetable_get_pfn(v->arch.guest_table), 6.603 + (unsigned long)pagetable_get_pfn(v->arch.monitor_table)); 6.604 + 6.605 + flush_tlb_mask(d->domain_dirty_cpumask); 6.606 + 6.607 + if ( do_locking ) 6.608 + hap_unlock(v->domain); 6.609 +} 6.610 + 6.611 +void hap_update_paging_modes(struct vcpu *v) 6.612 +{ 6.613 + struct domain *d; 6.614 + 6.615 + HERE_I_AM; 6.616 + 6.617 + d = v->domain; 6.618 + hap_lock(d); 6.619 + 6.620 + /* update guest paging mode. Note that we rely on hvm functions to detect 6.621 + * guest's paging mode. So, make sure the shadow registers (CR0, CR4, EFER) 6.622 + * reflect guest's status correctly. 6.623 + */ 6.624 + if ( hvm_paging_enabled(v) ) { 6.625 + if ( hvm_long_mode_enabled(v) ) 6.626 + v->arch.paging.mode = &hap_paging_long_mode; 6.627 + else if ( hvm_pae_enabled(v) ) 6.628 + v->arch.paging.mode = &hap_paging_pae_mode; 6.629 + else 6.630 + v->arch.paging.mode = &hap_paging_protected_mode; 6.631 + } 6.632 + else { 6.633 + v->arch.paging.mode = &hap_paging_real_mode; 6.634 + } 6.635 + 6.636 + v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); 6.637 + 6.638 + /* use p2m map */ 6.639 + v->arch.guest_table = 6.640 + pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); 6.641 + 6.642 + if ( pagetable_is_null(v->arch.monitor_table) ) { 6.643 + mfn_t mmfn = hap_make_monitor_table(v); 6.644 + v->arch.monitor_table = pagetable_from_mfn(mmfn); 6.645 + make_cr3(v, mfn_x(mmfn)); 6.646 + } 6.647 + 6.648 + flush_tlb_mask(d->domain_dirty_cpumask); 6.649 + hap_unlock(d); 6.650 +} 6.651 + 6.652 +void 6.653 +hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, 6.654 + l1_pgentry_t new, unsigned int level) 6.655 +{ 6.656 + hap_lock(v->domain); 6.657 + safe_write_pte(p, new); 6.658 + hap_unlock(v->domain); 6.659 +} 6.660 + 6.661 +/* Entry points into this mode of the hap code. 
*/ 6.662 +struct paging_mode hap_paging_real_mode = { 6.663 + .page_fault = hap_page_fault, 6.664 + .invlpg = hap_invlpg, 6.665 + .gva_to_gfn = hap_gva_to_gfn_real_mode, 6.666 + .update_cr3 = hap_update_cr3, 6.667 + .update_paging_modes = hap_update_paging_modes, 6.668 + .write_p2m_entry = hap_write_p2m_entry, 6.669 + .guest_levels = 1 6.670 +}; 6.671 + 6.672 +struct paging_mode hap_paging_protected_mode = { 6.673 + .page_fault = hap_page_fault, 6.674 + .invlpg = hap_invlpg, 6.675 + .gva_to_gfn = hap_gva_to_gfn_protected_mode, 6.676 + .update_cr3 = hap_update_cr3, 6.677 + .update_paging_modes = hap_update_paging_modes, 6.678 + .write_p2m_entry = hap_write_p2m_entry, 6.679 + .guest_levels = 2 6.680 +}; 6.681 + 6.682 +struct paging_mode hap_paging_pae_mode = { 6.683 + .page_fault = hap_page_fault, 6.684 + .invlpg = hap_invlpg, 6.685 + .gva_to_gfn = hap_gva_to_gfn_pae_mode, 6.686 + .update_cr3 = hap_update_cr3, 6.687 + .update_paging_modes = hap_update_paging_modes, 6.688 + .write_p2m_entry = hap_write_p2m_entry, 6.689 + .guest_levels = 3 6.690 +}; 6.691 + 6.692 +struct paging_mode hap_paging_long_mode = { 6.693 + .page_fault = hap_page_fault, 6.694 + .invlpg = hap_invlpg, 6.695 + .gva_to_gfn = hap_gva_to_gfn_long_mode, 6.696 + .update_cr3 = hap_update_cr3, 6.697 + .update_paging_modes = hap_update_paging_modes, 6.698 + .write_p2m_entry = hap_write_p2m_entry, 6.699 + .guest_levels = 4 6.700 +}; 6.701 + 6.702 +/* 6.703 + * Local variables: 6.704 + * mode: C 6.705 + * c-set-style: "BSD" 6.706 + * c-basic-offset: 4 6.707 + * indent-tabs-mode: nil 6.708 + * End: 6.709 + */ 6.710 + 6.711 +
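hap_domctl() above converts the requested pool size from megabytes to pages with sc->mb << (20 - PAGE_SHIFT), and hap_get_allocation() rounds back up to whole megabytes. A worked check of that arithmetic, assuming 4 KiB pages (PAGE_SHIFT == 12); note that hap_enable()'s initial 256-page pool is exactly 1 MiB:

```c
#include <stdio.h>

#define PAGE_SHIFT 12  /* 4 KiB pages assumed */

static unsigned int mb_to_pages(unsigned int mb)
{
    return mb << (20 - PAGE_SHIFT);           /* 1 MiB -> 256 pages */
}

/* Same round-up as hap_get_allocation(). */
static unsigned int pages_to_mb_rounded_up(unsigned int pg)
{
    return (pg >> (20 - PAGE_SHIFT))
         + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
}

int main(void)
{
    printf("%u pages\n", mb_to_pages(1));             /* 256 */
    printf("%u MiB\n", pages_to_mb_rounded_up(257));  /* rounds up to 2 */
    return 0;
}
```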
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/xen/arch/x86/mm/hap/private.h Thu Mar 08 10:54:56 2007 +0000 7.3 @@ -0,0 +1,112 @@ 7.4 +/* 7.5 + * arch/x86/mm/hap/private.h 7.6 + * 7.7 + * Copyright (c) 2007, AMD Corporation (Wei Huang) 7.8 + * 7.9 + * This program is free software; you can redistribute it and/or modify it 7.10 + * under the terms and conditions of the GNU General Public License, 7.11 + * version 2, as published by the Free Software Foundation. 7.12 + * 7.13 + * This program is distributed in the hope it will be useful, but WITHOUT 7.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 7.15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 7.16 + * more details. 7.17 + * 7.18 + * You should have received a copy of the GNU General Public License along with 7.19 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 7.20 + * Place - Suite 330, Boston, MA 02111-1307 USA. 7.21 + * 7.22 + */ 7.23 +#ifndef __HAP_PRIVATE_H__ 7.24 +#define __HAP_PRIVATE_H__ 7.25 + 7.26 +#include <asm/flushtlb.h> 7.27 +#include <asm/hvm/support.h> 7.28 + 7.29 +/********************************************/ 7.30 +/* GUEST TRANSLATION FUNCS */ 7.31 +/********************************************/ 7.32 +unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva); 7.33 +unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva); 7.34 +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva); 7.35 +unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva); 7.36 +/********************************************/ 7.37 +/* MISC DEFINITIONS */ 7.38 +/********************************************/ 7.39 + 7.40 +/* PT_SHIFT describes the amount by which a virtual address is shifted right 7.41 + * to right justify the portion to be used for indexing into a page 7.42 + * table, given the guest memory model (i.e. number of levels) and the level 7.43 + * of the page table being accessed. The idea is from Virtual Iron's code. 7.44 + */ 7.45 +static const int PT_SHIFT[][5] = 7.46 + { /* ------ level ------ nr_levels */ 7.47 + /* 1 2 3 4 */ 7.48 + { 0, 0, 0, 0, 0}, /* 0 not used */ 7.49 + { 0, 0, 0, 0, 0}, /* 1 not used */ 7.50 + { 0, 12, 22, 0, 0}, /* 2 */ 7.51 + { 0, 12, 21, 30, 0}, /* 3 */ 7.52 + { 0, 12, 21, 30, 39} /* 4 */ 7.53 + }; 7.54 + 7.55 +/* PT_ENTRIES describes the number of entries in a page table, given the 7.56 + * memory model (i.e. number of levels) and the level of the page table 7.57 + * being considered. 
This idea from Virtual Iron's shadow code*/ 7.58 +static const int PT_ENTRIES[][5] = 7.59 + { /* ------ level ------ nr_levels */ 7.60 + /* 1 2 3 4 */ 7.61 + { 0, 0, 0, 0, 0}, /* 0 not used */ 7.62 + { 0, 0, 0, 0, 0}, /* 1 not used */ 7.63 + { 0, 1024, 1024, 0, 0}, /* 2 */ 7.64 + { 0, 512, 512, 4, 0}, /* 3 */ 7.65 + { 0, 512, 512, 512, 512} /* 4 */ 7.66 + }; 7.67 + 7.68 +/********************************************/ 7.69 +/* PAGING DEFINITION FOR GUEST */ 7.70 +/********************************************/ 7.71 +#define PHYSICAL_PAGE_4K_SIZE (1UL << 12) 7.72 +#define PHYSICAL_PAGE_2M_SIZE (1UL << 21) 7.73 +#define PHYSICAL_PAGE_4M_SIZE (1UL << 22) 7.74 +#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) ) 7.75 +#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) ) 7.76 +#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) ) 7.77 + 7.78 +/* long mode physical address mask */ 7.79 +#define PHYSICAL_ADDR_BITS_LM 52 7.80 +#define PHYSICAL_ADDR_MASK_LM ((1UL << PHYSICAL_ADDR_BITS_LM)-1) 7.81 +#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM) 7.82 +#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM) 7.83 + 7.84 +#define PAGE_NX_BIT (1ULL << 63) 7.85 +/************************************************/ 7.86 +/* PAGETABLE RELATED VARIABLES */ 7.87 +/************************************************/ 7.88 +#if CONFIG_PAGING_LEVELS == 2 7.89 +#define HAP_L1_PAGETABLE_ENTRIES 1024 7.90 +#define HAP_L2_PAGETABLE_ENTRIES 1024 7.91 +#define HAP_L1_PAGETABLE_SHIFT 12 7.92 +#define HAP_L2_PAGETABLE_SHIFT 22 7.93 +#endif 7.94 + 7.95 +#if CONFIG_PAGING_LEVELS == 3 7.96 +#define HAP_L1_PAGETABLE_ENTRIES 512 7.97 +#define HAP_L2_PAGETABLE_ENTRIES 512 7.98 +#define HAP_L3_PAGETABLE_ENTRIES 4 7.99 +#define HAP_L1_PAGETABLE_SHIFT 12 7.100 +#define HAP_L2_PAGETABLE_SHIFT 21 7.101 +#define HAP_L3_PAGETABLE_SHIFT 30 7.102 +#endif 7.103 + 7.104 +#if CONFIG_PAGING_LEVELS == 4 7.105 +#define HAP_L1_PAGETABLE_ENTRIES 512 7.106 +#define HAP_L2_PAGETABLE_ENTRIES 512 7.107 +#define HAP_L3_PAGETABLE_ENTRIES 512 7.108 +#define HAP_L4_PAGETABLE_ENTRIES 512 7.109 +#define HAP_L1_PAGETABLE_SHIFT 12 7.110 +#define HAP_L2_PAGETABLE_SHIFT 21 7.111 +#define HAP_L3_PAGETABLE_SHIFT 30 7.112 +#define HAP_L4_PAGETABLE_SHIFT 39 7.113 +#endif 7.114 + 7.115 +#endif /* __SVM_NPT_H__ */
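A worked example of the PT_SHIFT/PT_ENTRIES tables above: extracting the per-level page-table indices of a sample virtual address for a two-level guest. The tables are copied from the header; the address is arbitrary:

```c
#include <stdio.h>

/* Tables as defined in hap/private.h. */
static const int PT_SHIFT[][5] = {
    { 0,  0,  0,  0,  0 },
    { 0,  0,  0,  0,  0 },
    { 0, 12, 22,  0,  0 },   /* 2-level guest */
    { 0, 12, 21, 30,  0 },   /* 3-level guest */
    { 0, 12, 21, 30, 39 },   /* 4-level guest */
};
static const int PT_ENTRIES[][5] = {
    { 0,    0,    0,   0,   0 },
    { 0,    0,    0,   0,   0 },
    { 0, 1024, 1024,   0,   0 },
    { 0,  512,  512,   4,   0 },
    { 0,  512,  512, 512, 512 },
};

int main(void)
{
    unsigned long gva = 0xdeadbeef;  /* arbitrary sample address */
    int mode = 2;                    /* two-level (non-PAE) guest */

    /* Same index computation the walkers in support.c use. */
    for ( int lev = mode; lev >= 1; lev-- ) {
        int index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev] - 1);
        printf("level %d index = %d\n", lev, index);
    }
    /* 0xdeadbeef: L2 index = gva >> 22 = 890, L1 index = (gva >> 12) & 1023 = 731 */
    return 0;
}
```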
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/xen/arch/x86/mm/hap/support.c Thu Mar 08 10:54:56 2007 +0000 8.3 @@ -0,0 +1,334 @@ 8.4 +/* 8.5 + * arch/x86/mm/hap/support.c 8.6 + * 8.7 + * guest page table walker 8.8 + * Copyright (c) 2007, AMD Corporation (Wei Huang) 8.9 + * 8.10 + * This program is free software; you can redistribute it and/or modify it 8.11 + * under the terms and conditions of the GNU General Public License, 8.12 + * version 2, as published by the Free Software Foundation. 8.13 + * 8.14 + * This program is distributed in the hope it will be useful, but WITHOUT 8.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 8.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 8.17 + * more details. 8.18 + * 8.19 + * You should have received a copy of the GNU General Public License along with 8.20 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 8.21 + * Place - Suite 330, Boston, MA 02111-1307 USA. 8.22 + * 8.23 + */ 8.24 + 8.25 +#include <xen/config.h> 8.26 +#include <xen/types.h> 8.27 +#include <xen/mm.h> 8.28 +#include <xen/domain_page.h> 8.29 +#include <asm/page.h> 8.30 +#include <xen/event.h> 8.31 +#include <xen/sched.h> 8.32 +#include <asm/hvm/svm/vmcb.h> 8.33 +#include <asm/domain.h> 8.34 +#include <asm/shadow.h> 8.35 +#include <asm/hap.h> 8.36 + 8.37 +#include "private.h" 8.38 +#include "../page-guest32.h" 8.39 + 8.40 +/*******************************************/ 8.41 +/* Platform Specific Functions */ 8.42 +/*******************************************/ 8.43 + 8.44 +/* Translate guest virtual address to guest physical address. Specifically 8.45 + * for real mode guest. 8.46 + */ 8.47 +unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva) 8.48 +{ 8.49 + HERE_I_AM; 8.50 + return ((paddr_t)gva >> PAGE_SHIFT); 8.51 +} 8.52 + 8.53 +/* Translate guest virtual address to guest physical address. Specifically 8.54 + * for protected guest. 
8.55 + */ 8.56 +unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva) 8.57 +{ 8.58 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); 8.59 + int mode = 2; /* two-level guest */ 8.60 + int lev, index; 8.61 + paddr_t gpa = 0; 8.62 + unsigned long gpfn, mfn; 8.63 + int success = 1; 8.64 + l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */ 8.65 + l1_pgentry_32_t *l1e; 8.66 + 8.67 + HERE_I_AM; 8.68 + 8.69 + gpfn = (gcr3 >> PAGE_SHIFT); 8.70 + for ( lev = mode; lev >= 1; lev-- ) { 8.71 + mfn = get_mfn_from_gpfn( gpfn ); 8.72 + if ( mfn == INVALID_MFN ) { 8.73 + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 8.74 + lev); 8.75 + success = 0; 8.76 + break; 8.77 + } 8.78 + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); 8.79 + 8.80 + if ( lev == 2 ) { 8.81 + l2e = map_domain_page( mfn ); 8.82 + HAP_PRINTK("l2 page table entry is %ulx at index = %d\n", 8.83 + l2e[index].l2, index); 8.84 + if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) { 8.85 + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); 8.86 + success = 0; 8.87 + } 8.88 + 8.89 + if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ 8.90 + HAP_PRINTK("guest page table is PSE\n"); 8.91 + if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */ 8.92 + printk("guest physical memory size is too large!\n"); 8.93 + domain_crash(v->domain); 8.94 + } 8.95 + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) + 8.96 + (gva & ~PHYSICAL_PAGE_4M_MASK); 8.97 + unmap_domain_page(l2e); 8.98 + break; /* last level page table, return from here */ 8.99 + } 8.100 + else { 8.101 + gpfn = l2e_get_pfn( l2e[index] ); 8.102 + } 8.103 + unmap_domain_page(l2e); 8.104 + } 8.105 + 8.106 + if ( lev == 1 ) { 8.107 + l1e = map_domain_page( mfn ); 8.108 + HAP_PRINTK("l1 page table entry is %ulx at index = %d\n", 8.109 + l1e[index].l1, index); 8.110 + if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) { 8.111 + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); 8.112 + success = 0; 8.113 + } 8.114 + gpfn = l1e_get_pfn( l1e[index] ); 8.115 + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 8.116 + (gva & ~PHYSICAL_PAGE_4K_MASK); 8.117 + unmap_domain_page(l1e); 8.118 + } 8.119 + 8.120 + if ( !success ) /* error happened, jump out */ 8.121 + break; 8.122 + } 8.123 + 8.124 + HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa); 8.125 + 8.126 + if ( !success ) /* error happened */ 8.127 + return INVALID_GFN; 8.128 + else 8.129 + return ((paddr_t)gpa >> PAGE_SHIFT); 8.130 +} 8.131 + 8.132 + 8.133 + 8.134 +/* Translate guest virtual address to guest physical address. Specifically 8.135 + * for PAE mode guest. 
8.136 + */ 8.137 +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva) 8.138 +{ 8.139 +#if CONFIG_PAGING_LEVELS >= 3 8.140 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); 8.141 + int mode = 3; /* three-level guest */ 8.142 + int lev, index; 8.143 + paddr_t gpa = 0; 8.144 + unsigned long gpfn, mfn; 8.145 + int success = 1; 8.146 + l1_pgentry_t *l1e; 8.147 + l2_pgentry_t *l2e; 8.148 + l3_pgentry_t *l3e; 8.149 + 8.150 + HERE_I_AM; 8.151 + 8.152 + gpfn = (gcr3 >> PAGE_SHIFT); 8.153 + for ( lev = mode; lev >= 1; lev-- ) { 8.154 + mfn = get_mfn_from_gpfn( gpfn ); 8.155 + if ( mfn == INVALID_MFN ) { 8.156 + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 8.157 + lev); 8.158 + success = 0; 8.159 + break; 8.160 + } 8.161 + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); 8.162 + 8.163 + if ( lev == 3 ) { 8.164 + l3e = map_domain_page( mfn ); 8.165 + index += ( ((gcr3 >> 5 ) & 127 ) * 4 ); 8.166 + if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) { 8.167 + HAP_PRINTK("Level 3 entry not present at index = %d\n", index); 8.168 + success = 0; 8.169 + } 8.170 + gpfn = l3e_get_pfn( l3e[index] ); 8.171 + unmap_domain_page(l3e); 8.172 + } 8.173 + 8.174 + if ( lev == 2 ) { 8.175 + l2e = map_domain_page( mfn ); 8.176 + if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) { 8.177 + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); 8.178 + success = 0; 8.179 + } 8.180 + 8.181 + if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ 8.182 + HAP_PRINTK("guest page table is PSE\n"); 8.183 + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) + 8.184 + (gva & ~PHYSICAL_PAGE_2M_MASK); 8.185 + unmap_domain_page(l2e); 8.186 + break; /* last level page table, jump out from here */ 8.187 + } 8.188 + else { 8.189 + gpfn = l2e_get_pfn(l2e[index]); 8.190 + } 8.191 + unmap_domain_page(l2e); 8.192 + } 8.193 + 8.194 + if ( lev == 1 ) { 8.195 + l1e = map_domain_page( mfn ); 8.196 + if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) { 8.197 + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); 8.198 + success = 0; 8.199 + } 8.200 + gpfn = l1e_get_pfn( l1e[index] ); 8.201 + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 8.202 + (gva & ~PHYSICAL_PAGE_4K_MASK); 8.203 + unmap_domain_page(l1e); 8.204 + } 8.205 + 8.206 + if ( success != 1 ) /* error happened, jump out */ 8.207 + break; 8.208 + } 8.209 + 8.210 + gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */ 8.211 + HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa); 8.212 + 8.213 + if ( !success ) 8.214 + return INVALID_GFN; 8.215 + else 8.216 + return ((paddr_t)gpa >> PAGE_SHIFT); 8.217 +#else 8.218 + HERE_I_AM; 8.219 + printk("guest paging level (3) is greater than host paging level!\n"); 8.220 + domain_crash(v->domain); 8.221 + return INVALID_GFN; 8.222 +#endif 8.223 +} 8.224 + 8.225 + 8.226 + 8.227 +/* Translate guest virtual address to guest physical address. Specifically 8.228 + * for long mode guest. 
8.229 + */ 8.230 +unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva) 8.231 +{ 8.232 +#if CONFIG_PAGING_LEVELS == 4 8.233 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); 8.234 + int mode = 4; /* four-level guest */ 8.235 + int lev, index; 8.236 + paddr_t gpa = 0; 8.237 + unsigned long gpfn, mfn; 8.238 + int success = 1; 8.239 + l4_pgentry_t *l4e; 8.240 + l3_pgentry_t *l3e; 8.241 + l2_pgentry_t *l2e; 8.242 + l1_pgentry_t *l1e; 8.243 + 8.244 + HERE_I_AM; 8.245 + 8.246 + gpfn = (gcr3 >> PAGE_SHIFT); 8.247 + for ( lev = mode; lev >= 1; lev-- ) { 8.248 + mfn = get_mfn_from_gpfn( gpfn ); 8.249 + if ( mfn == INVALID_MFN ) { 8.250 + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 8.251 + lev); 8.252 + success = 0; 8.253 + break; 8.254 + } 8.255 + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); 8.256 + 8.257 + if ( lev == 4 ) { 8.258 + l4e = map_domain_page( mfn ); 8.259 + if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) { 8.260 + HAP_PRINTK("Level 4 entry not present at index = %d\n", index); 8.261 + success = 0; 8.262 + } 8.263 + gpfn = l4e_get_pfn( l4e[index] ); 8.264 + unmap_domain_page(l4e); 8.265 + } 8.266 + 8.267 + if ( lev == 3 ) { 8.268 + l3e = map_domain_page( mfn ); 8.269 + if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) { 8.270 + HAP_PRINTK("Level 3 entry not present at index = %d\n", index); 8.271 + success = 0; 8.272 + } 8.273 + gpfn = l3e_get_pfn( l3e[index] ); 8.274 + unmap_domain_page(l3e); 8.275 + } 8.276 + 8.277 + if ( lev == 2 ) { 8.278 + l2e = map_domain_page( mfn ); 8.279 + if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) { 8.280 + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); 8.281 + success = 0; 8.282 + } 8.283 + 8.284 + if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ 8.285 + HAP_PRINTK("guest page table is PSE\n"); 8.286 + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM) 8.287 + + (gva & ~PHYSICAL_PAGE_2M_MASK); 8.288 + unmap_domain_page(l2e); 8.289 + break; /* last level page table, jump out from here */ 8.290 + } 8.291 + else { 8.292 + gpfn = l2e_get_pfn(l2e[index]); 8.293 + } 8.294 + unmap_domain_page(l2e); 8.295 + } 8.296 + 8.297 + if ( lev == 1 ) { 8.298 + l1e = map_domain_page( mfn ); 8.299 + if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) { 8.300 + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); 8.301 + success = 0; 8.302 + } 8.303 + gpfn = l1e_get_pfn( l1e[index] ); 8.304 + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) + 8.305 + (gva & ~PHYSICAL_PAGE_4K_MASK); 8.306 + unmap_domain_page(l1e); 8.307 + } 8.308 + 8.309 + if ( success != 1 ) /* error happened, jump out */ 8.310 + break; 8.311 + } 8.312 + 8.313 + gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */ 8.314 + HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa); 8.315 + 8.316 + if ( !success ) 8.317 + return INVALID_GFN; 8.318 + else 8.319 + return ((paddr_t)gpa >> PAGE_SHIFT); 8.320 +#else 8.321 + HERE_I_AM; 8.322 + printk("guest paging level (4) is greater than host paging level!\n"); 8.323 + domain_crash(v->domain); 8.324 + return INVALID_GFN; 8.325 +#endif 8.326 +} 8.327 + 8.328 +/* 8.329 + * Local variables: 8.330 + * mode: C 8.331 + * c-set-style: "BSD" 8.332 + * c-basic-offset: 4 8.333 + * tab-width: 4 8.334 + * indent-tabs-mode: nil 8.335 + * End: 8.336 + */ 8.337 +
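All three walkers above short-circuit on a PSE level-2 entry, combining the entry's superpage base with the low bits of the virtual address. A worked example of the 4 MiB (non-PAE) case from hap_gva_to_gfn_protected_mode(), with a made-up entry value:

```c
#include <stdio.h>
#include <stdint.h>

/* Masks as defined in hap/private.h. */
#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
#define PHYSICAL_PAGE_4M_MASK (~(PHYSICAL_PAGE_4M_SIZE - 1))

int main(void)
{
    uint32_t l2e = 0x12C00083;  /* illustrative: base 0x12C00000, P|RW|PSE set */
    uint32_t gva = 0x00345678;  /* offset 0x345678 inside the 4 MiB page */

    /* Top 10 bits from the entry, low 22 bits from the virtual address. */
    uint32_t gpa = (l2e & PHYSICAL_PAGE_4M_MASK) | (gva & ~PHYSICAL_PAGE_4M_MASK);

    printf("gpa = %#x\n", (unsigned)gpa);  /* 0x12f45678 */
    return 0;
}
```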
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/xen/arch/x86/mm/page-guest32.h Thu Mar 08 10:54:56 2007 +0000 9.3 @@ -0,0 +1,100 @@ 9.4 + 9.5 +#ifndef __X86_PAGE_GUEST_H__ 9.6 +#define __X86_PAGE_GUEST_H__ 9.7 + 9.8 +#ifndef __ASSEMBLY__ 9.9 +# include <asm/types.h> 9.10 +#endif 9.11 + 9.12 +#define PAGETABLE_ORDER_32 10 9.13 +#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 9.14 +#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 9.15 +#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32 9.16 + 9.17 + 9.18 +#define L1_PAGETABLE_SHIFT_32 12 9.19 +#define L2_PAGETABLE_SHIFT_32 22 9.20 + 9.21 +/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */ 9.22 + 9.23 +#ifndef __ASSEMBLY__ 9.24 + 9.25 +typedef u32 intpte_32_t; 9.26 + 9.27 +typedef struct { intpte_32_t l1; } l1_pgentry_32_t; 9.28 +typedef struct { intpte_32_t l2; } l2_pgentry_32_t; 9.29 +typedef l2_pgentry_t root_pgentry_32_t; 9.30 +#endif 9.31 + 9.32 +#define get_pte_flags_32(x) ((u32)(x) & 0xFFF) 9.33 +#define put_pte_flags_32(x) ((intpte_32_t)(x)) 9.34 + 9.35 +/* Get pte access flags (unsigned int). */ 9.36 +#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1)) 9.37 +#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2)) 9.38 + 9.39 +#define l1e_get_paddr_32(x) \ 9.40 + ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) 9.41 +#define l2e_get_paddr_32(x) \ 9.42 + ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK)))) 9.43 + 9.44 +/* Construct an empty pte. */ 9.45 +#define l1e_empty_32() ((l1_pgentry_32_t) { 0 }) 9.46 +#define l2e_empty_32() ((l2_pgentry_32_t) { 0 }) 9.47 + 9.48 +/* Construct a pte from a pfn and access flags. */ 9.49 +#define l1e_from_pfn_32(pfn, flags) \ 9.50 + ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 9.51 +#define l2e_from_pfn_32(pfn, flags) \ 9.52 + ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 9.53 + 9.54 +/* Construct a pte from a physical address and access flags. */ 9.55 +#ifndef __ASSEMBLY__ 9.56 +static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags) 9.57 +{ 9.58 + ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 9.59 + return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 9.60 +} 9.61 +static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags) 9.62 +{ 9.63 + ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 9.64 + return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 9.65 +} 9.66 +#endif /* !__ASSEMBLY__ */ 9.67 + 9.68 + 9.69 +/* Construct a pte from a page pointer and access flags. */ 9.70 +#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags))) 9.71 +#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags))) 9.72 + 9.73 +/* Add extra flags to an existing pte. */ 9.74 +#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags)) 9.75 +#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags)) 9.76 + 9.77 +/* Remove flags from an existing pte. */ 9.78 +#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags)) 9.79 +#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags)) 9.80 + 9.81 +/* Check if a pte's page mapping or significant access flags have changed. 
*/ 9.82 +#define l1e_has_changed_32(x,y,flags) \ 9.83 + ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 9.84 +#define l2e_has_changed_32(x,y,flags) \ 9.85 + ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 9.86 + 9.87 +/* Given a virtual address, get an entry offset into a page table. */ 9.88 +#define l1_table_offset_32(a) \ 9.89 + (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1)) 9.90 +#define l2_table_offset_32(a) \ 9.91 + (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1)) 9.92 + 9.93 +#endif /* __X86_PAGE_GUEST_H__ */ 9.94 + 9.95 +/* 9.96 + * Local variables: 9.97 + * mode: C 9.98 + * c-set-style: "BSD" 9.99 + * c-basic-offset: 4 9.100 + * tab-width: 4 9.101 + * indent-tabs-mode: nil 9.102 + * End: 9.103 + */
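A brief usage sketch of the 32-bit guest entry helpers this header provides: build an l1 entry from a pfn, then read its flags back. The _PAGE_* constants and PAGE_SHIFT are restated locally so the sketch compiles on its own:

```c
#include <stdio.h>
#include <stdint.h>

typedef uint32_t intpte_32_t;
typedef struct { intpte_32_t l1; } l1_pgentry_32_t;

#define PAGE_SHIFT    12
#define _PAGE_PRESENT 0x001  /* standard x86 PTE flag values */
#define _PAGE_RW      0x002

/* Helpers as defined in page-guest32.h. */
#define put_pte_flags_32(x) ((intpte_32_t)(x))
#define get_pte_flags_32(x) ((uint32_t)(x) & 0xFFF)
#define l1e_from_pfn_32(pfn, flags) \
    ((l1_pgentry_32_t){ ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1))

int main(void)
{
    l1_pgentry_32_t e = l1e_from_pfn_32(0x1234, _PAGE_PRESENT | _PAGE_RW);

    printf("pte = %#x, present = %d\n", (unsigned)e.l1,
           !!(l1e_get_flags_32(e) & _PAGE_PRESENT));
    return 0;  /* pte = 0x1234003, present = 1 */
}
```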
10.1 --- a/xen/arch/x86/mm/paging.c Wed Mar 07 16:21:21 2007 +0000 10.2 +++ b/xen/arch/x86/mm/paging.c Thu Mar 08 10:54:56 2007 +0000 10.3 @@ -24,10 +24,12 @@ 10.4 #include <asm/paging.h> 10.5 #include <asm/shadow.h> 10.6 #include <asm/p2m.h> 10.7 +#include <asm/hap.h> 10.8 10.9 /* Xen command-line option to enable hardware-assisted paging */ 10.10 int opt_hap_enabled = 0; 10.11 boolean_param("hap", opt_hap_enabled); 10.12 +int hap_capable_system = 0; 10.13 10.14 /* Printouts */ 10.15 #define PAGING_PRINTK(_f, _a...) \ 10.16 @@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d 10.17 { 10.18 p2m_init(d); 10.19 shadow_domain_init(d); 10.20 + 10.21 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 10.22 + hap_domain_init(d); 10.23 } 10.24 10.25 /* vcpu paging struct initialization goes here */ 10.26 void paging_vcpu_init(struct vcpu *v) 10.27 { 10.28 - shadow_vcpu_init(v); 10.29 + if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) ) 10.30 + hap_vcpu_init(v); 10.31 + else 10.32 + shadow_vcpu_init(v); 10.33 } 10.34 10.35 10.36 @@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_ 10.37 XEN_GUEST_HANDLE(void) u_domctl) 10.38 { 10.39 /* Here, dispatch domctl to the appropriate paging code */ 10.40 - return shadow_domctl(d, sc, u_domctl); 10.41 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 10.42 + return hap_domctl(d, sc, u_domctl); 10.43 + else 10.44 + return shadow_domctl(d, sc, u_domctl); 10.45 } 10.46 10.47 /* Call when destroying a domain */ 10.48 void paging_teardown(struct domain *d) 10.49 { 10.50 - shadow_teardown(d); 10.51 - /* Call other modes' teardown code here */ 10.52 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 10.53 + hap_teardown(d); 10.54 + else 10.55 + shadow_teardown(d); 10.56 } 10.57 10.58 /* Call once all of the references to the domain have gone away */ 10.59 void paging_final_teardown(struct domain *d) 10.60 { 10.61 - shadow_teardown(d); 10.62 - /* Call other modes' final teardown code here */ 10.63 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 10.64 + hap_final_teardown(d); 10.65 + else 10.66 + shadow_final_teardown(d); 10.67 } 10.68 10.69 /* Enable an arbitrary paging-assistance mode. Call once at domain 10.70 * creation. */ 10.71 int paging_enable(struct domain *d, u32 mode) 10.72 { 10.73 - if ( mode & PG_SH_enable ) 10.74 - return shadow_enable(d, mode); 10.75 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 10.76 + return hap_enable(d, mode | PG_HAP_enable); 10.77 else 10.78 - /* No other modes supported yet */ 10.79 - return -EINVAL; 10.80 + return shadow_enable(d, mode | PG_SH_enable); 10.81 } 10.82 10.83 /* Print paging-assistance info to the console */
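Every dispatch point in the paging.c hunk above repeats the predicate opt_hap_enabled && hap_capable_system && is_hvm_domain(d). A hypothetical consolidation into one helper — hap_enabled_for() is not part of this changeset, and struct domain is stubbed for the sketch:

```c
#include <stdio.h>

struct domain { int is_hvm; };              /* stub for the sketch */
static int opt_hap_enabled = 1;             /* "hap" boot parameter */
static int hap_capable_system = 1;          /* set by svm_npt_detect() */
static int is_hvm_domain(struct domain *d) { return d->is_hvm; }

/* Hypothetical helper hoisting the repeated predicate. */
static int hap_enabled_for(struct domain *d)
{
    return opt_hap_enabled && hap_capable_system && is_hvm_domain(d);
}

int main(void)
{
    struct domain d = { .is_hvm = 1 };
    printf("use hap: %d\n", hap_enabled_for(&d));
    return 0;
}
```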
11.1 --- a/xen/arch/x86/mm/shadow/page-guest32.h Wed Mar 07 16:21:21 2007 +0000 11.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 11.3 @@ -1,100 +0,0 @@ 11.4 - 11.5 -#ifndef __X86_PAGE_GUEST_H__ 11.6 -#define __X86_PAGE_GUEST_H__ 11.7 - 11.8 -#ifndef __ASSEMBLY__ 11.9 -# include <asm/types.h> 11.10 -#endif 11.11 - 11.12 -#define PAGETABLE_ORDER_32 10 11.13 -#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 11.14 -#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 11.15 -#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32 11.16 - 11.17 - 11.18 -#define L1_PAGETABLE_SHIFT_32 12 11.19 -#define L2_PAGETABLE_SHIFT_32 22 11.20 - 11.21 -/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */ 11.22 - 11.23 -#ifndef __ASSEMBLY__ 11.24 - 11.25 -typedef u32 intpte_32_t; 11.26 - 11.27 -typedef struct { intpte_32_t l1; } l1_pgentry_32_t; 11.28 -typedef struct { intpte_32_t l2; } l2_pgentry_32_t; 11.29 -typedef l2_pgentry_t root_pgentry_32_t; 11.30 -#endif 11.31 - 11.32 -#define get_pte_flags_32(x) ((u32)(x) & 0xFFF) 11.33 -#define put_pte_flags_32(x) ((intpte_32_t)(x)) 11.34 - 11.35 -/* Get pte access flags (unsigned int). */ 11.36 -#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1)) 11.37 -#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2)) 11.38 - 11.39 -#define l1e_get_paddr_32(x) \ 11.40 - ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) 11.41 -#define l2e_get_paddr_32(x) \ 11.42 - ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK)))) 11.43 - 11.44 -/* Construct an empty pte. */ 11.45 -#define l1e_empty_32() ((l1_pgentry_32_t) { 0 }) 11.46 -#define l2e_empty_32() ((l2_pgentry_32_t) { 0 }) 11.47 - 11.48 -/* Construct a pte from a pfn and access flags. */ 11.49 -#define l1e_from_pfn_32(pfn, flags) \ 11.50 - ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 11.51 -#define l2e_from_pfn_32(pfn, flags) \ 11.52 - ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 11.53 - 11.54 -/* Construct a pte from a physical address and access flags. */ 11.55 -#ifndef __ASSEMBLY__ 11.56 -static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags) 11.57 -{ 11.58 - ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 11.59 - return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 11.60 -} 11.61 -static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags) 11.62 -{ 11.63 - ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 11.64 - return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 11.65 -} 11.66 -#endif /* !__ASSEMBLY__ */ 11.67 - 11.68 - 11.69 -/* Construct a pte from a page pointer and access flags. */ 11.70 -#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags))) 11.71 -#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags))) 11.72 - 11.73 -/* Add extra flags to an existing pte. */ 11.74 -#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags)) 11.75 -#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags)) 11.76 - 11.77 -/* Remove flags from an existing pte. */ 11.78 -#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags)) 11.79 -#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags)) 11.80 - 11.81 -/* Check if a pte's page mapping or significant access flags have changed. 
*/ 11.82 -#define l1e_has_changed_32(x,y,flags) \ 11.83 - ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 11.84 -#define l2e_has_changed_32(x,y,flags) \ 11.85 - ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 11.86 - 11.87 -/* Given a virtual address, get an entry offset into a page table. */ 11.88 -#define l1_table_offset_32(a) \ 11.89 - (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1)) 11.90 -#define l2_table_offset_32(a) \ 11.91 - (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1)) 11.92 - 11.93 -#endif /* __X86_PAGE_GUEST_H__ */ 11.94 - 11.95 -/* 11.96 - * Local variables: 11.97 - * mode: C 11.98 - * c-set-style: "BSD" 11.99 - * c-basic-offset: 4 11.100 - * tab-width: 4 11.101 - * indent-tabs-mode: nil 11.102 - * End: 11.103 - */
12.1 --- a/xen/arch/x86/mm/shadow/types.h Wed Mar 07 16:21:21 2007 +0000 12.2 +++ b/xen/arch/x86/mm/shadow/types.h Thu Mar 08 10:54:56 2007 +0000 12.3 @@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_fr 12.4 12.5 #if GUEST_PAGING_LEVELS == 2 12.6 12.7 -#include "page-guest32.h" 12.8 +#include "../page-guest32.h" 12.9 12.10 #define GUEST_L1_PAGETABLE_ENTRIES 1024 12.11 #define GUEST_L2_PAGETABLE_ENTRIES 1024
13.1 --- a/xen/include/asm-x86/domain.h Wed Mar 07 16:21:21 2007 +0000 13.2 +++ b/xen/include/asm-x86/domain.h Thu Mar 08 10:54:56 2007 +0000 13.3 @@ -104,6 +104,21 @@ struct shadow_vcpu { 13.4 }; 13.5 13.6 /************************************************/ 13.7 +/* hardware assisted paging */ 13.8 +/************************************************/ 13.9 +struct hap_domain { 13.10 + spinlock_t lock; 13.11 + int locker; 13.12 + const char *locker_function; 13.13 + 13.14 + struct list_head freelists; 13.15 + struct list_head p2m_freelist; 13.16 + unsigned int total_pages; /* number of pages allocated */ 13.17 + unsigned int free_pages; /* number of pages on freelists */ 13.18 + unsigned int p2m_pages; /* number of pages allocated to p2m */ 13.19 +}; 13.20 + 13.21 +/************************************************/ 13.22 /* p2m handling */ 13.23 /************************************************/ 13.24 13.25 @@ -135,6 +150,7 @@ struct paging_domain { 13.26 struct shadow_domain shadow; 13.27 13.28 /* Other paging assistance code will have structs here */ 13.29 + struct hap_domain hap; 13.30 }; 13.31 13.32 struct paging_vcpu {
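The new struct hap_domain mirrors the shadow pool's bookkeeping: total_pages counts pages allocated to the pool, free_pages what currently sits on the freelists, and p2m_pages what has been handed to the p2m. Whether p2m pages are counted inside or outside total_pages is a convention of the hap implementation not visible in this hunk, but one relation holds either way; a sketch (hypothetical helper, not part of the changeset):

    /* Sketch: pages on the freelists are by definition part of the
     * pool, so free_pages can never exceed total_pages, regardless of
     * how p2m_pages is accounted. */
    static inline void hap_audit_counts(struct hap_domain *h)
    {
        ASSERT(h->free_pages <= h->total_pages);
    }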
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/xen/include/asm-x86/hap.h Thu Mar 08 10:54:56 2007 +0000 14.3 @@ -0,0 +1,122 @@ 14.4 +/****************************************************************************** 14.5 + * include/asm-x86/hap.h 14.6 + * 14.7 + * hardware-assisted paging 14.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 14.9 + * 14.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 14.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 14.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 14.13 + * 14.14 + * This program is free software; you can redistribute it and/or modify 14.15 + * it under the terms of the GNU General Public License as published by 14.16 + * the Free Software Foundation; either version 2 of the License, or 14.17 + * (at your option) any later version. 14.18 + * 14.19 + * This program is distributed in the hope that it will be useful, 14.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 14.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14.22 + * GNU General Public License for more details. 14.23 + * 14.24 + * You should have received a copy of the GNU General Public License 14.25 + * along with this program; if not, write to the Free Software 14.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 14.27 + */ 14.28 + 14.29 +#ifndef _XEN_HAP_H 14.30 +#define _XEN_HAP_H 14.31 + 14.32 +#define HERE_I_AM \ 14.33 + debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__) 14.34 +#define HAP_PRINTK(_f, _a...) \ 14.35 + debugtrace_printk("hap: %s(): " _f, __func__, ##_a) 14.36 +#define HAP_ERROR(_f, _a...) \ 14.37 + printk("hap error: %s(): " _f, __func__, ##_a) 14.38 + 14.39 +/************************************************/ 14.40 +/* hap domain page mapping */ 14.41 +/************************************************/ 14.42 +static inline void * 14.43 +hap_map_domain_page(mfn_t mfn) 14.44 +{ 14.45 + return map_domain_page(mfn_x(mfn)); 14.46 +} 14.47 + 14.48 +static inline void 14.49 +hap_unmap_domain_page(void *p) 14.50 +{ 14.51 + unmap_domain_page(p); 14.52 +} 14.53 + 14.54 +static inline void * 14.55 +hap_map_domain_page_global(mfn_t mfn) 14.56 +{ 14.57 + return map_domain_page_global(mfn_x(mfn)); 14.58 +} 14.59 + 14.60 +static inline void 14.61 +hap_unmap_domain_page_global(void *p) 14.62 +{ 14.63 + unmap_domain_page_global(p); 14.64 +} 14.65 + 14.66 +/************************************************/ 14.67 +/* locking for hap code */ 14.68 +/************************************************/ 14.69 +#define hap_lock_init(_d) \ 14.70 + do { \ 14.71 + spin_lock_init(&(_d)->arch.paging.hap.lock); \ 14.72 + (_d)->arch.paging.hap.locker = -1; \ 14.73 + (_d)->arch.paging.hap.locker_function = "nobody"; \ 14.74 + } while (0) 14.75 + 14.76 +#define hap_locked_by_me(_d) \ 14.77 + (current->processor == (_d)->arch.paging.hap.locker) 14.78 + 14.79 +#define hap_lock(_d) \ 14.80 + do { \ 14.81 + if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\ 14.82 + { \ 14.83 + printk("Error: hap lock held by %s\n", \ 14.84 + (_d)->arch.paging.hap.locker_function); \ 14.85 + BUG(); \ 14.86 + } \ 14.87 + spin_lock(&(_d)->arch.paging.hap.lock); \ 14.88 + ASSERT((_d)->arch.paging.hap.locker == -1); \ 14.89 + (_d)->arch.paging.hap.locker = current->processor; \ 14.90 + (_d)->arch.paging.hap.locker_function = __func__; \ 14.91 + } while (0) 14.92 + 14.93 +#define hap_unlock(_d) \ 14.94 + do { \ 14.95 + 
ASSERT((_d)->arch.paging.hap.locker == current->processor); \ 14.96 + (_d)->arch.paging.hap.locker = -1; \ 14.97 + (_d)->arch.paging.hap.locker_function = "nobody"; \ 14.98 + spin_unlock(&(_d)->arch.paging.hap.lock); \ 14.99 + } while (0) 14.100 + 14.101 +/************************************************/ 14.102 +/* hap domain level functions */ 14.103 +/************************************************/ 14.104 +void hap_domain_init(struct domain *d); 14.105 +int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 14.106 + XEN_GUEST_HANDLE(void) u_domctl); 14.107 +int hap_enable(struct domain *d, u32 mode); 14.108 +void hap_final_teardown(struct domain *d); 14.109 +void hap_teardown(struct domain *d); 14.110 +void hap_vcpu_init(struct vcpu *v); 14.111 + 14.112 +extern struct paging_mode hap_paging_real_mode; 14.113 +extern struct paging_mode hap_paging_protected_mode; 14.114 +extern struct paging_mode hap_paging_pae_mode; 14.115 +extern struct paging_mode hap_paging_long_mode; 14.116 +#endif /* _XEN_HAP_H */ 14.117 + 14.118 +/* 14.119 + * Local variables: 14.120 + * mode: C 14.121 + * c-set-style: "BSD" 14.122 + * c-basic-offset: 4 14.123 + * indent-tabs-mode: nil 14.124 + * End: 14.125 + */
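Taken together, hap_lock()/hap_unlock() give the hap code the same self-diagnosing lock discipline as the shadow code: the holder's CPU and function name are recorded, so a recursive acquisition hits the BUG() in hap_lock() instead of silently deadlocking. A sketch of the intended usage (hap_example_op() is a hypothetical caller, not part of the changeset):

    /* Sketch: a typical hap entry point bracketing its work with the
     * lock macros declared above. */
    static void hap_example_op(struct domain *d)
    {
        hap_lock(d);
        /* ... manipulate d->arch.paging.hap state ... */
        hap_unlock(d);
    }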