ia64/xen-unstable
changeset 18901:6595393a3d28
Use virtual 8086 mode for VMX guests with CR0.PE == 0
When a VMX guest tries to enter real mode, put it in virtual 8086 mode
instead, if that's possible. Handle all errors and corner cases by
falling back to the real-mode emulator.
This is similar to the old VMXASSIST system except it uses Xen's
x86_emulate emulator instead of having a partial emulator in the guest
firmware. It more than doubles the speed of real-mode operation on
VMX.
Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
author    Keir Fraser <keir.fraser@citrix.com>
date      Tue Dec 09 16:28:02 2008 +0000 (2008-12-09)
parents   5535efd8e011
children  b73f3646a17f
files     tools/firmware/hvmloader/hvmloader.c
          tools/libxc/xc_domain_restore.c
          tools/libxc/xc_domain_save.c
          xen/arch/x86/hvm/vmx/entry.S
          xen/arch/x86/hvm/vmx/realmode.c
          xen/arch/x86/hvm/vmx/vmcs.c
          xen/arch/x86/hvm/vmx/vmx.c
          xen/arch/x86/x86_32/asm-offsets.c
          xen/arch/x86/x86_64/asm-offsets.c
          xen/arch/x86/x86_emulate/x86_emulate.h
          xen/include/asm-x86/hvm/vmx/vmcs.h
          xen/include/asm-x86/perfc_defn.h
          xen/include/public/hvm/params.h
line diff
--- a/tools/firmware/hvmloader/hvmloader.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/tools/firmware/hvmloader/hvmloader.c	Tue Dec 09 16:28:02 2008 +0000
@@ -536,6 +536,23 @@ static uint16_t init_xen_platform_io_bas
     return bios_info->xen_pfiob;
 }
 
+/* Set up an empty TSS area for virtual 8086 mode to use.
+ * The only important thing is that it musn't have any bits set
+ * in the interrupt redirection bitmap, so all zeros will do. */
+static void init_vm86_tss(void)
+{
+    uint32_t tss;
+    struct xen_hvm_param p;
+
+    tss = e820_malloc(128, 128);
+    memset((char *)tss, 0, 128);
+    p.domid = DOMID_SELF;
+    p.index = HVM_PARAM_VM86_TSS;
+    p.value = tss;
+    hypercall_hvm_op(HVMOP_set_param, &p);
+    printf("vm86 TSS at %08x\n", tss);
+}
+
 int main(void)
 {
     int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0;
@@ -606,6 +623,8 @@ int main(void)
         acpi_build_tables();
     }
 
+    init_vm86_tss();
+
     cmos_write_memory_size();
 
     printf("BIOS map:\n");
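The comment in init_vm86_tss() is doing real work: with CR4.VME enabled, the CPU consults a software-interrupt redirection bitmap held in the TSS, and a clear bit for vector n keeps INT n inside virtual 8086 mode, vectoring through the real-mode IVT, while a set bit sends it through the normal protected-mode path. That is why a fully zeroed TSS suffices here. A minimal sketch of the bitmap semantics (the helper name is ours, not Xen's):

#include <stdint.h>

/* Illustration only: bit n of the VME software-interrupt redirection
 * bitmap selects how INT n is delivered in virtual 8086 mode.
 * Clear = handle inside vm86 via the real-mode IVT; set = divert to
 * the protected-mode machinery.  All-zero keeps everything in vm86. */
static int int_stays_in_vm86(const uint8_t *redir_bitmap, unsigned int vector)
{
    return !((redir_bitmap[vector / 8] >> (vector % 8)) & 1);
}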
--- a/tools/libxc/xc_domain_restore.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/tools/libxc/xc_domain_restore.c	Tue Dec 09 16:28:02 2008 +0000
@@ -490,6 +490,22 @@ int xc_domain_restore(int xc_handle, int
             continue;
         }
 
+        if ( j == -4 )
+        {
+            uint64_t vm86_tss;
+
+            /* Skip padding 4 bytes then read the vm86 TSS location. */
+            if ( read_exact(io_fd, &vm86_tss, sizeof(uint32_t)) ||
+                 read_exact(io_fd, &vm86_tss, sizeof(uint64_t)) )
+            {
+                ERROR("error read the address of the vm86 TSS");
+                goto out;
+            }
+
+            xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, vm86_tss);
+            continue;
+        }
+
         if ( j == 0 )
             break; /* our work here is done */
--- a/tools/libxc/xc_domain_save.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/tools/libxc/xc_domain_save.c	Tue Dec 09 16:28:02 2008 +0000
@@ -1388,20 +1388,32 @@ int xc_domain_save(int xc_handle, int io
     if ( hvm )
     {
         struct {
-            int minusthree;
+            int id;
             uint32_t pad;
-            uint64_t ident_pt;
-        } chunk = { -3, 0 };
+            uint64_t data;
+        } chunk = { 0, };
 
+        chunk.id = -3;
         xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                         (unsigned long *)&chunk.ident_pt);
+                         (unsigned long *)&chunk.data);
 
-        if ( (chunk.ident_pt != 0) &&
+        if ( (chunk.data != 0) &&
             write_exact(io_fd, &chunk, sizeof(chunk)) )
         {
             PERROR("Error when writing the ident_pt for EPT guest");
             goto out;
         }
+
+        chunk.id = -4;
+        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS,
+                         (unsigned long *)&chunk.data);
+
+        if ( (chunk.data != 0) &&
+            write_exact(io_fd, &chunk, sizeof(chunk)) )
+        {
+            PERROR("Error when writing the vm86 TSS for guest");
+            goto out;
+        }
     }
 
     /* Zero terminate */
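Taken together with the restore hunk above, the save-image stream gains a second tagged chunk alongside the existing ident_pt one. A sketch of the assumed wire layout, with a struct name of our own invention (page batches use a positive count in the id slot, so a negative id safely tags out-of-band data):

#include <stdint.h>

/* Assumed layout of the out-of-band HVM-parameter chunk (name is ours). */
struct hvm_param_chunk {
    int32_t  id;    /* -3: HVM_PARAM_IDENT_PT; -4: HVM_PARAM_VM86_TSS */
    uint32_t pad;   /* the 4 bytes the restore side explicitly skips  */
    uint64_t data;  /* parameter value: a guest physical address      */
};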
--- a/xen/arch/x86/hvm/vmx/entry.S	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/entry.S	Tue Dec 09 16:28:02 2008 +0000
@@ -133,9 +133,15 @@ vmx_asm_do_vmentry:
         cmpl $0,(r(dx),r(ax),1)
         jnz  .Lvmx_process_softirqs
 
-        testb $0xff,VCPU_vmx_emul(r(bx))
-        jnz  .Lvmx_goto_realmode
+        testb $0xff,VCPU_vmx_emulate(r(bx))
+        jnz  .Lvmx_goto_emulator
+        testb $0xff,VCPU_vmx_realmode(r(bx))
+        jz   .Lvmx_not_realmode
+        cmpw $0,VCPU_vm86_seg_mask(r(bx))
+        jnz  .Lvmx_goto_emulator
+        call_with_regs(vmx_enter_realmode)
 
+.Lvmx_not_realmode:
         mov  VCPU_hvm_guest_cr2(r(bx)),r(ax)
         mov  r(ax),%cr2
         call vmx_trace_vmentry
@@ -189,7 +195,7 @@ vmx_asm_do_vmentry:
         call vm_launch_fail
         ud2
 
-.Lvmx_goto_realmode:
+.Lvmx_goto_emulator:
         sti
         call_with_regs(vmx_realmode)
         jmp  vmx_asm_do_vmentry
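For readers who don't speak AT&T assembly, the new dispatch on the vmentry path reads roughly as follows in C; the types and names here are stand-ins for illustration, not Xen's:

#include <stdint.h>

/* Stand-in types; the real fields live in struct arch_vmx_struct. */
struct vmx_state {
    uint8_t  vmx_emulate;      /* VCPU_vmx_emulate   */
    uint8_t  vmx_realmode;     /* VCPU_vmx_realmode  */
    uint16_t vm86_seg_mask;    /* VCPU_vm86_seg_mask */
};
enum entry_path { PATH_EMULATOR, PATH_VMENTER };

/* Stub standing in for vmx_enter_realmode(regs). */
static void enter_realmode_stub(void) { /* set VM|IOPL in guest RFLAGS */ }

static enum entry_path choose_entry_path(const struct vmx_state *s)
{
    if ( s->vmx_emulate )
        return PATH_EMULATOR;        /* .Lvmx_goto_emulator */
    if ( s->vmx_realmode )
    {
        if ( s->vm86_seg_mask )
            return PATH_EMULATOR;    /* some segment unsafe for vm86 */
        enter_realmode_stub();       /* flip RFLAGS, then VMENTER */
    }
    return PATH_VMENTER;             /* .Lvmx_not_realmode */
}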
--- a/xen/arch/x86/hvm/vmx/realmode.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/realmode.c	Tue Dec 09 16:28:02 2008 +0000
@@ -103,31 +103,13 @@ static void realmode_deliver_exception(
 static void realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
     struct vcpu *curr = current;
-    unsigned long seg_reg_dirty;
     uint32_t intr_info;
     int rc;
 
-    seg_reg_dirty = hvmemul_ctxt->seg_reg_dirty;
-    hvmemul_ctxt->seg_reg_dirty = 0;
+    perfc_incr(realmode_emulations);
 
     rc = hvm_emulate_one(hvmemul_ctxt);
 
-    if ( test_bit(x86_seg_cs, &hvmemul_ctxt->seg_reg_dirty) )
-    {
-        curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS;
-        if ( hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt)->sel & 3 )
-            curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS;
-    }
-
-    if ( test_bit(x86_seg_ss, &hvmemul_ctxt->seg_reg_dirty) )
-    {
-        curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS;
-        if ( hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt)->sel & 3 )
-            curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS;
-    }
-
-    hvmemul_ctxt->seg_reg_dirty |= seg_reg_dirty;
-
     if ( rc == X86EMUL_UNHANDLEABLE )
     {
         gdprintk(XENLOG_ERR, "Failed to emulate insn.\n");
@@ -210,7 +192,8 @@ void vmx_realmode(struct cpu_user_regs *
         intr_info = 0;
     }
 
-    while ( curr->arch.hvm_vmx.vmxemul &&
+    curr->arch.hvm_vmx.vmx_emulate = 1;
+    while ( curr->arch.hvm_vmx.vmx_emulate &&
             !softirq_pending(smp_processor_id()) &&
             (curr->arch.hvm_vcpu.io_state == HVMIO_none) )
     {
@@ -220,13 +203,27 @@ void vmx_realmode(struct cpu_user_regs *
          * in real mode, because we don't emulate protected-mode IDT vectoring.
          */
         if ( unlikely(!(++emulations & 15)) &&
-             !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) &&
+             curr->arch.hvm_vmx.vmx_realmode &&
              hvm_local_events_need_delivery(curr) )
             break;
+
         realmode_emulate_one(&hvmemul_ctxt);
+
+        /* Stop emulating unless our segment state is not safe */
+        if ( curr->arch.hvm_vmx.vmx_realmode )
+            curr->arch.hvm_vmx.vmx_emulate =
+                (curr->arch.hvm_vmx.vm86_segment_mask != 0);
+        else
+            curr->arch.hvm_vmx.vmx_emulate =
+                ((hvmemul_ctxt.seg_reg[x86_seg_cs].sel & 3)
+                 || (hvmemul_ctxt.seg_reg[x86_seg_ss].sel & 3));
     }
 
-    if ( !curr->arch.hvm_vmx.vmxemul )
+    /* Need to emulate next time if we've started an IO operation */
+    if ( curr->arch.hvm_vcpu.io_state != HVMIO_none )
+        curr->arch.hvm_vmx.vmx_emulate = 1;
+
+    if ( !curr->arch.hvm_vmx.vmx_emulate && !curr->arch.hvm_vmx.vmx_realmode )
    {
        /*
         * Cannot enter protected mode with bogus selector RPLs and DPLs.
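The loop's continuation rule is worth restating: while CR0.PE is still 0, the loop keeps emulating only while some segment remains unsafe for vm86; once the guest enables protected mode, it keeps emulating only while CS or SS carries a nonzero RPL, which VMX would reject at VMENTER (the "bogus selector RPLs and DPLs" case below). A hedged restatement with stand-in names:

#include <stdint.h>

/* Sketch of the loop's "keep emulating?" decision above. */
static int keep_emulating(int realmode, uint16_t vm86_segment_mask,
                          uint16_t cs_sel, uint16_t ss_sel)
{
    if ( realmode )
        /* CR0.PE == 0: emulate only while a segment is unsafe for vm86. */
        return vm86_segment_mask != 0;
    /* Guest has left real mode: VMX refuses to VMENTER while CS or SS
     * has a nonzero RPL, so emulate until both selectors are clean. */
    return (cs_sel & 3) || (ss_sel & 3);
}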
--- a/xen/arch/x86/hvm/vmx/vmcs.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue Dec 09 16:28:02 2008 +0000
@@ -880,15 +880,6 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static void vmx_dump_sel(char *name, enum x86_segment seg)
-{
-    struct segment_register sreg;
-    hvm_get_segment_register(current, seg, &sreg);
-    printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016llx\n",
-           name, sreg.sel, sreg.attr.bytes, sreg.limit,
-           (unsigned long long)sreg.base);
-}
-
 static unsigned long vmr(unsigned long field)
 {
     int rc;
@@ -897,6 +888,28 @@ static unsigned long vmr(unsigned long f
     return rc ? 0 : val;
 }
 
+static void vmx_dump_sel(char *name, uint32_t selector)
+{
+    uint32_t sel, attr, limit;
+    uint64_t base;
+    sel = vmr(selector);
+    attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
+    limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
+    base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
+    printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016"PRIx64"\n",
+           name, sel, attr, limit, base);
+}
+
+static void vmx_dump_sel2(char *name, uint32_t lim)
+{
+    uint32_t limit;
+    uint64_t base;
+    limit = vmr(lim);
+    base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
+    printk("%s: limit=0x%08x, base=0x%016"PRIx64"\n",
+           name, limit, base);
+}
+
 void vmcs_dump_vcpu(struct vcpu *v)
 {
     struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
@@ -938,16 +951,16 @@ void vmcs_dump_vcpu(struct vcpu *v)
            (unsigned long long)vmr(GUEST_SYSENTER_ESP),
            (int)vmr(GUEST_SYSENTER_CS),
            (unsigned long long)vmr(GUEST_SYSENTER_EIP));
-    vmx_dump_sel("CS", x86_seg_cs);
-    vmx_dump_sel("DS", x86_seg_ds);
-    vmx_dump_sel("SS", x86_seg_ss);
-    vmx_dump_sel("ES", x86_seg_es);
-    vmx_dump_sel("FS", x86_seg_fs);
-    vmx_dump_sel("GS", x86_seg_gs);
-    vmx_dump_sel("GDTR", x86_seg_gdtr);
-    vmx_dump_sel("LDTR", x86_seg_ldtr);
-    vmx_dump_sel("IDTR", x86_seg_idtr);
-    vmx_dump_sel("TR", x86_seg_tr);
+    vmx_dump_sel("CS", GUEST_CS_SELECTOR);
+    vmx_dump_sel("DS", GUEST_DS_SELECTOR);
+    vmx_dump_sel("SS", GUEST_SS_SELECTOR);
+    vmx_dump_sel("ES", GUEST_ES_SELECTOR);
+    vmx_dump_sel("FS", GUEST_FS_SELECTOR);
+    vmx_dump_sel("GS", GUEST_GS_SELECTOR);
+    vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
+    vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
+    vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
+    vmx_dump_sel("TR", GUEST_TR_SELECTOR);
     x  = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
     x |= (uint32_t)vmr(TSC_OFFSET);
     printk("TSC Offset = %016llx\n", x);
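The rewritten vmx_dump_sel() leans on a regularity of the VMCS encoding space: within each field-width group, per-segment encodings are 2 apart, so a delta computed on ES applies to every other segment, and GDTR/IDTR (which have no selector) get the separate limit-based variant. A self-contained check, using encodings as given in the Intel SDM appendix B (quoted from memory here, so treat the values as illustrative):

#include <assert.h>

/* ES and CS shown; SS, DS, FS, GS, LDTR and TR continue the same
 * 2-apart pattern within each group. */
enum {
    GUEST_ES_SELECTOR = 0x0800, GUEST_CS_SELECTOR = 0x0802,
    GUEST_ES_LIMIT    = 0x4800, GUEST_CS_LIMIT    = 0x4802,
    GUEST_ES_AR_BYTES = 0x4814, GUEST_CS_AR_BYTES = 0x4816,
    GUEST_ES_BASE     = 0x6806, GUEST_CS_BASE     = 0x6808,
};

int main(void)
{
    /* Deltas measured on ES hold for CS (and the other segments). */
    assert(GUEST_CS_SELECTOR + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR)
           == GUEST_CS_LIMIT);
    assert(GUEST_CS_SELECTOR + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR)
           == GUEST_CS_AR_BYTES);
    assert(GUEST_CS_SELECTOR + (GUEST_ES_BASE - GUEST_ES_SELECTOR)
           == GUEST_CS_BASE);
    return 0;
}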
--- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Dec 09 16:28:02 2008 +0000
@@ -704,6 +704,26 @@ static void vmx_ctxt_switch_to(struct vc
     vpmu_load(v);
 }
 
+
+/* SDM volume 3b section 22.3.1.2: we can only enter virtual 8086 mode
+ * if all of CS, SS, DS, ES, FS and GS are 16bit ring-3 data segments.
+ * The guest thinks it's got ring-0 segments, so we need to fudge
+ * things.  We store the ring-3 version in the VMCS to avoid lots of
+ * shuffling on vmenter and vmexit, and translate in these accessors. */
+
+#define rm_cs_attr (((union segment_attributes) {                       \
+        .fields = { .type = 0xb, .s = 1, .dpl = 0, .p = 1, .avl = 0,    \
+                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+#define rm_ds_attr (((union segment_attributes) {                       \
+        .fields = { .type = 0x3, .s = 1, .dpl = 0, .p = 1, .avl = 0,    \
+                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+#define vm86_ds_attr (((union segment_attributes) {                     \
+        .fields = { .type = 0x3, .s = 1, .dpl = 3, .p = 1, .avl = 0,    \
+                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+#define vm86_tr_attr (((union segment_attributes) {                     \
+        .fields = { .type = 0xb, .s = 0, .dpl = 0, .p = 1, .avl = 0,    \
+                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+
 static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
                                      struct segment_register *reg)
 {
@@ -779,14 +799,85 @@ static void vmx_get_segment_register(str
     /* Unusable flag is folded into Present flag. */
     if ( attr & (1u<<16) )
         reg->attr.fields.p = 0;
+
+    /* Adjust for virtual 8086 mode */
+    if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr
+         && !(v->arch.hvm_vmx.vm86_segment_mask & (1u << seg)) )
+    {
+        struct segment_register *sreg = &v->arch.hvm_vmx.vm86_saved_seg[seg];
+        if ( seg == x86_seg_tr )
+            *reg = *sreg;
+        else if ( reg->base != sreg->base || seg == x86_seg_ss )
+        {
+            /* If the guest's reloaded the segment, remember the new version.
+             * We can't tell if the guest reloaded the segment with another
+             * one that has the same base.  By default we assume it hasn't,
+             * since we don't want to lose big-real-mode segment attributes,
+             * but for SS we assume it has: the Ubuntu graphical bootloader
+             * does this and gets badly confused if we leave the old SS in
+             * place. */
+            reg->attr.bytes = (seg == x86_seg_cs ? rm_cs_attr : rm_ds_attr);
+            *sreg = *reg;
+        }
+        else
+        {
+            /* Always give realmode guests a selector that matches the base
+             * but keep the attr and limit from before */
+            *reg = *sreg;
+            reg->sel = reg->base >> 4;
+        }
+    }
 }
 
 static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
                                      struct segment_register *reg)
 {
-    uint32_t attr;
+    uint32_t attr, sel, limit;
+    uint64_t base;
+
+    sel = reg->sel;
+    attr = reg->attr.bytes;
+    limit = reg->limit;
+    base = reg->base;
 
-    attr = reg->attr.bytes;
+    /* Adjust CS/SS/DS/ES/FS/GS/TR for virtual 8086 mode */
+    if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr )
+    {
+        /* Remember the proper contents */
+        v->arch.hvm_vmx.vm86_saved_seg[seg] = *reg;
+
+        if ( seg == x86_seg_tr )
+        {
+            if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
+            {
+                sel = 0;
+                attr = vm86_tr_attr;
+                limit = 0xff;
+                base = v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
+                v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
+            }
+            else
+                v->arch.hvm_vmx.vm86_segment_mask |= (1u << seg);
+        }
+        else
+        {
+            /* Try to fake it out as a 16bit data segment.  This could
+             * cause confusion for the guest if it reads the selector,
+             * but otherwise we have to emulate if *any* segment hasn't
+             * been reloaded. */
+            if ( base < 0x100000 && !(base & 0xf) && limit >= 0xffff
+                 && reg->attr.fields.p )
+            {
+                sel = base >> 4;
+                attr = vm86_ds_attr;
+                limit = 0xffff;
+                v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
+            }
+            else
+                v->arch.hvm_vmx.vm86_segment_mask |= (1u << seg);
+        }
+    }
+
     attr = ((attr & 0xf00) << 4) | (attr & 0xff);
 
     /* Not-present must mean unusable. */
@@ -794,67 +885,67 @@ static void vmx_set_segment_register(str
         attr |= (1u << 16);
 
     /* VMX has strict consistency requirement for flag G. */
-    attr |= !!(reg->limit >> 20) << 15;
+    attr |= !!(limit >> 20) << 15;
 
     vmx_vmcs_enter(v);
 
     switch ( seg )
     {
     case x86_seg_cs:
-        __vmwrite(GUEST_CS_SELECTOR, reg->sel);
-        __vmwrite(GUEST_CS_LIMIT, reg->limit);
-        __vmwrite(GUEST_CS_BASE, reg->base);
+        __vmwrite(GUEST_CS_SELECTOR, sel);
+        __vmwrite(GUEST_CS_LIMIT, limit);
+        __vmwrite(GUEST_CS_BASE, base);
         __vmwrite(GUEST_CS_AR_BYTES, attr);
         break;
     case x86_seg_ds:
-        __vmwrite(GUEST_DS_SELECTOR, reg->sel);
-        __vmwrite(GUEST_DS_LIMIT, reg->limit);
-        __vmwrite(GUEST_DS_BASE, reg->base);
+        __vmwrite(GUEST_DS_SELECTOR, sel);
+        __vmwrite(GUEST_DS_LIMIT, limit);
+        __vmwrite(GUEST_DS_BASE, base);
         __vmwrite(GUEST_DS_AR_BYTES, attr);
         break;
     case x86_seg_es:
-        __vmwrite(GUEST_ES_SELECTOR, reg->sel);
-        __vmwrite(GUEST_ES_LIMIT, reg->limit);
-        __vmwrite(GUEST_ES_BASE, reg->base);
+        __vmwrite(GUEST_ES_SELECTOR, sel);
+        __vmwrite(GUEST_ES_LIMIT, limit);
+        __vmwrite(GUEST_ES_BASE, base);
         __vmwrite(GUEST_ES_AR_BYTES, attr);
         break;
     case x86_seg_fs:
-        __vmwrite(GUEST_FS_SELECTOR, reg->sel);
-        __vmwrite(GUEST_FS_LIMIT, reg->limit);
-        __vmwrite(GUEST_FS_BASE, reg->base);
+        __vmwrite(GUEST_FS_SELECTOR, sel);
+        __vmwrite(GUEST_FS_LIMIT, limit);
+        __vmwrite(GUEST_FS_BASE, base);
         __vmwrite(GUEST_FS_AR_BYTES, attr);
         break;
     case x86_seg_gs:
-        __vmwrite(GUEST_GS_SELECTOR, reg->sel);
-        __vmwrite(GUEST_GS_LIMIT, reg->limit);
-        __vmwrite(GUEST_GS_BASE, reg->base);
+        __vmwrite(GUEST_GS_SELECTOR, sel);
+        __vmwrite(GUEST_GS_LIMIT, limit);
+        __vmwrite(GUEST_GS_BASE, base);
        __vmwrite(GUEST_GS_AR_BYTES, attr);
        break;
     case x86_seg_ss:
-        __vmwrite(GUEST_SS_SELECTOR, reg->sel);
-        __vmwrite(GUEST_SS_LIMIT, reg->limit);
-        __vmwrite(GUEST_SS_BASE, reg->base);
+        __vmwrite(GUEST_SS_SELECTOR, sel);
+        __vmwrite(GUEST_SS_LIMIT, limit);
+        __vmwrite(GUEST_SS_BASE, base);
         __vmwrite(GUEST_SS_AR_BYTES, attr);
         break;
     case x86_seg_tr:
-        __vmwrite(GUEST_TR_SELECTOR, reg->sel);
-        __vmwrite(GUEST_TR_LIMIT, reg->limit);
-        __vmwrite(GUEST_TR_BASE, reg->base);
+        __vmwrite(GUEST_TR_SELECTOR, sel);
+        __vmwrite(GUEST_TR_LIMIT, limit);
+        __vmwrite(GUEST_TR_BASE, base);
         /* VMX checks that the the busy flag (bit 1) is set. */
         __vmwrite(GUEST_TR_AR_BYTES, attr | 2);
         break;
     case x86_seg_gdtr:
-        __vmwrite(GUEST_GDTR_LIMIT, reg->limit);
-        __vmwrite(GUEST_GDTR_BASE, reg->base);
+        __vmwrite(GUEST_GDTR_LIMIT, limit);
+        __vmwrite(GUEST_GDTR_BASE, base);
         break;
     case x86_seg_idtr:
-        __vmwrite(GUEST_IDTR_LIMIT, reg->limit);
-        __vmwrite(GUEST_IDTR_BASE, reg->base);
+        __vmwrite(GUEST_IDTR_LIMIT, limit);
+        __vmwrite(GUEST_IDTR_BASE, base);
         break;
     case x86_seg_ldtr:
-        __vmwrite(GUEST_LDTR_SELECTOR, reg->sel);
-        __vmwrite(GUEST_LDTR_LIMIT, reg->limit);
-        __vmwrite(GUEST_LDTR_BASE, reg->base);
+        __vmwrite(GUEST_LDTR_SELECTOR, sel);
+        __vmwrite(GUEST_LDTR_LIMIT, limit);
+        __vmwrite(GUEST_LDTR_BASE, base);
         __vmwrite(GUEST_LDTR_AR_BYTES, attr);
         break;
     default:
@@ -970,6 +1061,7 @@ static void vmx_update_guest_cr(struct v
     switch ( cr )
     {
     case 0: {
+        int realmode;
         unsigned long hw_cr0_mask =
             X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
 
@@ -998,9 +1090,44 @@ static void vmx_update_guest_cr(struct v
             vmx_fpu_enter(v);
         }
 
-        v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE;
-        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
-            v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE;
+        realmode = !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE);
+        if ( realmode != v->arch.hvm_vmx.vmx_realmode )
+        {
+            enum x86_segment s;
+            struct segment_register reg[x86_seg_tr + 1];
+
+            /* Entering or leaving real mode: adjust the segment registers.
+             * Need to read them all either way, as realmode reads can update
+             * the saved values we'll use when returning to prot mode. */
+            for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
+                vmx_get_segment_register(v, s, &reg[s]);
+            v->arch.hvm_vmx.vmx_realmode = realmode;
+
+            if ( realmode )
+            {
+                for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
+                    vmx_set_segment_register(v, s, &reg[s]);
+                v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
+                __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
+                __vmwrite(EXCEPTION_BITMAP, 0xffffffff);
+            }
+            else
+            {
+                for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
+                    if ( !(v->arch.hvm_vmx.vm86_segment_mask & (1<<s)) )
+                        vmx_set_segment_register(
+                            v, s, &v->arch.hvm_vmx.vm86_saved_seg[s]);
+                v->arch.hvm_vcpu.hw_cr[4] =
+                    ((v->arch.hvm_vcpu.hw_cr[4] & ~X86_CR4_VME)
+                     |(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VME));
+                __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
+                __vmwrite(EXCEPTION_BITMAP,
+                          HVM_TRAP_MASK
+                          | (paging_mode_hap(v->domain) ?
+                             0 : (1U << TRAP_page_fault))
+                          | (1U << TRAP_no_device));
+            }
+        }
 
         v->arch.hvm_vcpu.hw_cr[0] =
             v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
@@ -1028,6 +1155,8 @@ static void vmx_update_guest_cr(struct v
         if ( paging_mode_hap(v->domain) )
             v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
         v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
+        if ( v->arch.hvm_vmx.vmx_realmode )
+            v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
         if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
         {
             v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
@@ -1097,6 +1226,7 @@ void ept_sync_domain(struct domain *d)
 static void __vmx_inject_exception(int trap, int type, int error_code)
 {
     unsigned long intr_fields;
+    struct vcpu *curr = current;
 
     /*
      * NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
@@ -1113,6 +1243,11 @@ static void __vmx_inject_exception(int t
     }
 
     __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
+
+    /* Can't inject exceptions in virtual 8086 mode because they would
+     * use the protected-mode IDT.  Emulate at the next vmenter instead. */
+    if ( curr->arch.hvm_vmx.vmx_realmode )
+        curr->arch.hvm_vmx.vmx_emulate = 1;
 }
 
 void vmx_inject_hw_exception(int trap, int error_code)
@@ -2072,6 +2207,17 @@ static void vmx_failed_vmentry(unsigned
     domain_crash(curr->domain);
 }
 
+asmlinkage void vmx_enter_realmode(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+
+    /* Adjust RFLAGS to enter virtual 8086 mode with IOPL == 3.  Since
+     * we have CR4.VME == 1 and our own TSS with an empty interrupt
+     * redirection bitmap, all software INTs will be handled by vm86 */
+    v->arch.hvm_vmx.vm86_saved_eflags = regs->eflags;
+    regs->eflags |= (X86_EFLAGS_VM | X86_EFLAGS_IOPL);
+}
+
 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
 {
     unsigned int exit_reason, idtv_info;
@@ -2100,6 +2246,42 @@ asmlinkage void vmx_vmexit_handler(struc
     if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
         return vmx_failed_vmentry(exit_reason, regs);
 
+    if ( v->arch.hvm_vmx.vmx_realmode )
+    {
+        unsigned int vector;
+
+        /* Put RFLAGS back the way the guest wants it */
+        regs->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IOPL);
+        regs->eflags |= (v->arch.hvm_vmx.vm86_saved_eflags & X86_EFLAGS_IOPL);
+
+        /* Unless this exit was for an interrupt, we've hit something
+         * vm86 can't handle.  Try again, using the emulator. */
+        switch ( exit_reason )
+        {
+        case EXIT_REASON_EXCEPTION_NMI:
+            vector = __vmread(VM_EXIT_INTR_INFO) & INTR_INFO_VECTOR_MASK;;
+            if ( vector != TRAP_page_fault
+                 && vector != TRAP_nmi
+                 && vector != TRAP_machine_check )
+            {
+                perfc_incr(realmode_exits);
+                v->arch.hvm_vmx.vmx_emulate = 1;
+                return;
+            }
+        case EXIT_REASON_EXTERNAL_INTERRUPT:
+        case EXIT_REASON_INIT:
+        case EXIT_REASON_SIPI:
+        case EXIT_REASON_PENDING_VIRT_INTR:
+        case EXIT_REASON_PENDING_VIRT_NMI:
+        case EXIT_REASON_MACHINE_CHECK:
+            break;
+        default:
+            v->arch.hvm_vmx.vmx_emulate = 1;
+            perfc_incr(realmode_exits);
+            return;
+        }
+    }
+
     hvm_maybe_deassert_evtchn_irq();
 
     /* Event delivery caused this intercept? Queue for redelivery. */
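Evaluated against the bitfield layout this changeset completes in x86_emulate.h (type in bits 0-3, s in bit 4, dpl in bits 5-6, p in bit 7), the four attribute macros above come out to 0x9b, 0x93, 0xf3 and 0x8b. A small standalone check of that arithmetic:

#include <assert.h>
#include <stdint.h>

/* Pack the fields the way segment_attributes_t lays them out:
 * type[3:0], s[4], dpl[6:5], p[7]; avl/l/db/g all zero here. */
static uint16_t attrs(unsigned int type, unsigned int s, unsigned int dpl)
{
    return (uint16_t)(type | (s << 4) | (dpl << 5) | (1u << 7));
}

int main(void)
{
    assert(attrs(0xb, 1, 0) == 0x9b);   /* rm_cs_attr: code, ring 0      */
    assert(attrs(0x3, 1, 0) == 0x93);   /* rm_ds_attr: data, ring 0      */
    assert(attrs(0x3, 1, 3) == 0xf3);   /* vm86_ds_attr: data, ring 3    */
    assert(attrs(0xb, 0, 0) == 0x8b);   /* vm86_tr_attr: busy 32-bit TSS */
    return 0;
}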
--- a/xen/arch/x86/x86_32/asm-offsets.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/x86_32/asm-offsets.c	Tue Dec 09 16:28:02 2008 +0000
@@ -88,7 +88,9 @@ void __dummy__(void)
     BLANK();
 
     OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
-    OFFSET(VCPU_vmx_emul, struct vcpu, arch.hvm_vmx.vmxemul);
+    OFFSET(VCPU_vmx_realmode, struct vcpu, arch.hvm_vmx.vmx_realmode);
+    OFFSET(VCPU_vmx_emulate, struct vcpu, arch.hvm_vmx.vmx_emulate);
+    OFFSET(VCPU_vm86_seg_mask, struct vcpu, arch.hvm_vmx.vm86_segment_mask);
     OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
     BLANK();
--- a/xen/arch/x86/x86_64/asm-offsets.c	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/x86_64/asm-offsets.c	Tue Dec 09 16:28:02 2008 +0000
@@ -107,7 +107,9 @@ void __dummy__(void)
     BLANK();
 
     OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
-    OFFSET(VCPU_vmx_emul, struct vcpu, arch.hvm_vmx.vmxemul);
+    OFFSET(VCPU_vmx_realmode, struct vcpu, arch.hvm_vmx.vmx_realmode);
+    OFFSET(VCPU_vmx_emulate, struct vcpu, arch.hvm_vmx.vmx_emulate);
+    OFFSET(VCPU_vm86_seg_mask, struct vcpu, arch.hvm_vmx.vm86_segment_mask);
     OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
     BLANK();
--- a/xen/arch/x86/x86_emulate/x86_emulate.h	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h	Tue Dec 09 16:28:02 2008 +0000
@@ -67,6 +67,7 @@ typedef union segment_attributes {
         uint16_t l:   1;  /* 9;  Bit 53 */
         uint16_t db:  1;  /* 10; Bit 54 */
         uint16_t g:   1;  /* 11; Bit 55 */
+        uint16_t pad: 4;
     } fields;
 } __attribute__ ((packed)) segment_attributes_t;
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Dec 09 16:28:02 2008 +0000
@@ -109,11 +109,16 @@ struct arch_vmx_struct {
 
     unsigned long        host_cr0;
 
+    /* Is the guest in real mode? */
+    uint8_t              vmx_realmode;
     /* Are we emulating rather than VMENTERing? */
-#define VMXEMUL_REALMODE 1  /* Yes, because CR0.PE == 0   */
-#define VMXEMUL_BAD_CS   2  /* Yes, because CS.RPL != CPL */
-#define VMXEMUL_BAD_SS   4  /* Yes, because SS.RPL != CPL */
-    uint8_t              vmxemul;
+    uint8_t              vmx_emulate;
+    /* Bitmask of segments that we can't safely use in virtual 8086 mode */
+    uint16_t             vm86_segment_mask;
+    /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
+    struct segment_register vm86_saved_seg[x86_seg_tr + 1];
+    /* Remember EFLAGS while in virtual 8086 mode */
+    uint32_t             vm86_saved_eflags;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
--- a/xen/include/asm-x86/perfc_defn.h	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/include/asm-x86/perfc_defn.h	Tue Dec 09 16:28:02 2008 +0000
@@ -127,4 +127,7 @@ PERFCOUNTER(mshv_wrmsr_icr,
 PERFCOUNTER(mshv_wrmsr_tpr,             "MS Hv wrmsr tpr")
 PERFCOUNTER(mshv_wrmsr_eoi,             "MS Hv wrmsr eoi")
 
+PERFCOUNTER(realmode_emulations, "realmode instructions emulated")
+PERFCOUNTER(realmode_exits,      "vmexits from realmode")
+
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
--- a/xen/include/public/hvm/params.h	Tue Dec 09 13:23:15 2008 +0000
+++ b/xen/include/public/hvm/params.h	Tue Dec 09 16:28:02 2008 +0000
@@ -100,6 +100,9 @@
 /* ACPI S state: currently support S0 and S3 on x86. */
 #define HVM_PARAM_ACPI_S_STATE 14
 
-#define HVM_NR_PARAMS          15
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS     15
+
+#define HVM_NR_PARAMS          16
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
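A toolstack can read the new parameter back the same way xc_domain_save.c does above. A hedged usage sketch (error handling and the printing are ours; assumes the libxc interface of this era, where the handle is a plain int):

#include <stdio.h>
#include <stdint.h>
#include <xenctrl.h>

/* Read HVM_PARAM_VM86_TSS back through libxc, mirroring the
 * xc_get_hvm_param() calls in xc_domain_save.c. */
static void show_vm86_tss(int xc_handle, uint32_t dom)
{
    unsigned long tss = 0;

    if ( xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, &tss) == 0 )
        printf("dom%u vm86 TSS at %#lx\n", (unsigned int)dom, tss);
}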