
view xen/include/asm-x86/hvm/vmx/vmx.h @ 9952:0c586a81d941

Fix injection of guest faults resulting from failed injection of a
previous event. Without this fix we enter an infinite loop if the
original failed injection cannot be fixed up by Xen (e.g., because it
is not a shadow pagetable issue).

The RHEL4 HVM guest hang was actually a side effect of changeset 9699.
In the hanging guest, the rc.sysinit init script calls the kmodule
program to probe the hardware, and kmodule uses the kudzu library call
probeDevices(). To probe the graphics hardware, the vbe_get_mode_info()
function sets up the environment and enters vm86 mode to issue an
int 0x10 call; to return to protected mode it sets up an int 0xff call.
At the time of the int 0xff call the guest process pages were not yet
populated, which caused an infinite loop of vmexits with
IDT_VECTORING_INFO set on the int 0xff instruction.

The reason for the infinite loop is changeset 9699: with it, the guest
page fault injected by the VMM was always overridden by the int 0xff
#GP fault taken from IDT_VECTORING_INFO. With this patch, if the VMM is
injecting an exception such as a page fault or a #GP fault, the
IDT_VECTORING_INFO field no longer overrides it, and that breaks the
vmexit infinite loop for RHEL4.
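
The shape of the check, as a minimal sketch rather than the actual
patch (the handler name is illustrative, and the IDT_VECTORING_INFO_FIELD
encoding name is assumed from vmcs.h; vector_injected is the flag set by
vmx_inject_exception() in vmx.h):

    /* Sketch only: re-deliver a failed event unless Xen is injecting
     * its own fault. */
    static void sketch_idt_vectoring(struct vcpu *v)
    {
        unsigned long idt_vectoring_info;

        __vmread(IDT_VECTORING_INFO_FIELD, &idt_vectoring_info);
        if ( !(idt_vectoring_info & INTR_INFO_VALID_MASK) )
            return;

        /* VMM-injected #PF/#GP takes priority; do not override it. */
        if ( v->arch.hvm_vmx.vector_injected )
            return;

        /* Otherwise re-queue the original event for delivery on VM entry. */
        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idt_vectoring_info);
    }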

Signed-off-by: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Edwin Zhai <edwin.zhai@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri May 05 14:05:31 2006 +0100 (2006-05-05)
parents c6557cad2670
children da7fe04d8e80
line source
/*
 * vmx.h: VMX Architecture related definitions
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */
#ifndef __ASM_X86_HVM_VMX_VMX_H__
#define __ASM_X86_HVM_VMX_VMX_H__

#include <xen/sched.h>
#include <asm/types.h>
#include <asm/regs.h>
#include <asm/processor.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/i387.h>

extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
extern void vmx_asm_do_resume(void);
extern void vmx_asm_do_launch(void);
extern void vmx_intr_assist(void);
extern void vmx_migrate_timers(struct vcpu *v);
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
extern void set_guest_time(struct vcpu *v, u64 gtime);
extern u64 get_guest_time(struct vcpu *v);

extern unsigned int cpu_rev;

/*
 * Need fill bits for SENTER
 */

#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x00000016

#define MONITOR_PIN_BASED_EXEC_CONTROLS                 \
    (                                                   \
    MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE |    \
    PIN_BASED_EXT_INTR_MASK |                           \
    PIN_BASED_NMI_EXITING                               \
    )

#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x0401e172

#define _MONITOR_CPU_BASED_EXEC_CONTROLS                \
    (                                                   \
    MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE |    \
    CPU_BASED_HLT_EXITING |                             \
    CPU_BASED_INVDPG_EXITING |                          \
    CPU_BASED_MWAIT_EXITING |                           \
    CPU_BASED_MOV_DR_EXITING |                          \
    CPU_BASED_ACTIVATE_IO_BITMAP |                      \
    CPU_BASED_USE_TSC_OFFSETING                         \
    )

#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE      \
    (                                                   \
    CPU_BASED_CR8_LOAD_EXITING |                        \
    CPU_BASED_CR8_STORE_EXITING                         \
    )

#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE 0x0003edff

#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE     0x00000200

#define _MONITOR_VM_EXIT_CONTROLS               \
    (                                           \
    MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE |   \
    VM_EXIT_ACK_INTR_ON_EXIT                    \
    )

#if defined (__x86_64__)
#define MONITOR_CPU_BASED_EXEC_CONTROLS         \
    (                                           \
    _MONITOR_CPU_BASED_EXEC_CONTROLS |          \
    MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE  \
    )
#define MONITOR_VM_EXIT_CONTROLS                \
    (                                           \
    _MONITOR_VM_EXIT_CONTROLS |                 \
    MONITOR_VM_EXIT_CONTROLS_IA32E_MODE         \
    )
#else
#define MONITOR_CPU_BASED_EXEC_CONTROLS         \
    _MONITOR_CPU_BASED_EXEC_CONTROLS

#define MONITOR_VM_EXIT_CONTROLS                \
    _MONITOR_VM_EXIT_CONTROLS
#endif

#define VM_ENTRY_CONTROLS_RESERVED_VALUE        0x000011ff
#define VM_ENTRY_CONTROLS_IA32E_MODE            0x00000200
#define MONITOR_VM_ENTRY_CONTROLS       VM_ENTRY_CONTROLS_RESERVED_VALUE
/*
 * Exit Reasons
 */
#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000

#define EXIT_REASON_EXCEPTION_NMI       0
#define EXIT_REASON_EXTERNAL_INTERRUPT  1

#define EXIT_REASON_PENDING_INTERRUPT   7

#define EXIT_REASON_TASK_SWITCH         9
#define EXIT_REASON_CPUID               10
#define EXIT_REASON_HLT                 12
#define EXIT_REASON_INVLPG              14
#define EXIT_REASON_RDPMC               15
#define EXIT_REASON_RDTSC               16
#define EXIT_REASON_VMCALL              18
#define EXIT_REASON_VMCLEAR             19
#define EXIT_REASON_VMLAUNCH            20
#define EXIT_REASON_VMPTRLD             21
#define EXIT_REASON_VMPTRST             22
#define EXIT_REASON_VMREAD              23
#define EXIT_REASON_VMRESUME            24
#define EXIT_REASON_VMWRITE             25
#define EXIT_REASON_VMOFF               26
#define EXIT_REASON_VMON                27
#define EXIT_REASON_CR_ACCESS           28
#define EXIT_REASON_DR_ACCESS           29
#define EXIT_REASON_IO_INSTRUCTION      30
#define EXIT_REASON_MSR_READ            31
#define EXIT_REASON_MSR_WRITE           32
#define EXIT_REASON_MWAIT_INSTRUCTION   36

/*
 * Interruption-information format
 */
#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
#define INTR_INFO_DELIEVER_CODE_MASK    0x800           /* 11 */
#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */

#define INTR_TYPE_EXT_INTR              (0 << 8)        /* external interrupt */
#define INTR_TYPE_EXCEPTION             (3 << 8)        /* processor exception */

/*
 * Exit Qualifications for MOV for Control Register Access
 */
#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose register */
#define LMSW_SOURCE_DATA                (0xFFFF << 16)  /* 16:31 lmsw source */
#define REG_EAX                         (0 << 8)
#define REG_ECX                         (1 << 8)
#define REG_EDX                         (2 << 8)
#define REG_EBX                         (3 << 8)
#define REG_ESP                         (4 << 8)
#define REG_EBP                         (5 << 8)
#define REG_ESI                         (6 << 8)
#define REG_EDI                         (7 << 8)
#define REG_R8                          (8 << 8)
#define REG_R9                          (9 << 8)
#define REG_R10                         (10 << 8)
#define REG_R11                         (11 << 8)
#define REG_R12                         (12 << 8)
#define REG_R13                         (13 << 8)
#define REG_R14                         (14 << 8)
#define REG_R15                         (15 << 8)

/*
 * Exit Qualifications for MOV for Debug Register Access
 */
#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
#define TYPE_MOV_TO_DR                  (0 << 4)
#define TYPE_MOV_FROM_DR                (1 << 4)
#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */

/* These bits in the CR4 are owned by the host */
#if CONFIG_PAGING_LEVELS >= 3
#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
#else
#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
#endif

#define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
#define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
#define VMPTRLD_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /6 */
#define VMPTRST_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /7 */
#define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
#define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
#define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
#define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
#define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"

#define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
#define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
#define MODRM_EAX_ECX   ".byte 0xc1\n" /* [EAX], [ECX] */

static inline int __vmptrld (u64 addr)
{
    unsigned long eflags;
    __asm__ __volatile__ ( VMPTRLD_OPCODE
                           MODRM_EAX_06
                           :
                           : "a" (&addr)
                           : "memory");

    __save_flags(eflags);
    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
        return -1;
    return 0;
}

static inline void __vmptrst (u64 addr)
{
    __asm__ __volatile__ ( VMPTRST_OPCODE
                           MODRM_EAX_07
                           :
                           : "a" (&addr)
                           : "memory");
}

static inline int __vmpclear (u64 addr)
{
    unsigned long eflags;

    __asm__ __volatile__ ( VMCLEAR_OPCODE
                           MODRM_EAX_06
                           :
                           : "a" (&addr)
                           : "memory");
    __save_flags(eflags);
    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
        return -1;
    return 0;
}

#define __vmread(x, ptr) ___vmread((x), (ptr), sizeof(*(ptr)))

static always_inline int ___vmread (const unsigned long field, void *ptr, const int size)
{
    unsigned long eflags;
    unsigned long ecx = 0;

    __asm__ __volatile__ ( VMREAD_OPCODE
                           MODRM_EAX_ECX
                           : "=c" (ecx)
                           : "a" (field)
                           : "memory");

    switch (size) {
    case 1:
        *((u8 *) (ptr)) = ecx;
        break;
    case 2:
        *((u16 *) (ptr)) = ecx;
        break;
    case 4:
        *((u32 *) (ptr)) = ecx;
        break;
    case 8:
        *((u64 *) (ptr)) = ecx;
        break;
    default:
        domain_crash_synchronous();
        break;
    }

    __save_flags(eflags);
    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
        return -1;
    return 0;
}
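
/*
 * Example usage -- a minimal sketch, assuming VM_EXIT_REASON is the VMCS
 * field encoding defined in vmcs.h: pass a pointer to a correctly sized
 * local so sizeof(*(ptr)) selects the matching store, and check the
 * VMfail result:
 *
 *     unsigned long exit_reason;
 *     if ( __vmread(VM_EXIT_REASON, &exit_reason) )
 *         domain_crash_synchronous();
 */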

static always_inline void __vmwrite_vcpu(struct vcpu *v, unsigned long field, unsigned long value)
{
    switch(field) {
    case CR0_READ_SHADOW:
        v->arch.hvm_vmx.cpu_shadow_cr0 = value;
        break;
    case GUEST_CR0:
        v->arch.hvm_vmx.cpu_cr0 = value;
        break;
    case CPU_BASED_VM_EXEC_CONTROL:
        v->arch.hvm_vmx.cpu_based_exec_control = value;
        break;
    default:
        printk("__vmwrite_vcpu: invalid field %lx\n", field);
        break;
    }
}

static always_inline void __vmread_vcpu(struct vcpu *v, unsigned long field, unsigned long *value)
{
    switch(field) {
    case CR0_READ_SHADOW:
        *value = v->arch.hvm_vmx.cpu_shadow_cr0;
        break;
    case GUEST_CR0:
        *value = v->arch.hvm_vmx.cpu_cr0;
        break;
    case CPU_BASED_VM_EXEC_CONTROL:
        *value = v->arch.hvm_vmx.cpu_based_exec_control;
        break;
    default:
        printk("__vmread_vcpu: invalid field %lx\n", field);
        break;
    }
}

static inline int __vmwrite (unsigned long field, unsigned long value)
{
    unsigned long eflags;
    struct vcpu *v = current;

    __asm__ __volatile__ ( VMWRITE_OPCODE
                           MODRM_EAX_ECX
                           :
                           : "a" (field) , "c" (value)
                           : "memory");
    __save_flags(eflags);
    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
        return -1;

    switch(field) {
    case CR0_READ_SHADOW:
    case GUEST_CR0:
    case CPU_BASED_VM_EXEC_CONTROL:
        __vmwrite_vcpu(v, field, value);
        break;
    }

    return 0;
}

static inline int __vm_set_bit(unsigned long field, unsigned long mask)
{
    unsigned long tmp;
    int err = 0;

    err |= __vmread(field, &tmp);
    tmp |= mask;
    err |= __vmwrite(field, tmp);

    return err;
}

static inline int __vm_clear_bit(unsigned long field, unsigned long mask)
{
    unsigned long tmp;
    int err = 0;

    err |= __vmread(field, &tmp);
    tmp &= ~mask;
    err |= __vmwrite(field, tmp);

    return err;
}

static inline void __vmxoff (void)
{
    __asm__ __volatile__ ( VMXOFF_OPCODE
                           ::: "memory");
}

static inline int __vmxon (u64 addr)
{
    unsigned long eflags;

    __asm__ __volatile__ ( VMXON_OPCODE
                           MODRM_EAX_06
                           :
                           : "a" (&addr)
                           : "memory");
    __save_flags(eflags);
    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
        return -1;
    return 0;
}

/* Make sure that xen intercepts any FP accesses from current */
static inline void vmx_stts(void)
{
    unsigned long cr0;
    struct vcpu *v = current;

    /* FPU state already dirty? Then no need to setup_fpu() lazily. */
    if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
        return;

    /*
     * If the guest does not have TS enabled then we must cause and handle an
     * exception on first use of the FPU. If the guest *does* have TS enabled
     * then this is not necessary: no FPU activity can occur until the guest
     * clears CR0.TS, and we will initialise the FPU when that happens.
     */
    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
    if ( !(cr0 & X86_CR0_TS) )
    {
        __vmread_vcpu(v, GUEST_CR0, &cr0);
        __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS);
        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
    }
}

/* Works only for vcpu == current */
static inline int vmx_paging_enabled(struct vcpu *v)
{
    unsigned long cr0;

    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
    return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
}

static inline int vmx_pgbit_test(struct vcpu *v)
{
    unsigned long cr0;

    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
    return (cr0 & X86_CR0_PG);
}

static inline int __vmx_inject_exception(struct vcpu *v, int trap, int type,
                                         int error_code)
{
    unsigned long intr_fields;

    /* Reflect it back into the guest */
    intr_fields = (INTR_INFO_VALID_MASK | type | trap);
    if (error_code != VMX_DELIVER_NO_ERROR_CODE) {
        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
        intr_fields |= INTR_INFO_DELIEVER_CODE_MASK;
    }

    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
    return 0;
}

static inline int vmx_inject_exception(struct vcpu *v, int trap, int error_code)
{
    v->arch.hvm_vmx.vector_injected = 1;
    return __vmx_inject_exception(v, trap, INTR_TYPE_EXCEPTION, error_code);
}
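
/*
 * Example usage -- a minimal sketch: to hand a page fault back to the guest,
 * a caller would record the faulting address for the guest's CR2 (the exact
 * field used for that is not shown in this header) and then queue the
 * exception:
 *
 *     vmx_inject_exception(v, TRAP_page_fault, error_code);
 *
 * TRAP_page_fault comes from asm/processor.h.  Because vector_injected is
 * set above, the vmexit path can tell that Xen is already injecting its own
 * fault and must not override it with a stale IDT_VECTORING_INFO event.
 */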

static inline int vmx_inject_extint(struct vcpu *v, int trap, int error_code)
{
    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code);
    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);

    return 0;
}

static inline int vmx_reflect_exception(struct vcpu *v)
{
    int error_code, vector;

    __vmread(VM_EXIT_INTR_INFO, &vector);
    if (vector & INTR_INFO_DELIEVER_CODE_MASK)
        __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
    else
        error_code = VMX_DELIVER_NO_ERROR_CODE;
    vector &= 0xff;

#ifndef NDEBUG
    {
        unsigned long eip;

        __vmread(GUEST_RIP, &eip);
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "vmx_reflect_exception: eip = %lx, error_code = %x",
                    eip, error_code);
    }
#endif /* NDEBUG */

    vmx_inject_exception(v, vector, error_code);
    return 0;
}

#endif /* __ASM_X86_HVM_VMX_VMX_H__ */