ia64/xen-unstable

view xen/arch/x86/vmx.c @ 3858:5b63436f25fe

bitkeeper revision 1.1205.1.2 (421527deX3t0INFwjrOweq0E7Le7pw)

Rename fields in arch_exec_domain to be more uniform.
Promote vmx_shadow_invlpg() to shadow_invlpg().
author maf46@burn.cl.cam.ac.uk
date Thu Feb 17 23:25:18 2005 +0000 (2005-02-17)
parents 0fe3bb5ed3aa
children d6d5fbf0eee1
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
2 /*
3 * vmx.c: handling VMX architecture-related VM exits
4 * Copyright (c) 2004, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <asm/current.h>
27 #include <asm/io.h>
28 #include <asm/irq.h>
29 #include <asm/shadow.h>
30 #include <asm/regs.h>
31 #include <asm/cpufeature.h>
32 #include <asm/processor.h>
33 #include <asm/types.h>
34 #include <asm/msr.h>
35 #include <asm/spinlock.h>
36 #include <asm/vmx.h>
37 #include <asm/vmx_vmcs.h>
38 #include <asm/vmx_intercept.h>
39 #include <asm/shadow.h>
40 #include <public/io/ioreq.h>
42 #ifdef CONFIG_VMX
44 int vmcs_size;
45 unsigned int opt_vmx_debug_level = 0;
47 extern long evtchn_send(int lport);
48 extern long do_block(void);
50 #define VECTOR_DB 1
51 #define VECTOR_BP 3
52 #define VECTOR_GP 13
53 #define VECTOR_PG 14
55 int start_vmx()
56 {
57 struct vmcs_struct *vmcs;
58 unsigned long ecx;
59 u64 phys_vmcs; /* debugging */
61 vmcs_size = VMCS_SIZE;
62 /*
63 * Xen does not fill x86_capability words except 0.
64 */
65 ecx = cpuid_ecx(1);
66 boot_cpu_data.x86_capability[4] = ecx;
68 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
69 return 0;
71 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
73 if (!(vmcs = alloc_vmcs())) {
74 printk("Failed to allocate VMCS\n");
75 return 0;
76 }
78 phys_vmcs = (u64) virt_to_phys(vmcs);
80 if (!(__vmxon(phys_vmcs))) {
81 printk("VMXON is done\n");
82 }
84 return 1;
85 }
87 void stop_vmx()
88 {
89 if (read_cr4() & X86_CR4_VMXE)
90 __vmxoff();
91 }
93 /*
94 * Not all cases receive a valid value in the VM-exit instruction length field.
95 */
96 #define __get_instruction_length(len) \
97 __vmread(INSTRUCTION_LEN, &(len)); \
98 if ((len) < 1 || (len) > 15) \
99 __vmx_bug(&regs);
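/*
 * The architectural maximum x86 instruction length is 15 bytes, so any
 * value outside 1..15 means this exit type did not report a valid length
 * and is treated as a VMM bug.  Note this is a multi-statement macro
 * (no do { } while (0) wrapper), so it is only safe as a stand-alone
 * statement, which is how every caller below uses it.
 */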
101 static void inline __update_guest_eip(unsigned long inst_len)
102 {
103 unsigned long current_eip;
105 __vmread(GUEST_EIP, &current_eip);
106 __vmwrite(GUEST_EIP, current_eip + inst_len);
107 }
110 #include <asm/domain_page.h>
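/*
 * Resolve a guest page fault taken while the guest runs on shadow page
 * tables: translate the faulting virtual address through the guest's own
 * page tables (gva_to_gpa), hand MMIO addresses to the MMIO emulation
 * path (handle_mmio), and otherwise let the shadow code try to fix up
 * the shadow entry.  Returning 0 tells the exit handler to reflect the
 * page fault back into the guest.
 */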
112 static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
113 {
114 unsigned long eip;
115 unsigned long gpa;
116 int result;
118 #if VMX_DEBUG
119 {
120 __vmread(GUEST_EIP, &eip);
121 VMX_DBG_LOG(DBG_LEVEL_VMMU,
122 "vmx_do_page_fault = 0x%lx, eip = %lx, erro_code = %lx",
123 va, eip, error_code);
124 }
125 #endif
127 /*
128 * If guest_vtable is NULL, then we are still emulating 1:1 page tables,
129 * and we should never have gotten here.
130 */
131 if ( !current->arch.guest_vtable )
132 {
133 printk("vmx_do_page_fault while still running on 1:1 page table\n");
134 return 0;
135 }
137 gpa = gva_to_gpa(va);
138 if (!gpa)
139 return 0;
141 if (mmio_space(gpa))
142 handle_mmio(va, gpa);
144 if ((result = shadow_fault(va, error_code)))
145 return result;
147 return 0; /* failed to resolve, i.e. raise #PG */
148 }
150 static void vmx_do_general_protection_fault(struct xen_regs *regs)
151 {
152 unsigned long eip, error_code;
153 unsigned long intr_fields;
155 __vmread(GUEST_EIP, &eip);
156 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
158 VMX_DBG_LOG(DBG_LEVEL_1,
159 "vmx_general_protection_fault: eip = %lx, erro_code = %lx",
160 eip, error_code);
162 VMX_DBG_LOG(DBG_LEVEL_1,
163 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
164 regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
166 /* Reflect it back into the guest */
167 intr_fields = (INTR_INFO_VALID_MASK |
168 INTR_TYPE_EXCEPTION |
169 INTR_INFO_DELIEVER_CODE_MASK |
170 VECTOR_GP);
171 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
172 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
173 }
175 static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
176 {
177 int eax, ebx, ecx, edx;
178 unsigned long eip;
180 __vmread(GUEST_EIP, &eip);
182 VMX_DBG_LOG(DBG_LEVEL_1,
183 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
184 " (esi) %lx, (edi) %lx",
185 regs->eax, regs->ebx, regs->ecx, regs->edx,
186 regs->esi, regs->edi);
188 cpuid(input, &eax, &ebx, &ecx, &edx);
190 if (input == 1) {
191 clear_bit(X86_FEATURE_PSE, &edx);
192 clear_bit(X86_FEATURE_PAE, &edx);
193 clear_bit(X86_FEATURE_PSE36, &edx);
194 }
196 regs->eax = (unsigned long) eax;
197 regs->ebx = (unsigned long) ebx;
198 regs->ecx = (unsigned long) ecx;
199 regs->edx = (unsigned long) edx;
201 VMX_DBG_LOG(DBG_LEVEL_1,
202 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
203 eip, input, eax, ebx, ecx, edx);
205 }
207 #define CASE_GET_REG_P(REG, reg) \
208 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
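/*
 * MOV DR exits: in the VT-x exit qualification, bits 2:0 give the debug
 * register number, bit 4 the direction (to/from DR), and bits 11:8 the
 * general-purpose register operand; the DEBUG_REG_ACCESS_* masks below
 * extract these fields.  Guest debug registers are simply cached in
 * ed->arch.debugreg rather than loaded into hardware.
 */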
210 static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
211 {
212 unsigned int reg;
213 unsigned long *reg_p = 0;
214 struct exec_domain *ed = current;
215 unsigned long eip;
217 __vmread(GUEST_EIP, &eip);
219 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
221 VMX_DBG_LOG(DBG_LEVEL_1,
222 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
223 eip, reg, exit_qualification);
225 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
226 CASE_GET_REG_P(EAX, eax);
227 CASE_GET_REG_P(ECX, ecx);
228 CASE_GET_REG_P(EDX, edx);
229 CASE_GET_REG_P(EBX, ebx);
230 CASE_GET_REG_P(EBP, ebp);
231 CASE_GET_REG_P(ESI, esi);
232 CASE_GET_REG_P(EDI, edi);
233 case REG_ESP:
234 break;
235 default:
236 __vmx_bug(regs);
237 }
239 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
240 case TYPE_MOV_TO_DR:
241 /* don't need to check the range */
242 if (reg != REG_ESP)
243 ed->arch.debugreg[reg] = *reg_p;
244 else {
245 unsigned long value;
246 __vmread(GUEST_ESP, &value);
247 ed->arch.debugreg[reg] = value;
248 }
249 break;
250 case TYPE_MOV_FROM_DR:
251 if (reg != REG_ESP)
252 *reg_p = ed->arch.debugreg[reg];
253 else {
254 __vmwrite(GUEST_ESP, ed->arch.debugreg[reg]);
255 }
256 break;
257 }
258 }
260 /*
261 * Invalidate the TLB for va. Invalidate the shadow page corresponding
262 * to the address va.
263 */
264 static void vmx_vmexit_do_invlpg(unsigned long va)
265 {
266 unsigned long eip;
267 struct exec_domain *ed = current;
268 unsigned int index;
270 __vmread(GUEST_EIP, &eip);
272 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%p, va=%p",
273 eip, va);
275 /*
276 * We do the safest things first, then try to update the shadow
277 * by copying from the guest.
278 */
279 shadow_invlpg(ed, va);
280 index = (va >> L2_PAGETABLE_SHIFT);
281 ed->arch.hl2_vtable[index] =
282 mk_l2_pgentry(0); /* invalidate pgd cache */
283 }
285 static inline void hl2_table_invalidate(struct exec_domain *ed)
286 {
287 /*
288 * Need to optimize this
289 */
290 memset(ed->arch.hl2_vtable, 0, PAGE_SIZE);
291 }
293 static void vmx_io_instruction(struct xen_regs *regs,
294 unsigned long exit_qualification, unsigned long inst_len)
295 {
296 struct exec_domain *d = current;
297 vcpu_iodata_t *vio;
298 ioreq_t *p;
299 unsigned long addr;
300 unsigned long eip;
302 __vmread(GUEST_EIP, &eip);
304 VMX_DBG_LOG(DBG_LEVEL_1,
305 "vmx_io_instruction: eip=%p, exit_qualification = %lx",
306 eip, exit_qualification);
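/*
 * Exit qualification layout for I/O instructions (as decoded below):
 * bits 2:0 = access size - 1, bit 3 = direction (IN vs. OUT), bit 4 =
 * string instruction, bit 5 = REP prefix, bit 6 = port encoded as an
 * immediate (otherwise the port is in DX), bits 31:16 = the immediate
 * port number.
 */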
308 if (test_bit(6, &exit_qualification))
309 addr = (exit_qualification >> 16) & (0xffff);
310 else
311 addr = regs->edx & 0xffff;
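/*
 * Port 0x80 is the traditional POST/IO-delay port; accesses to it are
 * simply dropped here without involving the device model.
 */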
313 if (addr == 0x80) {
314 __update_guest_eip(inst_len);
315 return;
316 }
318 vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
319 if (vio == 0) {
320 VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx", (unsigned long) vio);
321 domain_crash();
322 }
323 p = &vio->vp_ioreq;
324 p->dir = test_bit(3, &exit_qualification);
326 p->pdata_valid = 0;
327 p->count = 1;
328 p->size = (exit_qualification & 7) + 1;
330 if (test_bit(4, &exit_qualification)) {
331 unsigned long eflags;
333 __vmread(GUEST_EFLAGS, &eflags);
334 p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
335 p->pdata_valid = 1;
336 p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
337 regs->esi
338 : regs->edi);
339 p->u.pdata = (void *) gva_to_gpa(p->u.data);
340 if (test_bit(5, &exit_qualification))
341 p->count = regs->ecx;
342 if ((p->u.data & PAGE_MASK) !=
343 ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
344 printk("stringio crosses page boundary!\n");
345 if (p->u.data & (p->size - 1)) {
346 printk("Not aligned I/O!\n");
347 domain_crash();
348 }
349 p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
350 } else {
351 __update_guest_eip(inst_len);
352 }
353 } else if (p->dir == IOREQ_WRITE) {
354 p->u.data = regs->eax;
355 __update_guest_eip(inst_len);
356 } else
357 __update_guest_eip(inst_len);
359 p->addr = addr;
360 p->port_mm = 0;
362 /* Check if the packet needs to be intercepted */
363 if (vmx_io_intercept(p)) {
364 /* no blocking & no evtchn notification */
365 return;
366 }
368 set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
369 p->state = STATE_IOREQ_READY;
370 evtchn_send(IOPACKET_PORT);
371 do_block();
372 }
374 #define CASE_GET_REG(REG, reg) \
375 case REG_ ## REG: value = regs->reg; break
377 /*
378 * Write to control registers
379 */
380 static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
381 {
382 unsigned long value;
383 unsigned long old_cr;
384 struct exec_domain *d = current;
386 switch (gp) {
387 CASE_GET_REG(EAX, eax);
388 CASE_GET_REG(ECX, ecx);
389 CASE_GET_REG(EDX, edx);
390 CASE_GET_REG(EBX, ebx);
391 CASE_GET_REG(EBP, ebp);
392 CASE_GET_REG(ESI, esi);
393 CASE_GET_REG(EDI, edi);
394 case REG_ESP:
395 __vmread(GUEST_ESP, &value);
396 break;
397 default:
398 printk("invalid gp: %d\n", gp);
399 __vmx_bug(regs);
400 }
402 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
403 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
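/*
 * Only CR0, CR3 and CR4 are handled.  The values the guest sees for
 * CR0/CR4 live in the CR0/CR4 read shadows, while the real guest fields
 * always keep PE, PG and VMXE set; CR3 writes switch (or simply flush)
 * the shadow page tables.
 */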
405 switch(cr) {
406 case 0:
407 {
408 unsigned long old_base_pfn = 0, pfn;
410 /*
411 * CR0:
412 * We don't want to lose PE and PG.
413 */
414 __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
415 __vmwrite(CR0_READ_SHADOW, value);
417 if (value & (X86_CR0_PE | X86_CR0_PG) &&
418 !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
419 /*
420 * Enable paging
421 */
422 set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
423 /*
424 * The guest CR3 must be pointing to a guest physical address.
425 */
426 if (!(pfn = phys_to_machine_mapping(
427 d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
428 {
429 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
430 d->arch.arch_vmx.cpu_cr3);
431 domain_crash(); /* need to take a clean path */
432 }
433 old_base_pfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
435 /* We know that none of the previous 1:1 shadow pages are
436 * going to be used again, so might as well flush them.
437 * XXXX wait until the last VCPU boots before doing the flush !!
438 */
439 shadow_lock(d->domain);
440 free_shadow_state(d->domain); // XXX SMP
441 shadow_unlock(d->domain);
443 /*
444 * Now arch.guest_table points to a machine physical address.
445 */
446 d->arch.guest_table = mk_pagetable(pfn << PAGE_SHIFT);
447 update_pagetables(d);
449 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
450 (unsigned long) (pfn << PAGE_SHIFT));
452 __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
453 /*
454 * arch->shadow_table should hold the next CR3 for shadow
455 */
456 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx",
457 d->arch.arch_vmx.cpu_cr3, pfn);
458 /* undo the get_page done in the para virt case */
459 put_page_and_type(&frame_table[old_base_pfn]);
461 }
462 break;
463 }
464 case 3:
465 {
466 unsigned long pfn;
468 /*
469 * If paging is not enabled yet, simply copy the value to CR3.
470 */
471 if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
472 d->arch.arch_vmx.cpu_cr3 = value;
473 break;
474 }
476 hl2_table_invalidate(d);
477 /*
478 * We make a new one if the shadow does not exist.
479 */
480 if (value == d->arch.arch_vmx.cpu_cr3) {
481 /*
482 * This is a simple TLB flush, implying the guest has
483 * removed some translation or changed page attributes.
484 * We simply invalidate the shadow.
485 */
486 pfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
487 if ((pfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table))
488 __vmx_bug(regs);
489 vmx_shadow_clear_state(d->domain);
490 shadow_invalidate(d);
491 } else {
492 /*
493 * If different, make a shadow. Check if the PDBR is valid
494 * first.
495 */
496 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
497 if ((value >> PAGE_SHIFT) > d->domain->max_pages)
498 {
499 VMX_DBG_LOG(DBG_LEVEL_VMMU,
500 "Invalid CR3 value=%lx", value);
501 domain_crash(); /* need to take a clean path */
502 }
503 pfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
504 vmx_shadow_clear_state(d->domain);
505 d->arch.guest_table = mk_pagetable(pfn << PAGE_SHIFT);
506 update_pagetables(d);
507 /*
508 * arch.shadow_table should now hold the next CR3 for shadow
509 */
510 d->arch.arch_vmx.cpu_cr3 = value;
511 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
512 value);
513 __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
514 }
515 break;
516 }
517 case 4:
518 /* CR4 */
519 if (value & X86_CR4_PAE)
520 __vmx_bug(regs); /* not implemented */
521 __vmread(CR4_READ_SHADOW, &old_cr);
523 __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
524 __vmwrite(CR4_READ_SHADOW, value);
526 /*
527 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
528 * all TLB entries except global entries.
529 */
530 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
531 vmx_shadow_clear_state(d->domain);
532 shadow_invalidate(d);
533 hl2_table_invalidate(d);
534 }
535 break;
536 default:
537 printk("invalid cr: %d\n", gp);
538 __vmx_bug(regs);
539 }
540 }
542 #define CASE_SET_REG(REG, reg) \
543 case REG_ ## REG: \
544 regs->reg = value; \
545 break
547 /*
548 * Read from control registers. CR0 and CR4 are read from the shadow.
549 */
550 static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
551 {
552 unsigned long value;
553 struct exec_domain *d = current;
555 if (cr != 3)
556 __vmx_bug(regs);
558 value = (unsigned long) d->arch.arch_vmx.cpu_cr3;
559 ASSERT(value);
561 switch (gp) {
562 CASE_SET_REG(EAX, eax);
563 CASE_SET_REG(ECX, ecx);
564 CASE_SET_REG(EDX, edx);
565 CASE_SET_REG(EBX, ebx);
566 CASE_SET_REG(EBP, ebp);
567 CASE_SET_REG(ESI, esi);
568 CASE_SET_REG(EDI, edi);
569 case REG_ESP:
570 __vmwrite(GUEST_ESP, value);
571 regs->esp = value;
572 break;
573 default:
574 printk("invalid gp: %d\n", gp);
575 __vmx_bug(regs);
576 }
578 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
579 }
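/*
 * CR access exits: in the exit qualification, bits 3:0 give the control
 * register number, bits 5:4 the access type (MOV to CR, MOV from CR,
 * CLTS, LMSW) and bits 11:8 the general-purpose register operand; the
 * CONTROL_REG_ACCESS_* masks below extract these.  LMSW is not handled
 * here and falls through to __vmx_bug().
 */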
581 static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
582 {
583 unsigned int gp, cr;
584 unsigned long value;
586 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
587 case TYPE_MOV_TO_CR:
588 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
589 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
590 mov_to_cr(gp, cr, regs);
591 break;
592 case TYPE_MOV_FROM_CR:
593 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
594 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
595 mov_from_cr(cr, gp, regs);
596 break;
597 case TYPE_CLTS:
598 __vmread(GUEST_CR0, &value);
599 value &= ~X86_CR0_TS; /* clear TS */
600 __vmwrite(GUEST_CR0, value);
602 __vmread(CR0_READ_SHADOW, &value);
603 value &= ~X86_CR0_TS; /* clear TS */
604 __vmwrite(CR0_READ_SHADOW, value);
605 break;
606 default:
607 __vmx_bug(regs);
608 break;
609 }
610 }
612 static inline void vmx_do_msr_read(struct xen_regs *regs)
613 {
614 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
615 regs->ecx, regs->eax, regs->edx);
617 rdmsr(regs->ecx, regs->eax, regs->edx);
619 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
620 "ecx=%lx, eax=%lx, edx=%lx",
621 regs->ecx, regs->eax, regs->edx);
622 }
624 /*
625 * Need to use this exit to reschedule
626 */
627 static inline void vmx_vmexit_do_hlt()
628 {
629 #if VMX_DEBUG
630 unsigned long eip;
631 __vmread(GUEST_EIP, &eip);
632 #endif
633 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%p", eip);
634 __enter_scheduler();
635 }
637 static inline void vmx_vmexit_do_mwait()
638 {
639 #if VMX_DEBUG
640 unsigned long eip;
641 __vmread(GUEST_EIP, &eip);
642 #endif
643 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%p", eip);
644 __enter_scheduler();
645 }
647 #define BUF_SIZ 256
648 #define MAX_LINE 80
649 char print_buf[BUF_SIZ];
650 static int index;
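/*
 * Simple console channel for VMX guests: the guest issues VMCALL with a
 * character in EAX (see the EXIT_REASON_VMCALL case below), and the
 * characters are buffered here until a newline or MAX_LINE, then printed
 * with the domain id.
 */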
652 static void vmx_print_line(const char c, struct exec_domain *d)
653 {
655 if (index == MAX_LINE || c == '\n') {
656 if (index == MAX_LINE) {
657 print_buf[index++] = c;
658 }
659 print_buf[index] = '\0';
660 printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
661 index = 0;
662 }
663 else
664 print_buf[index++] = c;
665 }
667 void save_vmx_execution_context(execution_context_t *ctxt)
668 {
669 __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
670 __vmread(GUEST_ESP, &ctxt->esp);
671 __vmread(GUEST_EFLAGS, &ctxt->eflags);
672 __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
673 __vmread(GUEST_EIP, &ctxt->eip);
675 __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
676 __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
677 __vmread(GUEST_ES_SELECTOR, &ctxt->es);
678 __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
679 }
681 #ifdef XEN_DEBUGGER
682 void save_xen_regs(struct xen_regs *regs)
683 {
684 __vmread(GUEST_SS_SELECTOR, &regs->xss);
685 __vmread(GUEST_ESP, &regs->esp);
686 __vmread(GUEST_EFLAGS, &regs->eflags);
687 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
688 __vmread(GUEST_EIP, &regs->eip);
690 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
691 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
692 __vmread(GUEST_ES_SELECTOR, &regs->xes);
693 __vmread(GUEST_DS_SELECTOR, &regs->xds);
694 }
696 void restore_xen_regs(struct xen_regs *regs)
697 {
698 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
699 __vmwrite(GUEST_ESP, regs->esp);
700 __vmwrite(GUEST_EFLAGS, regs->eflags);
701 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
702 __vmwrite(GUEST_EIP, regs->eip);
704 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
705 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
706 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
707 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
708 }
709 #endif
711 #define TRC_VMX_VMEXIT 0x00040001
712 #define TRC_VMX_VECTOR 0x00040002
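/*
 * Main VM-exit dispatcher.  The flow is: read the exit reason, re-inject
 * any event that was being delivered through the IDT when the exit
 * occurred (IDT-vectoring info), then switch on the exit reason.  For
 * exits caused by executing an instruction, the guest EIP is advanced by
 * the VM-exit instruction length before resuming; finally any pending
 * virtual interrupt is delivered via vmx_intr_assist().
 */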
714 asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
715 {
716 unsigned int exit_reason, idtv_info_field;
717 unsigned long exit_qualification, eip, inst_len = 0;
718 struct exec_domain *ed = current;
719 int error;
721 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
722 __vmx_bug(&regs);
724 perfc_incra(vmexits, exit_reason);
726 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
727 if (idtv_info_field & INTR_INFO_VALID_MASK) {
728 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
729 if ((idtv_info_field & 0xff) == 14) {
730 unsigned long error_code;
732 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
733 printk("#PG error code: %lx\n", error_code);
734 }
735 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x",
736 idtv_info_field);
737 }
739 /* don't bother with H/W interrupts */
740 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
741 exit_reason != EXIT_REASON_VMCALL &&
742 exit_reason != EXIT_REASON_IO_INSTRUCTION)
743 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
745 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
746 domain_crash();
747 return;
748 }
750 __vmread(GUEST_EIP, &eip);
751 TRACE_3D(TRC_VMX_VMEXIT, ed->domain->id, eip, exit_reason);
753 switch (exit_reason) {
754 case EXIT_REASON_EXCEPTION_NMI:
755 {
756 /*
757 * We don't enable software-interrupt exiting (INT n), so here we
758 * can get either (1) an exception (e.g. #PG) in the guest, or
759 * (2) an NMI.
760 */
761 int error;
762 unsigned int vector;
763 unsigned long va;
764 unsigned long error_code;
766 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
767 && !(vector & INTR_INFO_VALID_MASK))
768 __vmx_bug(&regs);
769 vector &= 0xff;
771 perfc_incra(cause_vector, vector);
773 TRACE_3D(TRC_VMX_VECTOR, ed->domain->id, eip, vector);
774 switch (vector) {
775 #ifdef XEN_DEBUGGER
776 case VECTOR_DB:
777 {
778 save_xen_regs(&regs);
779 pdb_handle_exception(1, &regs, 1);
780 restore_xen_regs(&regs);
781 break;
782 }
783 case VECTOR_BP:
784 {
785 save_xen_regs(&regs);
786 pdb_handle_exception(3, &regs, 1);
787 restore_xen_regs(&regs);
788 break;
789 }
790 #endif
791 case VECTOR_GP:
792 {
793 vmx_do_general_protection_fault(&regs);
794 break;
795 }
796 case VECTOR_PG:
797 {
798 __vmread(EXIT_QUALIFICATION, &va);
799 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
800 VMX_DBG_LOG(DBG_LEVEL_VMMU,
801 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
802 regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi,
803 regs.edi);
804 ed->arch.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;
806 if (!(error = vmx_do_page_fault(va, error_code))) {
807 /*
808 * Inject #PG using Interruption-Information Fields
809 */
810 unsigned long intr_fields;
812 intr_fields = (INTR_INFO_VALID_MASK |
813 INTR_TYPE_EXCEPTION |
814 INTR_INFO_DELIEVER_CODE_MASK |
815 VECTOR_PG);
816 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
817 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
818 ed->arch.arch_vmx.cpu_cr2 = va;
819 }
820 break;
821 }
822 default:
823 printk("unexpected VMexit for exception vector 0x%x\n", vector);
824 //__vmx_bug(&regs);
825 break;
826 }
827 break;
828 }
829 case EXIT_REASON_EXTERNAL_INTERRUPT:
830 {
831 extern int vector_irq[];
832 extern asmlinkage void do_IRQ(struct xen_regs *);
833 extern void smp_apic_timer_interrupt(struct xen_regs *);
834 extern void timer_interrupt(int, void *, struct xen_regs *);
835 unsigned int vector;
837 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
838 && !(vector & INTR_INFO_VALID_MASK))
839 __vmx_bug(&regs);
841 vector &= 0xff;
842 local_irq_disable();
844 if (vector == LOCAL_TIMER_VECTOR) {
845 smp_apic_timer_interrupt(&regs);
846 } else {
847 regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
848 0 : vector_irq[vector]);
849 do_IRQ(&regs);
850 }
851 break;
852 }
853 case EXIT_REASON_PENDING_INTERRUPT:
854 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
855 MONITOR_CPU_BASED_EXEC_CONTROLS);
856 vmx_intr_assist(ed);
857 break;
858 case EXIT_REASON_TASK_SWITCH:
859 __vmx_bug(&regs);
860 break;
861 case EXIT_REASON_CPUID:
862 __get_instruction_length(inst_len);
863 vmx_vmexit_do_cpuid(regs.eax, &regs);
864 __update_guest_eip(inst_len);
865 break;
866 case EXIT_REASON_HLT:
867 __get_instruction_length(inst_len);
868 __update_guest_eip(inst_len);
869 vmx_vmexit_do_hlt();
870 break;
871 case EXIT_REASON_INVLPG:
872 {
873 unsigned long va;
875 __vmread(EXIT_QUALIFICATION, &va);
876 vmx_vmexit_do_invlpg(va);
877 __get_instruction_length(inst_len);
878 __update_guest_eip(inst_len);
879 break;
880 }
881 case EXIT_REASON_VMCALL:
882 __get_instruction_length(inst_len);
883 __vmread(GUEST_EIP, &eip);
884 __vmread(EXIT_QUALIFICATION, &exit_qualification);
886 vmx_print_line(regs.eax, ed); /* provides the current domain */
887 __update_guest_eip(inst_len);
888 break;
889 case EXIT_REASON_CR_ACCESS:
890 {
891 __vmread(GUEST_EIP, &eip);
892 __get_instruction_length(inst_len);
893 __vmread(EXIT_QUALIFICATION, &exit_qualification);
895 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
896 eip, inst_len, exit_qualification);
897 vmx_cr_access(exit_qualification, &regs);
898 __update_guest_eip(inst_len);
899 break;
900 }
901 case EXIT_REASON_DR_ACCESS:
902 __vmread(EXIT_QUALIFICATION, &exit_qualification);
903 vmx_dr_access(exit_qualification, &regs);
904 __get_instruction_length(inst_len);
905 __update_guest_eip(inst_len);
906 break;
907 case EXIT_REASON_IO_INSTRUCTION:
908 __vmread(EXIT_QUALIFICATION, &exit_qualification);
909 __get_instruction_length(inst_len);
910 vmx_io_instruction(&regs, exit_qualification, inst_len);
911 break;
912 case EXIT_REASON_MSR_READ:
913 __get_instruction_length(inst_len);
914 vmx_do_msr_read(&regs);
915 __update_guest_eip(inst_len);
916 break;
917 case EXIT_REASON_MSR_WRITE:
918 __vmread(GUEST_EIP, &eip);
919 VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%p, eax=%p, edx=%p",
920 eip, regs.eax, regs.edx);
921 /* just ignore the MSR write for now */
922 __get_instruction_length(inst_len);
923 __update_guest_eip(inst_len);
924 break;
925 case EXIT_REASON_MWAIT_INSTRUCTION:
926 __get_instruction_length(inst_len);
927 __update_guest_eip(inst_len);
928 vmx_vmexit_do_mwait();
929 break;
930 default:
931 __vmx_bug(&regs); /* should not happen */
932 }
934 vmx_intr_assist(ed);
935 return;
936 }
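/*
 * CR2 is not part of the VMCS guest state, so the guest's saved CR2 must
 * be put back into the hardware register on the VM-entry path (with
 * interrupts disabled so a host fault cannot clobber it) before resuming
 * the guest.
 */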
938 asmlinkage void load_cr2(void)
939 {
940 struct exec_domain *d = current;
942 local_irq_disable();
943 asm volatile("movl %0,%%cr2": :"r" (d->arch.arch_vmx.cpu_cr2));
944 }
946 #endif /* CONFIG_VMX */