view xen/arch/x86/hvm/svm/vmcb.c @ 8840:9f662b5e7d3c

Absolutely must not return to HVM guest context until
synchronous I/O emulation is completed.

Signed-off-by: Keir Fraser <keir@xensource.com>

author    kaf24@firebug.cl.cam.ac.uk
date      Mon Feb 13 12:16:16 2006 +0100 (2006-02-13)
parents   4a9a39d08a06
children  3cb8e672b115
/*
 * vmcb.c: VMCB management
 * Copyright (c) 2005, AMD Corporation.
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/shadow.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/intr.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/domain_page.h>
extern int svm_dbg_on;
extern int asidpool_assign_next(struct vmcb_struct *vmcb, int retire_current,
                                int oldcore, int newcore);

#define round_pgdown(_p) ((_p) & PAGE_MASK) /* copied from domain.c */

#define GUEST_SEGMENT_LIMIT 0xffffffff

#define IOPM_SIZE  (12 * 1024)
#define MSRPM_SIZE (8 * 1024)
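
/*
 * Map sizes follow the AMD SVM architecture: the I/O permission map is
 * 12KB (one intercept bit per port for the 64K port space, plus overrun
 * space), and the MSR permission map is 8KB (two bits, read and write,
 * per MSR across the architecturally defined MSR ranges).
 */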
struct vmcb_struct *alloc_vmcb(void)
{
    struct vmcb_struct *vmcb = NULL;
    unsigned int order;

    order = get_order_from_bytes(sizeof(struct vmcb_struct));
    ASSERT(order >= 0);
    vmcb = alloc_xenheap_pages(order);
    ASSERT(vmcb);

    if (vmcb)
        memset(vmcb, 0, sizeof(struct vmcb_struct));

    return vmcb;
}
void free_vmcb(struct vmcb_struct *vmcb)
{
    unsigned int order;

    order = get_order_from_bytes(sizeof(struct vmcb_struct));
    ASSERT(vmcb);

    if (vmcb)
        free_xenheap_pages(vmcb, order);
}
struct host_save_area *alloc_host_save_area(void)
{
    unsigned int order = 0;
    struct host_save_area *hsa = NULL;

    hsa = alloc_xenheap_pages(order);
    ASSERT(hsa);

    if (hsa)
        memset(hsa, 0, PAGE_SIZE);

    return hsa;
}
void free_host_save_area(struct host_save_area *hsa)
{
    unsigned int order;

    order = get_order_from_bytes(PAGE_SIZE);
    ASSERT(hsa);

    if (hsa)
        free_xenheap_pages(hsa, order);
}
/* Set up intercepts to exit the guest into the hypervisor when we want it. */
static int construct_vmcb_controls(struct arch_svm_struct *arch_svm)
{
    struct vmcb_struct *vmcb;
    u32 *iopm;
    u32 *msrpm;

    vmcb = arch_svm->vmcb;

    ASSERT(vmcb);

    /* Enable all general1 intercepts except those masked off here. */
    vmcb->general1_intercepts =
        ~(GENERAL1_INTERCEPT_CR0_SEL_WRITE | GENERAL1_INTERCEPT_VINTR |
          GENERAL1_INTERCEPT_IDTR_READ | GENERAL1_INTERCEPT_IDTR_WRITE |
          GENERAL1_INTERCEPT_GDTR_READ | GENERAL1_INTERCEPT_GDTR_WRITE |
          GENERAL1_INTERCEPT_LDTR_READ | GENERAL1_INTERCEPT_LDTR_WRITE |
          GENERAL1_INTERCEPT_TR_READ | GENERAL1_INTERCEPT_TR_WRITE |
          GENERAL1_INTERCEPT_RDTSC | GENERAL1_INTERCEPT_PUSHF |
          GENERAL1_INTERCEPT_SWINT | GENERAL1_INTERCEPT_POPF |
          GENERAL1_INTERCEPT_IRET | GENERAL1_INTERCEPT_PAUSE |
          GENERAL1_INTERCEPT_TASK_SWITCH);

    /* Turn on all the general2 intercepts. */
    vmcb->general2_intercepts =
        GENERAL2_INTERCEPT_VMRUN | GENERAL2_INTERCEPT_VMMCALL |
        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE |
        GENERAL2_INTERCEPT_STGI | GENERAL2_INTERCEPT_CLGI |
        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;

    /* Do not intercept debug-register accesses: the guest may read and
     * write DR0 - DR15 directly. */
    vmcb->dr_intercepts = 0;

    /* Intercept reads and writes of all control registers except CR2. */
    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);

    /* Set up the I/O and MSR permission maps. */
    iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE));
    ASSERT(iopm);
    memset(iopm, 0xff, IOPM_SIZE);
    clear_bit(PC_DEBUG_PORT, iopm); /* allow direct access to the PC debug port */
    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
    ASSERT(msrpm);
    memset(msrpm, 0xff, MSRPM_SIZE);

    arch_svm->iopm = iopm;
    arch_svm->msrpm = msrpm;

    vmcb->iopm_base_pa = (u64) virt_to_maddr(iopm);
    vmcb->msrpm_base_pa = (u64) virt_to_maddr(msrpm);

    return 0;
}
/*
 * Modify guest eflags and exception bitmap for gdb.
 */
int svm_modify_vmcb(struct vcpu *v, struct cpu_user_regs *regs)
{
    int error;
    if ((error = load_vmcb(&v->arch.hvm_svm, v->arch.hvm_svm.host_save_pa)))
    {
        printk("svm_modify_vmcb: load_vmcb failed: VMCB = %lx\n",
               (unsigned long) v->arch.hvm_svm.host_save_pa);
        return -EINVAL;
    }
    svm_load_cpu_user_regs(v, regs);
    return 0;
}
/*
 * Initially set up the same environment as the host.
 */
static int construct_init_vmcb_guest(struct arch_svm_struct *arch_svm,
                                     struct cpu_user_regs *regs)
{
    int error = 0;
    unsigned long crn;
    segment_attributes_t attrib;
    unsigned long dr7;
    unsigned long eflags;
    unsigned long shadow_cr;
    struct vmcb_struct *vmcb = arch_svm->vmcb;
    struct Xgt_desc_struct desc;

    /* Allow IRQs to be shared */
    vmcb->vintr.fields.intr_masking = 1;

    /* Set up event injection entry in VMCB. Just clear it. */
    vmcb->eventinj.bytes = 0;

    /* TSC */
    vmcb->tsc_offset = 0;

    vmcb->cs.sel = regs->cs;
    vmcb->es.sel = regs->es;
    vmcb->ss.sel = regs->ss;
    vmcb->ds.sel = regs->ds;
    vmcb->fs.sel = regs->fs;
    vmcb->gs.sel = regs->gs;

    /* Guest segment limits */
    vmcb->cs.limit = GUEST_SEGMENT_LIMIT;
    vmcb->es.limit = GUEST_SEGMENT_LIMIT;
    vmcb->ss.limit = GUEST_SEGMENT_LIMIT;
    vmcb->ds.limit = GUEST_SEGMENT_LIMIT;
    vmcb->fs.limit = GUEST_SEGMENT_LIMIT;
    vmcb->gs.limit = GUEST_SEGMENT_LIMIT;

    /* Base address for segments */
    vmcb->cs.base = 0;
    vmcb->es.base = 0;
    vmcb->ss.base = 0;
    vmcb->ds.base = 0;
    vmcb->fs.base = 0;
    vmcb->gs.base = 0;

    /* Start the guest with the host's IDT. */
    __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
    vmcb->idtr.base = desc.address;
    vmcb->idtr.limit = desc.size;

    /* Set up segment attributes */
    attrib.bytes = 0;
    attrib.fields.type = 0x3; /* type = 3 */
    attrib.fields.s = 1;      /* code or data, i.e. not system */
    attrib.fields.dpl = 0;    /* DPL = 0 */
    attrib.fields.p = 1;      /* segment present */
    attrib.fields.db = 1;     /* 32-bit */
    attrib.fields.g = 1;      /* 4K pages in limit */

    /* Data selectors */
    vmcb->es.attributes = attrib;
    vmcb->ss.attributes = attrib;
    vmcb->ds.attributes = attrib;
    vmcb->fs.attributes = attrib;
    vmcb->gs.attributes = attrib;

    /* Code selector */
    attrib.fields.type = 0xb; /* type = 0xb -> executable/readable, accessed */
    vmcb->cs.attributes = attrib;

    /* Global descriptor table */
    //NMERGE7500 - can probably remove access to gdtr
    vmcb->gdtr.base = regs->edx;
    regs->edx = 0;
    ASSERT(regs->eax <= 0xFFFF); /* Make sure we're in the limit */
    vmcb->gdtr.limit = regs->eax;
    regs->eax = 0;

    /* Local descriptor table */
    attrib.fields.s = 0;      /* not code or data segment */
    attrib.fields.type = 0x2; /* LDT */
    attrib.fields.db = 0;     /* 16-bit */
    attrib.fields.g = 0;
    vmcb->ldtr.attributes = attrib;

    /* Task register */
    attrib.fields.type = 0xb; /* 32-bit TSS (busy) */
    vmcb->tr.attributes = attrib;
    vmcb->tr.base = 0;
    vmcb->tr.limit = 0xff;

    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) :);
    vmcb->cr0 = crn;

    /* Initially PG is not set in the shadow of CR0 */
    shadow_cr = vmcb->cr0;
    shadow_cr &= ~X86_CR0_PG;
    arch_svm->cpu_shadow_cr0 = shadow_cr;

    /* CR3 is set in svm_final_setup_guest */

    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :);
    shadow_cr = crn;
    vmcb->cr4 = shadow_cr;

    //MERGE7500 - should write a 0 instead to rsp?
    vmcb->rsp = regs->esp;
    vmcb->rip = regs->eip;

    eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= HVM_EFLAGS_RESERVED_1;                /* set 1s */

    vmcb->rflags = eflags;

    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    vmcb->dr7 = dr7;

    return error;
}
/*
 * Destroy the VMCB and the I/O and MSR permission maps.
 */
void destroy_vmcb(struct arch_svm_struct *arch_svm)
{
    if (arch_svm->vmcb != NULL)
    {
        asidpool_retire(arch_svm->vmcb, arch_svm->core);
        free_vmcb(arch_svm->vmcb);
    }
    if (arch_svm->iopm != NULL) {
        free_xenheap_pages(
            arch_svm->iopm, get_order_from_bytes(IOPM_SIZE));
        arch_svm->iopm = NULL;
    }
    if (arch_svm->msrpm != NULL) {
        free_xenheap_pages(
            arch_svm->msrpm, get_order_from_bytes(MSRPM_SIZE));
        arch_svm->msrpm = NULL;
    }
    arch_svm->vmcb = NULL;
}
/*
 * Construct the VMCB.
 */
int construct_vmcb(struct arch_svm_struct *arch_svm, struct cpu_user_regs *regs)
{
    int error;
    long rc = 0;
    struct host_save_area *hsa = NULL;
    u64 phys_hsa;

    memset(arch_svm, 0, sizeof(struct arch_svm_struct));

    if (!(arch_svm->vmcb = alloc_vmcb())) {
        printk("Failed to create a new VMCB\n");
        rc = -ENOMEM;
        goto err_out;
    }

    /*
     * Allocate the host save area (HSA).
     * Note: an HSA could be kept either per core or per VCPU. Since we do
     * not want a global data structure for a per-core HSA, we keep one HSA
     * per VCPU instead. This wastes space, as the number of VCPUs exceeds
     * the number of cores, but until we find a better place for a per-core
     * HSA we stay with this solution.
     */
    if (!(hsa = alloc_host_save_area()))
    {
        printk("Failed to allocate Host Save Area\n");
        rc = -ENOMEM;
        goto err_out;
    }

    phys_hsa = (u64) virt_to_maddr(hsa);
    arch_svm->host_save_area = hsa;
    arch_svm->host_save_pa = phys_hsa;

    arch_svm->vmcb_pa = (u64) virt_to_maddr(arch_svm->vmcb);

    if ((error = load_vmcb(arch_svm, arch_svm->host_save_pa)))
    {
        printk("construct_vmcb: load_vmcb failed: VMCB = %lx\n",
               (unsigned long) arch_svm->host_save_pa);
        rc = -EINVAL;
        goto err_out;
    }

    if ((error = construct_vmcb_controls(arch_svm)))
    {
        printk("construct_vmcb: construct_vmcb_controls failed\n");
        rc = -EINVAL;
        goto err_out;
    }

    /* Guest selectors */
    if ((error = construct_init_vmcb_guest(arch_svm, regs)))
    {
        printk("construct_vmcb: construct_init_vmcb_guest failed\n");
        rc = -EINVAL;
        goto err_out;
    }

    arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
    if (regs->eflags & EF_TF)
        arch_svm->vmcb->exception_intercepts |= EXCEPTION_BITMAP_DB;
    else
        arch_svm->vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_DB;

    return 0;

err_out:
    destroy_vmcb(arch_svm);
    return rc;
}
void svm_do_launch(struct vcpu *v)
{
    /* Update CR3, GDT, LDT, TR */
    struct vmcb_struct *vmcb;
    int core = smp_processor_id();
    vmcb = v->arch.hvm_svm.vmcb;
    ASSERT(vmcb);

    svm_stts(v);

    /* current core is the one we will perform the vmrun on */
    v->arch.hvm_svm.core = core;
    clear_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
    if ( !asidpool_assign_next(vmcb, 0, core, core) )
        BUG();

    if (v->vcpu_id == 0)
        hvm_setup_platform(v->domain);

    if (hvm_apic_support(v->domain))
        vlapic_init(v);
    init_timer(&v->arch.hvm_svm.hlt_timer,
               hlt_timer_fn, v, v->processor);

    vmcb->ldtr.sel = 0;
    vmcb->ldtr.base = 0;
    vmcb->ldtr.limit = 0;
    vmcb->ldtr.attributes.bytes = 0;

    vmcb->efer = EFER_SVME; /* Make sure VMRUN won't return with -1 */

    if (svm_dbg_on)
    {
        unsigned long pt;
        pt = pagetable_get_paddr(v->arch.shadow_table);
        printk("%s: shadow_table = %lx\n", __func__, pt);
        pt = pagetable_get_paddr(v->arch.guest_table);
        printk("%s: guest_table = %lx\n", __func__, pt);
        pt = pagetable_get_paddr(v->domain->arch.phys_table);
        printk("%s: phys_table = %lx\n", __func__, pt);
    }

    shadow_direct_map_init(v);

    if ( svm_paging_enabled(v) )
        vmcb->cr3 = pagetable_get_paddr(v->arch.guest_table);
    else
        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);

    if (svm_dbg_on)
    {
        printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
        printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x,"
               " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn,
               (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
    }

    v->arch.schedule_tail = arch_svm_do_resume;

    v->arch.hvm_svm.injecting_event = 0;
    v->arch.hvm_svm.saved_irq_vector = -1;

    if (svm_dbg_on)
        svm_dump_vmcb(__func__, vmcb);
}
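
/*
 * Point MSR_K8_VM_HSAVE_PA at the host save area. wrmsr() takes the
 * 64-bit physical address as separate low and high 32-bit halves.
 */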
int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa)
{
    u32 phys_hsa_lo, phys_hsa_hi;

    phys_hsa_lo = (u32) phys_hsa;
    phys_hsa_hi = (u32) (phys_hsa >> 32);

    wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
    set_bit(ARCH_SVM_VMCB_LOADED, &arch_svm->flags);
    return 0;
}
/*
 * Resume the guest.
 */
void svm_do_resume(struct vcpu *v)
{
    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
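
    /*
     * Block until any pending synchronous I/O emulation has completed:
     * we must not return to HVM guest context while ARCH_HVM_IO_WAIT is
     * set (see the changeset description above).
     */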
    if ( event_pending(v) ||
         test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) )
        hvm_wait_io();

    /* pick up the elapsed PIT ticks and re-enable pit_timer */
    if ( vpit->first_injected ) {
        pickup_deactive_ticks(vpit);
    }
    svm_set_tsc_shift(v, vpit);

    /* We can't resume the guest if we're waiting on I/O */
    ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags));
}
void svm_launch_fail(unsigned long eflags)
{
    BUG();
}

void svm_resume_fail(unsigned long eflags)
{
    BUG();
}
void svm_dump_sel(char *name, segment_selector_t *s)
{
    printf("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
           name, s->sel, s->attributes.bytes, s->limit,
           (unsigned long long)s->base);
}
void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
{
    printf("Dumping guest's current state at %s...\n", from);
    printf("Size of VMCB = %d, address = %p\n",
           (int) sizeof(struct vmcb_struct), vmcb);

    printf("cr_intercepts = 0x%08x dr_intercepts = 0x%08x exception_intercepts "
           "= 0x%08x\n", vmcb->cr_intercepts, vmcb->dr_intercepts,
           vmcb->exception_intercepts);
    printf("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
           vmcb->general1_intercepts, vmcb->general2_intercepts);
    printf("iopm_base_pa = 0x%016llx msrpm_base_pa = 0x%016llx tsc_offset = "
           "0x%016llx\n",
           (unsigned long long) vmcb->iopm_base_pa,
           (unsigned long long) vmcb->msrpm_base_pa,
           (unsigned long long) vmcb->tsc_offset);
    printf("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
           "0x%016llx\n", vmcb->tlb_control,
           (unsigned long long) vmcb->vintr.bytes,
           (unsigned long long) vmcb->interrupt_shadow);
    printf("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
           (unsigned long long) vmcb->exitcode,
           (unsigned long long) vmcb->exitintinfo.bytes);
    printf("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx\n",
           (unsigned long long) vmcb->exitinfo1,
           (unsigned long long) vmcb->exitinfo2);
    printf("np_enable = 0x%016llx guest_asid = 0x%03x\n",
           (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
    printf("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
           vmcb->cpl, (unsigned long long) vmcb->efer,
           (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
    printf("CR0 = 0x%016llx CR2 = 0x%016llx\n",
           (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
    printf("CR3 = 0x%016llx CR4 = 0x%016llx\n",
           (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
    printf("RSP = 0x%016llx RIP = 0x%016llx\n",
           (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
    printf("RAX = 0x%016llx RFLAGS = 0x%016llx\n",
           (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
    printf("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
           (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
    printf("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
           (unsigned long long) vmcb->cstar, (unsigned long long) vmcb->sfmask);
    printf("KernGSBase = 0x%016llx PAT = 0x%016llx\n",
           (unsigned long long) vmcb->kerngsbase,
           (unsigned long long) vmcb->g_pat);

    /* print out all the selectors */
    svm_dump_sel("CS", &vmcb->cs);
    svm_dump_sel("DS", &vmcb->ds);
    svm_dump_sel("SS", &vmcb->ss);
    svm_dump_sel("ES", &vmcb->es);
    svm_dump_sel("FS", &vmcb->fs);
    svm_dump_sel("GS", &vmcb->gs);
    svm_dump_sel("GDTR", &vmcb->gdtr);
    svm_dump_sel("LDTR", &vmcb->ldtr);
    svm_dump_sel("IDTR", &vmcb->idtr);
    svm_dump_sel("TR", &vmcb->tr);
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */