direct-io.hg: view xen/arch/x86/hvm/svm/vmcb.c @ 9462:48abab2ab719

SVM patch to update guest time with latest hvm modifications.

Signed-off-by: Tom Woller <thomas.woller@amd.com>
author    kaf24@firebug.cl.cam.ac.uk
date      Wed Mar 29 13:56:26 2006 +0100 (2006-03-29)
parents   98eaa430bdf3
children  f0e14b4e535c
/*
 * vmcb.c: VMCB management
 * Copyright (c) 2005, AMD Corporation.
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/shadow.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/intr.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/domain_page.h>
extern int svm_dbg_on;
extern int asidpool_assign_next(struct vmcb_struct *vmcb, int retire_current,
                                int oldcore, int newcore);

#define round_pgdown(_p) ((_p) & PAGE_MASK) /* copied from domain.c */

#define GUEST_SEGMENT_LIMIT 0xffffffff

#define IOPM_SIZE   (12 * 1024)
#define MSRPM_SIZE  (8 * 1024)
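
/*
 * Per the AMD64 architecture manual, the I/O permission map occupies three
 * contiguous 4KB pages (12KB) and the MSR permission map two (8KB).  A set
 * bit in either map causes the corresponding I/O port or MSR access to be
 * intercepted.
 */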
struct vmcb_struct *alloc_vmcb(void)
{
    struct vmcb_struct *vmcb = NULL;
    unsigned int order;
    order = get_order_from_bytes(sizeof(struct vmcb_struct));
    ASSERT(order >= 0);
    vmcb = alloc_xenheap_pages(order);
    ASSERT(vmcb);

    if (vmcb)
        memset(vmcb, 0, sizeof(struct vmcb_struct));

    return vmcb;
}
void free_vmcb(struct vmcb_struct *vmcb)
{
    unsigned int order;

    order = get_order_from_bytes(sizeof(struct vmcb_struct));
    ASSERT(vmcb);

    if (vmcb)
        free_xenheap_pages(vmcb, order);
}
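
/*
 * The host save area is the page VMRUN saves host state into and VMEXIT
 * restores it from; its physical address is programmed into the
 * VM_HSAVE_PA MSR (see load_vmcb() below).  A single page is sufficient.
 */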
struct host_save_area *alloc_host_save_area(void)
{
    unsigned int order = 0;
    struct host_save_area *hsa = NULL;

    hsa = alloc_xenheap_pages(order);
    ASSERT(hsa);

    if (hsa)
        memset(hsa, 0, PAGE_SIZE);

    return hsa;
}
void free_host_save_area(struct host_save_area *hsa)
{
    unsigned int order;

    order = get_order_from_bytes(PAGE_SIZE);
    ASSERT(hsa);

    if (hsa)
        free_xenheap_pages(hsa, order);
}
/* Set up intercepts to exit the guest into the hypervisor when we want it. */
static int construct_vmcb_controls(struct arch_svm_struct *arch_svm)
{
    struct vmcb_struct *vmcb;
    u32 *iopm;
    u32 *msrpm;

    vmcb = arch_svm->vmcb;

    ASSERT(vmcb);

    /* Enable all general 1 intercepts except the events masked off here. */
    vmcb->general1_intercepts =
        ~(GENERAL1_INTERCEPT_CR0_SEL_WRITE | GENERAL1_INTERCEPT_VINTR      |
          GENERAL1_INTERCEPT_IDTR_READ     | GENERAL1_INTERCEPT_IDTR_WRITE |
          GENERAL1_INTERCEPT_GDTR_READ     | GENERAL1_INTERCEPT_GDTR_WRITE |
          GENERAL1_INTERCEPT_LDTR_READ     | GENERAL1_INTERCEPT_LDTR_WRITE |
          GENERAL1_INTERCEPT_TR_READ       | GENERAL1_INTERCEPT_TR_WRITE   |
          GENERAL1_INTERCEPT_RDTSC         | GENERAL1_INTERCEPT_PUSHF      |
          GENERAL1_INTERCEPT_SWINT         | GENERAL1_INTERCEPT_POPF       |
          GENERAL1_INTERCEPT_IRET          | GENERAL1_INTERCEPT_PAUSE      |
          GENERAL1_INTERCEPT_TASK_SWITCH
        );

    /* Turn on the general 2 intercepts. */
    vmcb->general2_intercepts =
        GENERAL2_INTERCEPT_VMRUN  | GENERAL2_INTERCEPT_VMMCALL |
        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE  |
        GENERAL2_INTERCEPT_STGI   | GENERAL2_INTERCEPT_CLGI    |
        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;

    /* Don't intercept reads or writes of debug registers 0 - 15. */
    vmcb->dr_intercepts = 0;

    /* Intercept reads and writes of all control registers 0 - 15,
       except for reads and writes of CR2. */
    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);

    /* The following sets up the I/O and MSR permission maps. */
    iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE));

    ASSERT(iopm);
    memset(iopm, 0xff, IOPM_SIZE);
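    /*
     * All ports are intercepted by default.  Clearing the PC_DEBUG_PORT
     * bit (the PC POST/debug port) lets the guest write to that port
     * directly without a VMEXIT.
     */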
    clear_bit(PC_DEBUG_PORT, iopm);

    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));

    ASSERT(msrpm);
    memset(msrpm, 0xff, MSRPM_SIZE);

    arch_svm->iopm = iopm;
    arch_svm->msrpm = msrpm;

    vmcb->iopm_base_pa = (u64) virt_to_maddr(iopm);
    vmcb->msrpm_base_pa = (u64) virt_to_maddr(msrpm);

    return 0;
}
/*
 * Modify guest eflags and exception bitmap for gdb.
 */
int svm_modify_vmcb(struct vcpu *v, struct cpu_user_regs *regs)
{
    int error;
    if ((error = load_vmcb(&v->arch.hvm_svm, v->arch.hvm_svm.host_save_pa)))
    {
        printk("svm_modify_vmcb: load_vmcb failed: VMCB = %lx\n",
               (unsigned long) v->arch.hvm_svm.host_save_pa);
        return -EINVAL;
    }
    svm_load_cpu_user_regs(v, regs);
    return 0;
}
/*
 * Initially set up the same environment as the host.
 */
static int construct_init_vmcb_guest(struct arch_svm_struct *arch_svm,
                                     struct cpu_user_regs *regs)
{
    int error = 0;
    unsigned long crn;
    segment_attributes_t attrib;
    unsigned long dr7;
    unsigned long eflags;
    unsigned long shadow_cr;
    struct vmcb_struct *vmcb = arch_svm->vmcb;

    /* Allow IRQs to be shared: the host keeps control of physical
       interrupt masking, while the guest's EFLAGS.IF only masks
       virtual interrupts. */
    vmcb->vintr.fields.intr_masking = 1;

    /* Set up the event injection entry in the VMCB.  Just clear it. */
    vmcb->eventinj.bytes = 0;

    /* TSC */
    vmcb->tsc_offset = 0;
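    /*
     * The offset is added to the host counter on every guest TSC read, so
     * a zero tsc_offset means RDTSC in the guest returns the raw host TSC.
     */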
    vmcb->cs.sel = regs->cs;
    vmcb->es.sel = regs->es;
    vmcb->ss.sel = regs->ss;
    vmcb->ds.sel = regs->ds;
    vmcb->fs.sel = regs->fs;
    vmcb->gs.sel = regs->gs;

    /* Guest segment limits: flat 4GB, rather than the 64K real-mode
       default. */
    vmcb->cs.limit = GUEST_SEGMENT_LIMIT;
    vmcb->es.limit = GUEST_SEGMENT_LIMIT;
    vmcb->ss.limit = GUEST_SEGMENT_LIMIT;
    vmcb->ds.limit = GUEST_SEGMENT_LIMIT;
    vmcb->fs.limit = GUEST_SEGMENT_LIMIT;
    vmcb->gs.limit = GUEST_SEGMENT_LIMIT;

    /* Base addresses for segments */
    vmcb->cs.base = 0;
    vmcb->es.base = 0;
    vmcb->ss.base = 0;
    vmcb->ds.base = 0;
    vmcb->fs.base = 0;
    vmcb->gs.base = 0;

    /* Guest interrupt descriptor table */
    vmcb->idtr.base = 0;
    vmcb->idtr.limit = 0;

    /* Set up segment attributes */
    attrib.bytes = 0;
    attrib.fields.type = 0x3;  /* type = 3: read/write data, accessed */
    attrib.fields.s = 1;       /* code or data, i.e. not system */
    attrib.fields.dpl = 0;     /* DPL = 0 */
    attrib.fields.p = 1;       /* segment present */
    attrib.fields.db = 1;      /* 32-bit */
    attrib.fields.g = 1;       /* 4K pages in limit */

    /* Data selectors */
    vmcb->es.attributes = attrib;
    vmcb->ss.attributes = attrib;
    vmcb->ds.attributes = attrib;
    vmcb->fs.attributes = attrib;
    vmcb->gs.attributes = attrib;

    /* Code selector */
    attrib.fields.type = 0xb;  /* type = 0xb: executable/readable, accessed */
    vmcb->cs.attributes = attrib;

    /* Guest global descriptor table */
    vmcb->gdtr.base = 0;
    vmcb->gdtr.limit = 0;

    /* Guest local descriptor table */
    attrib.fields.s = 0;       /* not a code or data segment */
    attrib.fields.type = 0x2;  /* LDT */
    attrib.fields.db = 0;      /* 16-bit */
    attrib.fields.g = 0;
    vmcb->ldtr.attributes = attrib;

    attrib.fields.type = 0xb;  /* 32-bit TSS (busy) */
    vmcb->tr.attributes = attrib;
    vmcb->tr.base = 0;
    vmcb->tr.limit = 0xff;
    /* Start from the host's CR0. */
    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) :);
    vmcb->cr0 = crn;

    /* Initially, PG is not set in the guest-visible CR0. */
    shadow_cr = vmcb->cr0;
    shadow_cr &= ~X86_CR0_PG;
    arch_svm->cpu_shadow_cr0 = shadow_cr;
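    /*
     * The guest observes cpu_shadow_cr0 (paging apparently off) while the
     * hardware runs with the real vmcb->cr0, so shadow paging can stay
     * enabled underneath the guest.
     */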
    /* CR3 is set in svm_final_setup_guest. */

    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :);
    arch_svm->cpu_shadow_cr4 = crn & ~(X86_CR4_PGE | X86_CR4_PSE);
    vmcb->cr4 = crn | SVM_CR4_HOST_MASK;

    vmcb->rsp = 0;
    vmcb->rip = regs->eip;
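    /* x86 defines EFLAGS bit 1 as always 1 and several other bits as
       always 0, so force the reserved bits to architecturally valid
       values before loading RFLAGS. */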
    eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= HVM_EFLAGS_RESERVED_1;                /* set 1s */

    vmcb->rflags = eflags;

    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    vmcb->dr7 = dr7;

    return error;
}
/*
 * Destroy the VMCB.
 */
void destroy_vmcb(struct arch_svm_struct *arch_svm)
{
    if (arch_svm->vmcb != NULL)
    {
        asidpool_retire(arch_svm->vmcb, arch_svm->asid_core);
        free_vmcb(arch_svm->vmcb);
    }
    if (arch_svm->iopm != NULL) {
        free_xenheap_pages(
            arch_svm->iopm, get_order_from_bytes(IOPM_SIZE));
        arch_svm->iopm = NULL;
    }
    if (arch_svm->msrpm != NULL) {
        free_xenheap_pages(
            arch_svm->msrpm, get_order_from_bytes(MSRPM_SIZE));
        arch_svm->msrpm = NULL;
    }
    arch_svm->vmcb = NULL;
}
/*
 * Construct the VMCB.
 */
int construct_vmcb(struct arch_svm_struct *arch_svm, struct cpu_user_regs *regs)
{
    int error;
    long rc = 0;
    struct host_save_area *hsa = NULL;
    u64 phys_hsa;

    memset(arch_svm, 0, sizeof(struct arch_svm_struct));

    if (!(arch_svm->vmcb = alloc_vmcb())) {
        printk("Failed to create a new VMCB\n");
        rc = -ENOMEM;
        goto err_out;
    }

    /*
     * The following code allocates the host_save_area.
     * Note: a Host Save Area could be kept per core or per VCPU.  Since we
     * do not want a global data structure holding per-core HSAs, we keep
     * one HSA per VCPU instead.  This wastes space whenever the number of
     * VCPUs exceeds the number of cores, but until we find a better home
     * for per-core HSAs we will stay with this solution.
     */
    if (!(hsa = alloc_host_save_area()))
    {
        printk("Failed to allocate Host Save Area\n");
        rc = -ENOMEM;
        goto err_out;
    }

    phys_hsa = (u64) virt_to_maddr(hsa);
    arch_svm->host_save_area = hsa;
    arch_svm->host_save_pa = phys_hsa;
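    /* VMRUN takes the VMCB by physical address (passed in rAX), so record
       the machine address of the VMCB here. */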
    arch_svm->vmcb_pa = (u64) virt_to_maddr(arch_svm->vmcb);

    if ((error = load_vmcb(arch_svm, arch_svm->host_save_pa)))
    {
        printk("construct_vmcb: load_vmcb failed: VMCB = %lx\n",
               (unsigned long) arch_svm->host_save_pa);
        rc = -EINVAL;
        goto err_out;
    }

    if ((error = construct_vmcb_controls(arch_svm)))
    {
        printk("construct_vmcb: construct_vmcb_controls failed\n");
        rc = -EINVAL;
        goto err_out;
    }

    /* Guest selectors */
    if ((error = construct_init_vmcb_guest(arch_svm, regs)))
    {
        printk("construct_vmcb: construct_init_vmcb_guest failed\n");
        rc = -EINVAL;
        goto err_out;
    }

    arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
    if (regs->eflags & EF_TF)
        arch_svm->vmcb->exception_intercepts |= EXCEPTION_BITMAP_DB;
    else
        arch_svm->vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_DB;

    return 0;

err_out:
    destroy_vmcb(arch_svm);
    return rc;
}
void svm_do_launch(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    int core = smp_processor_id();
    ASSERT(vmcb);

    /* Update CR3, GDT, LDT, TR */
    svm_stts(v);

    /* The current core is the one we intend to perform the VMRUN on. */
    v->arch.hvm_svm.launch_core = v->arch.hvm_svm.asid_core = core;
    clear_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
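    /*
     * ASIDs tag guest TLB entries so that guest and host translations can
     * coexist without a full TLB flush on every VMRUN; take a fresh ASID
     * from this core's pool.
     */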
    if ( !asidpool_assign_next( vmcb, 0, core, core ))
        BUG();

    if (v->vcpu_id == 0)
        hvm_setup_platform(v->domain);

    if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 )
    {
        printk("HVM domain bind port %d to vcpu %d failed!\n",
               iopacket_port(v), v->vcpu_id);
        domain_crash_synchronous();
    }

    HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v));

    clear_bit(iopacket_port(v),
              &v->domain->shared_info->evtchn_mask[0]);

    if (hvm_apic_support(v->domain))
        vlapic_init(v);
    init_timer(&v->arch.hvm_svm.hlt_timer,
               hlt_timer_fn, v, v->processor);

    vmcb->ldtr.sel = 0;
    vmcb->ldtr.base = 0;
    vmcb->ldtr.limit = 0;
    vmcb->ldtr.attributes.bytes = 0;

    vmcb->efer = EFER_SVME; /* Make sure VMRUN won't return with -1 */

    if (svm_dbg_on)
    {
        unsigned long pt;
        pt = pagetable_get_paddr(v->arch.shadow_table);
        printk("%s: shadow_table = %lx\n", __func__, pt);
        pt = pagetable_get_paddr(v->arch.guest_table);
        printk("%s: guest_table = %lx\n", __func__, pt);
        pt = pagetable_get_paddr(v->domain->arch.phys_table);
        printk("%s: phys_table = %lx\n", __func__, pt);
    }

    if ( svm_paging_enabled(v) )
        vmcb->cr3 = pagetable_get_paddr(v->arch.guest_table);
    else
        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);

    if (svm_dbg_on)
    {
        printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
        printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x,"
               " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn,
               (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
    }

    v->arch.schedule_tail = arch_svm_do_resume;

    v->arch.hvm_svm.injecting_event = 0;
    v->arch.hvm_svm.saved_irq_vector = -1;

    svm_set_guest_time(v, 0);

    if (svm_dbg_on)
        svm_dump_vmcb(__func__, vmcb);
}
int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa)
{
    u32 phys_hsa_lo, phys_hsa_hi;

    phys_hsa_lo = (u32) phys_hsa;
    phys_hsa_hi = (u32) (phys_hsa >> 32);
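    /* MSR_K8_VM_HSAVE_PA holds the physical address of the host save area;
       wrmsr() takes the 64-bit value as separate low and high halves. */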
    wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
    set_bit(ARCH_SVM_VMCB_LOADED, &arch_svm->flags);
    return 0;
}
/*
 * Resume the guest.
 */
void svm_do_resume(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;

    svm_stts(v);

    /* Pick up the elapsed PIT ticks and re-enable pit_timer. */
    if ( vpit->first_injected ) {
        if ( v->domain->arch.hvm_domain.guest_time ) {
            svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
            v->domain->arch.hvm_domain.guest_time = 0;
        }
        pickup_deactive_ticks(vpit);
    }

    if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
         test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) )
        hvm_wait_io();

    /* We can't resume the guest if we're waiting on I/O. */
    ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags));
}
void svm_launch_fail(unsigned long eflags)
{
    BUG();
}

void svm_resume_fail(unsigned long eflags)
{
    BUG();
}
void svm_dump_sel(char *name, segment_selector_t *s)
{
    printf("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
           name, s->sel, s->attributes.bytes, s->limit,
           (unsigned long long)s->base);
}
void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
{
    printf("Dumping guest's current state at %s...\n", from);
    printf("Size of VMCB = %d, address = %p\n",
           (int) sizeof(struct vmcb_struct), vmcb);

    printf("cr_intercepts = 0x%08x dr_intercepts = 0x%08x exception_intercepts "
           "= 0x%08x\n", vmcb->cr_intercepts, vmcb->dr_intercepts,
           vmcb->exception_intercepts);
    printf("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
           vmcb->general1_intercepts, vmcb->general2_intercepts);
    printf("iopm_base_pa = 0x%016llx msrpm_base_pa = 0x%016llx tsc_offset = "
           "0x%016llx\n",
           (unsigned long long) vmcb->iopm_base_pa,
           (unsigned long long) vmcb->msrpm_base_pa,
           (unsigned long long) vmcb->tsc_offset);
    printf("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
           "0x%016llx\n", vmcb->tlb_control,
           (unsigned long long) vmcb->vintr.bytes,
           (unsigned long long) vmcb->interrupt_shadow);
    printf("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
           (unsigned long long) vmcb->exitcode,
           (unsigned long long) vmcb->exitintinfo.bytes);
    printf("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx\n",
           (unsigned long long) vmcb->exitinfo1,
           (unsigned long long) vmcb->exitinfo2);
    printf("np_enable = 0x%016llx guest_asid = 0x%03x\n",
           (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
    printf("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
           vmcb->cpl, (unsigned long long) vmcb->efer,
           (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
    printf("CR0 = 0x%016llx CR2 = 0x%016llx\n",
           (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
    printf("CR3 = 0x%016llx CR4 = 0x%016llx\n",
           (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
    printf("RSP = 0x%016llx RIP = 0x%016llx\n",
           (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
    printf("RAX = 0x%016llx RFLAGS = 0x%016llx\n",
           (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
    printf("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
           (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
    printf("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
           (unsigned long long) vmcb->cstar, (unsigned long long) vmcb->sfmask);
    printf("KernGSBase = 0x%016llx PAT = 0x%016llx\n",
           (unsigned long long) vmcb->kerngsbase,
           (unsigned long long) vmcb->g_pat);

    /* Print out all the selectors. */
    svm_dump_sel("CS", &vmcb->cs);
    svm_dump_sel("DS", &vmcb->ds);
    svm_dump_sel("SS", &vmcb->ss);
    svm_dump_sel("ES", &vmcb->es);
    svm_dump_sel("FS", &vmcb->fs);
    svm_dump_sel("GS", &vmcb->gs);
    svm_dump_sel("GDTR", &vmcb->gdtr);
    svm_dump_sel("LDTR", &vmcb->ldtr);
    svm_dump_sel("IDTR", &vmcb->idtr);
    svm_dump_sel("TR", &vmcb->tr);
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */