ia64/xen-unstable: xen/arch/x86/hvm/hvm.c @ 15732:00aa18fd722e

hvm: Fix %cr4 handling in HAP paging mode.
Signed-off-by: Keir Fraser <keir@xensource.com>

author:   kfraser@localhost.localdomain
date:     Wed Aug 08 13:18:07 2007 +0100
parents:  da2c7dab1a3a
children: 25e5c1b9faad
/*
 * hvm.c: Common hardware virtual machine abstractions.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
#include <public/memory.h>

int hvm_enabled __read_mostly;

unsigned int opt_hvm_debug_level __read_mostly;
integer_param("hvm_debug", opt_hvm_debug_level);

struct hvm_function_table hvm_funcs __read_mostly;

/* I/O permission bitmap is globally shared by all HVM guests. */
char __attribute__ ((__section__ (".bss.page_aligned")))
    hvm_io_bitmap[3*PAGE_SIZE];

void hvm_enable(struct hvm_function_table *fns)
{
    BUG_ON(hvm_enabled);
    printk("HVM: %s enabled\n", fns->name);

    /*
     * Allow direct access to the PC debug port (it is often used for I/O
     * delays, but the vmexits simply slow things down).
     */
    memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
    clear_bit(0x80, hvm_io_bitmap);

    hvm_funcs = *fns;
    hvm_enabled = 1;
}

void hvm_stts(struct vcpu *v)
{
    /* FPU state already dirty? Then no need to setup_fpu() lazily. */
    if ( !v->fpu_dirtied )
        hvm_funcs.stts(v);
}
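
/*
 * Guest time is maintained as an offset from the host TSC; the offset is
 * cached per VCPU and installed through the vendor hook
 * hvm_funcs.set_tsc_offset().
 */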
void hvm_set_guest_time(struct vcpu *v, u64 gtime)
{
    u64 host_tsc;

    rdtscll(host_tsc);

    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

u64 hvm_get_guest_time(struct vcpu *v)
{
    u64 host_tsc;

    rdtscll(host_tsc);
    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}

void hvm_migrate_timers(struct vcpu *v)
{
    rtc_migrate_timers(v);
    hpet_migrate_timers(v);
    pt_migrate(v);
}
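
/*
 * Called on the return-to-guest path: restore lazy FPU/TS state, unfreeze
 * the per-VCPU periodic timers, and complete (or wait for) any I/O request
 * that the device model still has outstanding for this VCPU.
 */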
void hvm_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    hvm_stts(v);

    pt_thaw_time(v);

    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_ioreq(v)->vp_ioreq;
    while ( p->state != STATE_IOREQ_NONE )
    {
        switch ( p->state )
        {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            hvm_io_assist();
            break;
        case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
        case STATE_IOREQ_INPROCESS:
            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}

static void hvm_init_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void hvm_destroy_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if ( iorp->va != NULL )
    {
        unmap_domain_page_global(iorp->va);
        put_page_and_type(iorp->page);
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}
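
/*
 * Map the guest frame nominated as the (buffered) ioreq page into Xen's
 * address space, taking a writable type reference so the page cannot
 * change type while the mapping exists.
 */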
static int hvm_set_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;
    void *va;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( !mfn_valid(mfn) )
        return -EINVAL;

    page = mfn_to_page(mfn);
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return -EINVAL;

    va = map_domain_page_global(mfn);
    if ( va == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    spin_lock(&iorp->lock);

    if ( (iorp->va != NULL) || d->is_dying )
    {
        spin_unlock(&iorp->lock);
        unmap_domain_page_global(va);
        put_page_and_type(mfn_to_page(mfn));
        return -EINVAL;
    }

    iorp->va = va;
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

int hvm_domain_initialise(struct domain *d)
{
    int rc;

    if ( !hvm_enabled )
    {
        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
                 "on a non-VT/AMDV platform.\n");
        return -EINVAL;
    }

    spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    spin_lock_init(&d->arch.hvm_domain.irq_lock);

    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
    if ( rc != 0 )
        return rc;

    vpic_init(d);
    vioapic_init(d);

    hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    return hvm_funcs.domain_initialise(d);
}

void hvm_domain_relinquish_resources(struct domain *d)
{
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    pit_deinit(d);
    rtc_deinit(d);
    pmtimer_deinit(d);
    hpet_deinit(d);
}

void hvm_domain_destroy(struct domain *d)
{
    hvm_funcs.domain_destroy(d);
}
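
/*
 * Save/restore handlers for per-VCPU architectural state. The arch-specific
 * VMCS/VMCB fields are handled by hvm_funcs.{save,load}_cpu_ctxt(); the
 * remaining general-purpose, debug and FPU register state is copied here.
 */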
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    for_each_vcpu(d, v)
    {
        /* We don't need to save state for a vcpu that is down; the restore
         * code will leave it down if there is nothing saved. */
        if ( test_bit(_VPF_down, &v->pause_flags) )
            continue;

        /* Architecture-specific vmcs/vmcb bits */
        hvm_funcs.save_cpu_ctxt(v, &ctxt);

        /* Other vcpu register state */
        vc = &v->arch.guest_context;
        if ( v->fpu_initialised )
            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
        else
            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
        ctxt.rax = vc->user_regs.eax;
        ctxt.rbx = vc->user_regs.ebx;
        ctxt.rcx = vc->user_regs.ecx;
        ctxt.rdx = vc->user_regs.edx;
        ctxt.rbp = vc->user_regs.ebp;
        ctxt.rsi = vc->user_regs.esi;
        ctxt.rdi = vc->user_regs.edi;
        /* %rsp handled by arch-specific call above */
#ifdef __x86_64__
        ctxt.r8 = vc->user_regs.r8;
        ctxt.r9 = vc->user_regs.r9;
        ctxt.r10 = vc->user_regs.r10;
        ctxt.r11 = vc->user_regs.r11;
        ctxt.r12 = vc->user_regs.r12;
        ctxt.r13 = vc->user_regs.r13;
        ctxt.r14 = vc->user_regs.r14;
        ctxt.r15 = vc->user_regs.r15;
#endif
        ctxt.dr0 = vc->debugreg[0];
        ctxt.dr1 = vc->debugreg[1];
        ctxt.dr2 = vc->debugreg[2];
        ctxt.dr3 = vc->debugreg[3];
        ctxt.dr6 = vc->debugreg[6];
        ctxt.dr7 = vc->debugreg[7];

        if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
            return 1;
    }
    return 0;
}

static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid, rc;
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    /* Which vcpu is this? */
    vcpuid = hvm_load_instance(h);
    if ( vcpuid > MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL )
    {
        gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
        return -EINVAL;
    }
    vc = &v->arch.guest_context;

    /* Need to init this vcpu before loading its contents */
    LOCK_BIGLOCK(d);
    if ( !v->is_initialised )
        if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
            return rc;
    UNLOCK_BIGLOCK(d);

    if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
        return -EINVAL;

    /* Architecture-specific vmcs/vmcb bits */
    if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
        return -EINVAL;

    /* Other vcpu register state */
    memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
    vc->user_regs.eax = ctxt.rax;
    vc->user_regs.ebx = ctxt.rbx;
    vc->user_regs.ecx = ctxt.rcx;
    vc->user_regs.edx = ctxt.rdx;
    vc->user_regs.ebp = ctxt.rbp;
    vc->user_regs.esi = ctxt.rsi;
    vc->user_regs.edi = ctxt.rdi;
    vc->user_regs.esp = ctxt.rsp;
#ifdef __x86_64__
    vc->user_regs.r8 = ctxt.r8;
    vc->user_regs.r9 = ctxt.r9;
    vc->user_regs.r10 = ctxt.r10;
    vc->user_regs.r11 = ctxt.r11;
    vc->user_regs.r12 = ctxt.r12;
    vc->user_regs.r13 = ctxt.r13;
    vc->user_regs.r14 = ctxt.r14;
    vc->user_regs.r15 = ctxt.r15;
#endif
    vc->debugreg[0] = ctxt.dr0;
    vc->debugreg[1] = ctxt.dr1;
    vc->debugreg[2] = ctxt.dr2;
    vc->debugreg[3] = ctxt.dr3;
    vc->debugreg[6] = ctxt.dr6;
    vc->debugreg[7] = ctxt.dr7;

    vc->flags = VGCF_online;
    v->fpu_initialised = 1;

    /* Auxiliary processors should be woken immediately. */
    if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
        vcpu_wake(v);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                          1, HVMSR_PER_VCPU);

int hvm_vcpu_initialise(struct vcpu *v)
{
    int rc;

    if ( (rc = vlapic_init(v)) != 0 )
        return rc;

    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
    {
        vlapic_destroy(v);
        return rc;
    }

    /* Create ioreq event channel. */
    rc = alloc_unbound_xen_event_channel(v, 0);
    if ( rc < 0 )
    {
        hvm_funcs.vcpu_destroy(v);
        vlapic_destroy(v);
        return rc;
    }

    /* Register ioreq event channel. */
    v->arch.hvm_vcpu.xen_port = rc;
    spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
    if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
        get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
    spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);

    spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
    INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);

    if ( v->vcpu_id == 0 )
    {
        /* NB. All these really belong in hvm_domain_initialise(). */
        pit_init(v, cpu_khz);
        rtc_init(v, RTC_PORT(0));
        pmtimer_init(v);
        hpet_init(v);

        /* Init guest TSC to start from zero. */
        hvm_set_guest_time(v, 0);
    }

    return 0;
}

void hvm_vcpu_destroy(struct vcpu *v)
{
    vlapic_destroy(v);
    hvm_funcs.vcpu_destroy(v);

    /* Event channel is already freed by evtchn_destroy(). */
    /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
}


void hvm_vcpu_reset(struct vcpu *v)
{
    vcpu_pause(v);

    vlapic_reset(vcpu_vlapic(v));

    hvm_funcs.vcpu_initialise(v);

    set_bit(_VPF_down, &v->pause_flags);
    clear_bit(_VPF_blocked, &v->pause_flags);
    v->fpu_initialised = 0;
    v->fpu_dirtied = 0;
    v->is_initialised = 0;

    vcpu_unpause(v);
}

static void hvm_vcpu_down(void)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    int online_count = 0;

    gdprintk(XENLOG_INFO, "DOM%d/VCPU%d: going offline.\n",
             d->domain_id, v->vcpu_id);

    /* Doesn't halt us immediately, but we'll never return to guest context. */
    set_bit(_VPF_down, &v->pause_flags);
    vcpu_sleep_nosync(v);

    /* Any other VCPUs online? ... */
    LOCK_BIGLOCK(d);
    for_each_vcpu ( d, v )
        if ( !test_bit(_VPF_down, &v->pause_flags) )
            online_count++;
    UNLOCK_BIGLOCK(d);

    /* ... Shut down the domain if not. */
    if ( online_count == 0 )
    {
        gdprintk(XENLOG_INFO, "DOM%d: all CPUs offline -- powering off.\n",
                 d->domain_id);
        domain_shutdown(d, SHUTDOWN_poweroff);
    }
}
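
/*
 * Hand the current I/O request off to the device model: mark the ioreq
 * READY and kick the event channel, having first prepared to block until
 * the device model posts its response.
 */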
void hvm_send_assist_req(struct vcpu *v)
{
    ioreq_t *p;

    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
        return; /* implicitly bins the i/o operation */

    p = &get_ioreq(v)->vp_ioreq;
    if ( unlikely(p->state != STATE_IOREQ_NONE) )
    {
        /* This indicates a bug in the device model. Crash the domain. */
        gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
        domain_crash_synchronous();
    }

    prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);

    /*
     * Following happens /after/ blocking and setting up ioreq contents.
     * prepare_wait_on_xen_event_channel() is an implicit barrier.
     */
    p->state = STATE_IOREQ_READY;
    notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
}

void hvm_hlt(unsigned long rflags)
{
    /*
     * If we halt with interrupts disabled, that's a pretty sure sign that we
     * want to shut down. In a real processor, NMIs are the only way to break
     * out of this.
     */
    if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
        return hvm_vcpu_down();

    do_sched_op_compat(SCHEDOP_block, 0);
}

void hvm_triple_fault(void)
{
    struct vcpu *v = current;
    gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
             "invoking HVM system reset.\n", v->vcpu_id);
    domain_shutdown(v->domain, SHUTDOWN_reboot);
}
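
/*
 * Guest CR3 write. With hardware-assisted paging, or while the guest has
 * paging disabled, the value is simply recorded; under shadow paging the
 * new top-level page table is validated and referenced before the shadow
 * is updated.
 */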
int hvm_set_cr3(unsigned long value)
{
    unsigned long old_base_mfn, mfn;
    struct vcpu *v = current;

    if ( paging_mode_hap(v->domain) || !hvm_paging_enabled(v) )
    {
        /* Nothing to do. */
    }
    else if ( value == v->arch.hvm_vcpu.guest_cr[3] )
    {
        /* Shadow-mode TLB flush. Invalidate the shadow. */
        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
        if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
            goto bad_cr3;
    }
    else
    {
        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
            goto bad_cr3;

        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
        v->arch.guest_table = pagetable_from_pfn(mfn);

        if ( old_base_mfn )
            put_page(mfn_to_page(old_base_mfn));

        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
    }

    v->arch.hvm_vcpu.guest_cr[3] = value;
    paging_update_cr3(v);
    return 1;

 bad_cr3:
    gdprintk(XENLOG_ERR, "Invalid CR3\n");
    domain_crash(v->domain);
    return 0;
}
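
/*
 * Guest CR4 write. Setting a reserved bit, or clearing CR4.PAE while the
 * guest is in long mode, injects #GP. The value loaded into hardware is
 * the guest value merged with HVM_CR4_HOST_MASK (without forcing PAE on
 * in HAP mode).
 */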
int hvm_set_cr4(unsigned long value)
{
    struct vcpu *v = current;
    unsigned long old_cr;

    if ( value & HVM_CR4_GUEST_RESERVED_BITS )
    {
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "Guest attempts to set reserved bit in CR4: %lx",
                    value);
        goto gpf;
    }

    if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
    {
        HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
                    "EFER.LMA is set");
        goto gpf;
    }

    old_cr = v->arch.hvm_vcpu.guest_cr[4];
    v->arch.hvm_vcpu.guest_cr[4] = value;
    v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
    if ( paging_mode_hap(v->domain) )
        v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
    v->arch.hvm_vcpu.hw_cr[4] |= value;
    hvm_update_guest_cr(v, 4);

    /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
    if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
        paging_update_paging_modes(v);

    return 1;

 gpf:
    hvm_inject_exception(TRAP_gp_fault, 0, 0);
    return 0;
}

/*
 * __hvm_copy():
 *  @buf  = hypervisor buffer
 *  @addr = guest address to copy to/from
 *  @size = number of bytes to copy
 *  @dir  = copy *to* guest (TRUE) or *from* guest (FALSE)?
 *  @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
 * Returns number of bytes failed to copy (0 == complete success).
 */
static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
{
    unsigned long gfn, mfn;
    char *p;
    int count, todo;

    todo = size;
    while ( todo > 0 )
    {
        count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);

        if ( virt )
            gfn = paging_gva_to_gfn(current, addr);
        else
            gfn = addr >> PAGE_SHIFT;

        mfn = get_mfn_from_gpfn(gfn);

        if ( mfn == INVALID_MFN )
            return todo;

        p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);

        if ( dir )
        {
            memcpy(p, buf, count); /* dir == TRUE: *to* guest */
            paging_mark_dirty(current->domain, mfn);
        }
        else
            memcpy(buf, p, count); /* dir == FALSE: *from* guest */

        unmap_domain_page(p);

        addr += count;
        buf += count;
        todo -= count;
    }

    return 0;
}

int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
{
    return __hvm_copy(buf, paddr, size, 1, 0);
}

int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
{
    return __hvm_copy(buf, paddr, size, 0, 0);
}

int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
{
    return __hvm_copy(buf, vaddr, size, 1, 1);
}

int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
{
    return __hvm_copy(buf, vaddr, size, 0, 1);
}

/* HVM specific printbuf. Mostly used for hvmloader chit-chat. */
void hvm_print_line(struct vcpu *v, const char c)
{
    struct hvm_domain *hd = &v->domain->arch.hvm_domain;

    spin_lock(&hd->pbuf_lock);
    hd->pbuf[hd->pbuf_idx++] = c;
    if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
    {
        if ( c != '\n' )
            hd->pbuf[hd->pbuf_idx++] = '\n';
        hd->pbuf[hd->pbuf_idx] = '\0';
        printk(XENLOG_G_DEBUG "HVM%u: %s", v->domain->domain_id, hd->pbuf);
        hd->pbuf_idx = 0;
    }
    spin_unlock(&hd->pbuf_lock);
}

void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
               unsigned int *ecx, unsigned int *edx)
{
    if ( !cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
    {
        cpuid(input, eax, ebx, ecx, edx);

        if ( input == 0x00000001 )
        {
            struct vcpu *v = current;

            clear_bit(X86_FEATURE_MWAIT & 31, ecx);

            if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
                clear_bit(X86_FEATURE_APIC & 31, edx);

#if CONFIG_PAGING_LEVELS >= 3
            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
#endif
                clear_bit(X86_FEATURE_PAE & 31, edx);
            clear_bit(X86_FEATURE_PSE36 & 31, edx);
        }
        else if ( input == 0x80000001 )
        {
#if CONFIG_PAGING_LEVELS >= 3
            struct vcpu *v = current;
            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
#endif
                clear_bit(X86_FEATURE_NX & 31, edx);
#ifdef __i386__
            /* Mask feature for Intel ia32e or AMD long mode. */
            clear_bit(X86_FEATURE_LAHF_LM & 31, ecx);

            clear_bit(X86_FEATURE_LM & 31, edx);
            clear_bit(X86_FEATURE_SYSCALL & 31, edx);
#endif
        }
    }
}

static long hvm_grant_table_op(
    unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
{
    if ( cmd != GNTTABOP_query_size )
        return -ENOSYS; /* all other commands need auditing */
    return do_grant_table_op(cmd, uop, count);
}

typedef unsigned long hvm_hypercall_t(
    unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

#define HYPERCALL(x) \
    [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x

#if defined(__i386__)

static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
    HYPERCALL(memory_op),
    [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
    HYPERCALL(xen_version),
    HYPERCALL(grant_table_op),
    HYPERCALL(event_channel_op),
    HYPERCALL(sched_op),
    HYPERCALL(hvm_op)
};

#else /* defined(__x86_64__) */
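
/*
 * 32-bit guests pass the 32-bit layout of xen_add_to_physmap; repack it
 * into the native structure before handing it to do_memory_op().
 */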
static long do_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    extern long do_add_to_physmap(struct xen_add_to_physmap *xatp);
    long rc;

    switch ( cmd )
    {
    case XENMEM_add_to_physmap:
    {
        struct {
            domid_t domid;
            uint32_t space;
            uint32_t idx;
            uint32_t gpfn;
        } u;
        struct xen_add_to_physmap h;

        if ( copy_from_guest(&u, arg, 1) )
            return -EFAULT;

        h.domid = u.domid;
        h.space = u.space;
        h.idx = u.idx;
        h.gpfn = u.gpfn;

        this_cpu(guest_handles_in_xen_space) = 1;
        rc = do_memory_op(cmd, guest_handle_from_ptr(&h, void));
        this_cpu(guest_handles_in_xen_space) = 0;

        break;
    }

    default:
        gdprintk(XENLOG_WARNING, "memory_op %d.\n", cmd);
        rc = -ENOSYS;
        break;
    }

    return rc;
}

static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
    HYPERCALL(memory_op),
    [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
    HYPERCALL(xen_version),
    HYPERCALL(grant_table_op),
    HYPERCALL(event_channel_op),
    HYPERCALL(sched_op),
    HYPERCALL(hvm_op)
};

static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
    [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)do_memory_op_compat32,
    [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
    HYPERCALL(xen_version),
    HYPERCALL(grant_table_op),
    HYPERCALL(event_channel_op),
    HYPERCALL(sched_op),
    HYPERCALL(hvm_op)
};

#endif /* defined(__x86_64__) */
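
/*
 * Hypercall dispatch for HVM guests: pick the 32- or 64-bit table based on
 * the guest's current execution mode and reject calls made from ring 3.
 */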
int hvm_do_hypercall(struct cpu_user_regs *regs)
{
    int flush, mode = hvm_guest_x86_mode(current);
    uint32_t eax = regs->eax;

    switch ( mode )
    {
#ifdef __x86_64__
    case 8:
#endif
    case 4:
    case 2:
        hvm_store_cpu_guest_regs(current, regs, NULL);
        if ( unlikely(ring_3(regs)) )
        {
    default:
            regs->eax = -EPERM;
            return HVM_HCALL_completed;
        }
    case 0:
        break;
    }

    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
    {
        regs->eax = -ENOSYS;
        return HVM_HCALL_completed;
    }

    /*
     * NB. In future flush only on decrease_reservation.
     * For now we also need to flush when pages are added, as qemu-dm is not
     * yet capable of faulting pages into an existing valid mapcache bucket.
     */
    flush = ((eax == __HYPERVISOR_memory_op) ||
             (eax == __HYPERVISOR_grant_table_op)); /* needed ? */
    this_cpu(hc_preempted) = 0;

#ifdef __x86_64__
    if ( mode == 8 )
    {
        HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%lx, %lx, %lx, %lx, %lx)", eax,
                    regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);

        regs->rax = hvm_hypercall64_table[eax](regs->rdi,
                                               regs->rsi,
                                               regs->rdx,
                                               regs->r10,
                                               regs->r8);
    }
    else
#endif
    {
        HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%x, %x, %x, %x, %x)", eax,
                    (uint32_t)regs->ebx, (uint32_t)regs->ecx,
                    (uint32_t)regs->edx, (uint32_t)regs->esi,
                    (uint32_t)regs->edi);

        regs->eax = hvm_hypercall32_table[eax]((uint32_t)regs->ebx,
                                               (uint32_t)regs->ecx,
                                               (uint32_t)regs->edx,
                                               (uint32_t)regs->esi,
                                               (uint32_t)regs->edi);
    }

    HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u -> %lx",
                eax, (unsigned long)regs->eax);

    return (this_cpu(hc_preempted) ? HVM_HCALL_preempted :
            flush ? HVM_HCALL_invalidate : HVM_HCALL_completed);
}

static void hvm_latch_shinfo_size(struct domain *d)
{
    /*
     * Called from operations which are among the very first executed by
     * PV drivers on initialisation or after save/restore. These are sensible
     * points at which to sample the execution mode of the guest and latch
     * 32- or 64-bit format for shared state.
     */
    if ( current->domain == d )
        d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
}

/* Initialise a hypercall transfer page for a VMX domain using
   paravirtualised drivers. */
void hvm_hypercall_page_initialise(struct domain *d,
                                   void *hypercall_page)
{
    hvm_latch_shinfo_size(d);
    hvm_funcs.init_hypercall_page(d, hypercall_page);
}


/*
 * only called in HVM domain BSP context
 * when booting, vcpuid is always equal to apic_id
 */
int hvm_bringup_ap(int vcpuid, int trampoline_vector)
{
    struct vcpu *v;
    struct domain *d = current->domain;
    struct vcpu_guest_context *ctxt;
    int rc = 0;

    BUG_ON(!is_hvm_domain(d));

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
    {
        gdprintk(XENLOG_ERR,
                 "Failed to allocate memory in hvm_bringup_ap.\n");
        return -ENOMEM;
    }

    hvm_init_ap_context(ctxt, vcpuid, trampoline_vector);

    /* Sync AP's TSC with BSP's. */
    v->arch.hvm_vcpu.cache_tsc_offset =
        v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);

    LOCK_BIGLOCK(d);
    rc = -EEXIST;
    if ( !v->is_initialised )
        rc = boot_vcpu(d, vcpuid, ctxt);
    UNLOCK_BIGLOCK(d);

    if ( rc != 0 )
    {
        gdprintk(XENLOG_ERR,
                 "AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc);
        goto out;
    }

    if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
        vcpu_wake(v);
    gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid);

 out:
    xfree(ctxt);
    return rc;
}

static int hvmop_set_pci_intx_level(
    XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
{
    struct xen_hvm_set_pci_intx_level op;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (op.domain > 0) || (op.bus > 0) || (op.device > 31) || (op.intx > 3) )
        return -EINVAL;

    d = rcu_lock_domain_by_id(op.domid);
    if ( d == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( !is_hvm_domain(d) )
        goto out;

    rc = 0;
    switch ( op.level )
    {
    case 0:
        hvm_pci_intx_deassert(d, op.device, op.intx);
        break;
    case 1:
        hvm_pci_intx_assert(d, op.device, op.intx);
        break;
    default:
        rc = -EINVAL;
        break;
    }

 out:
    rcu_unlock_domain(d);
    return rc;
}

static int hvmop_set_isa_irq_level(
    XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t) uop)
{
    struct xen_hvm_set_isa_irq_level op;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( op.isa_irq > 15 )
        return -EINVAL;

    d = rcu_lock_domain_by_id(op.domid);
    if ( d == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( !is_hvm_domain(d) )
        goto out;

    rc = 0;
    switch ( op.level )
    {
    case 0:
        hvm_isa_irq_deassert(d, op.isa_irq);
        break;
    case 1:
        hvm_isa_irq_assert(d, op.isa_irq);
        break;
    default:
        rc = -EINVAL;
        break;
    }

 out:
    rcu_unlock_domain(d);
    return rc;
}

static int hvmop_set_pci_link_route(
    XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t) uop)
{
    struct xen_hvm_set_pci_link_route op;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (op.link > 3) || (op.isa_irq > 15) )
        return -EINVAL;

    d = rcu_lock_domain_by_id(op.domid);
    if ( d == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( !is_hvm_domain(d) )
        goto out;

    rc = 0;
    hvm_set_pci_link_route(d, op.link, op.isa_irq);

 out:
    rcu_unlock_domain(d);
    return rc;
}

long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
{
    long rc = 0;

    switch ( op )
    {
    case HVMOP_set_param:
    case HVMOP_get_param:
    {
        struct xen_hvm_param a;
        struct hvm_ioreq_page *iorp;
        struct domain *d;
        struct vcpu *v;

        if ( copy_from_guest(&a, arg, 1) )
            return -EFAULT;

        if ( a.index >= HVM_NR_PARAMS )
            return -EINVAL;

        if ( a.domid == DOMID_SELF )
            d = rcu_lock_current_domain();
        else if ( IS_PRIV(current->domain) )
            d = rcu_lock_domain_by_id(a.domid);
        else
            return -EPERM;

        if ( d == NULL )
            return -ESRCH;

        rc = -EINVAL;
        if ( !is_hvm_domain(d) )
            goto param_fail;

        if ( op == HVMOP_set_param )
        {
            switch ( a.index )
            {
            case HVM_PARAM_IOREQ_PFN:
                iorp = &d->arch.hvm_domain.ioreq;
                rc = hvm_set_ioreq_page(d, iorp, a.value);
                spin_lock(&iorp->lock);
                if ( (rc == 0) && (iorp->va != NULL) )
                    /* Initialise evtchn port info if VCPUs already created. */
                    for_each_vcpu ( d, v )
                        get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
                spin_unlock(&iorp->lock);
                break;
            case HVM_PARAM_BUFIOREQ_PFN:
                iorp = &d->arch.hvm_domain.buf_ioreq;
                rc = hvm_set_ioreq_page(d, iorp, a.value);
                break;
            case HVM_PARAM_CALLBACK_IRQ:
                hvm_set_callback_via(d, a.value);
                hvm_latch_shinfo_size(d);
                break;
            }
            d->arch.hvm_domain.params[a.index] = a.value;
            rc = 0;
        }
        else
        {
            a.value = d->arch.hvm_domain.params[a.index];
            rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
        }

        HVM_DBG_LOG(DBG_LEVEL_HCALL, "%s param %u = %"PRIx64,
                    op == HVMOP_set_param ? "set" : "get",
                    a.index, a.value);

    param_fail:
        rcu_unlock_domain(d);
        break;
    }

    case HVMOP_set_pci_intx_level:
        rc = hvmop_set_pci_intx_level(
            guest_handle_cast(arg, xen_hvm_set_pci_intx_level_t));
        break;

    case HVMOP_set_isa_irq_level:
        rc = hvmop_set_isa_irq_level(
            guest_handle_cast(arg, xen_hvm_set_isa_irq_level_t));
        break;

    case HVMOP_set_pci_link_route:
        rc = hvmop_set_pci_link_route(
            guest_handle_cast(arg, xen_hvm_set_pci_link_route_t));
        break;

    default:
    {
        gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
        rc = -ENOSYS;
        break;
    }
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */