xen/arch/x86/hvm/hvm.c @ 14920:74b712492dba (ia64/xen-unstable)

hvm: Choose correct shared_info format for 32-bit PV drivers on 64-bit Xen.

Signed-off-by: K. Y. Srinivasan <ksrinivasan@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>

Author: Keir Fraser <keir@xensource.com>
Date:   Tue Apr 24 22:00:45 2007 +0100
/*
 * hvm.c: Common hardware virtual machine abstractions.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
#include <public/memory.h>
int hvm_enabled __read_mostly;

unsigned int opt_hvm_debug_level __read_mostly;
integer_param("hvm_debug", opt_hvm_debug_level);

struct hvm_function_table hvm_funcs __read_mostly;

/* I/O permission bitmap is globally shared by all HVM guests. */
char __attribute__ ((__section__ (".bss.page_aligned")))
    hvm_io_bitmap[3*PAGE_SIZE];
void hvm_enable(struct hvm_function_table *fns)
{
    BUG_ON(hvm_enabled);
    printk("HVM: %s enabled\n", fns->name);

    /*
     * Allow direct access to the PC debug port (it is often used for I/O
     * delays, but the vmexits simply slow things down).
     */
    memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
    clear_bit(0x80, hvm_io_bitmap);

    hvm_funcs   = *fns;
    hvm_enabled = 1;
}

void hvm_disable(void)
{
    if ( hvm_enabled )
        hvm_funcs.disable();
}
void hvm_stts(struct vcpu *v)
{
    /* FPU state already dirty? Then no need to setup_fpu() lazily. */
    if ( !v->fpu_dirtied )
        hvm_funcs.stts(v);
}
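
/*
 * Guest time is maintained as an offset from the host TSC: the offset is
 * cached per-vcpu and pushed to hardware via hvm_funcs.set_tsc_offset(),
 * so a read of guest time is simply host TSC plus the cached offset.
 */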
void hvm_set_guest_time(struct vcpu *v, u64 gtime)
{
    u64 host_tsc;

    rdtscll(host_tsc);

    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

u64 hvm_get_guest_time(struct vcpu *v)
{
    u64 host_tsc;

    rdtscll(host_tsc);
    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}
void hvm_migrate_timers(struct vcpu *v)
{
    pit_migrate_timers(v);
    rtc_migrate_timers(v);
    hpet_migrate_timers(v);
    if ( vcpu_vlapic(v)->pt.enabled )
        migrate_timer(&vcpu_vlapic(v)->pt.timer, v->processor);
}
void hvm_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    hvm_stts(v);

    pt_thaw_time(v);

    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_ioreq(v)->vp_ioreq;
    while ( p->state != STATE_IOREQ_NONE )
    {
        switch ( p->state )
        {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            hvm_io_assist();
            break;
        case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
        case STATE_IOREQ_INPROCESS:
            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}
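
/*
 * The ioreq pages are shared with the external device model.  The domain is
 * paused when an ioreq page is initialised and only unpaused once a frame
 * has been supplied via hvm_set_ioreq_page() (through the
 * HVM_PARAM_IOREQ_PFN/HVM_PARAM_BUFIOREQ_PFN params below), so no vcpu can
 * issue I/O before the page is in place.
 */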
static void hvm_init_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void hvm_destroy_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if ( iorp->va != NULL )
    {
        unmap_domain_page_global(iorp->va);
        put_page_and_type(iorp->page);
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}
static int hvm_set_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;
    void *va;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( !mfn_valid(mfn) )
        return -EINVAL;

    page = mfn_to_page(mfn);
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return -EINVAL;

    va = map_domain_page_global(mfn);
    if ( va == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    spin_lock(&iorp->lock);

    if ( (iorp->va != NULL) || d->is_dying )
    {
        spin_unlock(&iorp->lock);
        unmap_domain_page_global(va);
        put_page_and_type(mfn_to_page(mfn));
        return -EINVAL;
    }

    iorp->va = va;
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}
int hvm_domain_initialise(struct domain *d)
{
    int rc;

    if ( !hvm_enabled )
    {
        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
                 "on a non-VT/AMDV platform.\n");
        return -EINVAL;
    }

    spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    spin_lock_init(&d->arch.hvm_domain.irq_lock);

    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
    if ( rc != 0 )
        return rc;

    vpic_init(d);
    vioapic_init(d);

    hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    return 0;
}

void hvm_domain_relinquish_resources(struct domain *d)
{
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
}

void hvm_domain_destroy(struct domain *d)
{
    pit_deinit(d);
    rtc_deinit(d);
    pmtimer_deinit(d);
    hpet_deinit(d);
}
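
/*
 * Per-vcpu register state is saved/restored for migration by the handlers
 * below, registered via HVM_REGISTER_SAVE_RESTORE(CPU, ...).  The
 * architecture-specific VMCS/VMCB fields are handled by hvm_funcs; general
 * purpose registers, FPU state and debug registers are copied here.
 */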
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    for_each_vcpu(d, v)
    {
        /* We don't need to save state for a vcpu that is down; the restore
         * code will leave it down if there is nothing saved. */
        if ( test_bit(_VPF_down, &v->pause_flags) )
            continue;

        /* Architecture-specific vmcs/vmcb bits */
        hvm_funcs.save_cpu_ctxt(v, &ctxt);

        /* Other vcpu register state */
        vc = &v->arch.guest_context;
        if ( vc->flags & VGCF_i387_valid )
            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
        else
            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
        ctxt.rax = vc->user_regs.eax;
        ctxt.rbx = vc->user_regs.ebx;
        ctxt.rcx = vc->user_regs.ecx;
        ctxt.rdx = vc->user_regs.edx;
        ctxt.rbp = vc->user_regs.ebp;
        ctxt.rsi = vc->user_regs.esi;
        ctxt.rdi = vc->user_regs.edi;
        /* %rsp handled by arch-specific call above */
#ifdef __x86_64__
        ctxt.r8  = vc->user_regs.r8;
        ctxt.r9  = vc->user_regs.r9;
        ctxt.r10 = vc->user_regs.r10;
        ctxt.r11 = vc->user_regs.r11;
        ctxt.r12 = vc->user_regs.r12;
        ctxt.r13 = vc->user_regs.r13;
        ctxt.r14 = vc->user_regs.r14;
        ctxt.r15 = vc->user_regs.r15;
#endif
        ctxt.dr0 = vc->debugreg[0];
        ctxt.dr1 = vc->debugreg[1];
        ctxt.dr2 = vc->debugreg[2];
        ctxt.dr3 = vc->debugreg[3];
        ctxt.dr6 = vc->debugreg[6];
        ctxt.dr7 = vc->debugreg[7];

        if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
            return 1;
    }
    return 0;
}
static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid, rc;
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    /* Which vcpu is this? */
    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL )
    {
        gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
        return -EINVAL;
    }
    vc = &v->arch.guest_context;

    /* Need to init this vcpu before loading its contents */
    LOCK_BIGLOCK(d);
    if ( !v->is_initialised )
        if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
        {
            UNLOCK_BIGLOCK(d);
            return rc;
        }
    UNLOCK_BIGLOCK(d);

    if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
        return -EINVAL;

    /* Architecture-specific vmcs/vmcb bits */
    if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
        return -EINVAL;

    /* Other vcpu register state */
    memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
    vc->user_regs.eax = ctxt.rax;
    vc->user_regs.ebx = ctxt.rbx;
    vc->user_regs.ecx = ctxt.rcx;
    vc->user_regs.edx = ctxt.rdx;
    vc->user_regs.ebp = ctxt.rbp;
    vc->user_regs.esi = ctxt.rsi;
    vc->user_regs.edi = ctxt.rdi;
    vc->user_regs.esp = ctxt.rsp;
#ifdef __x86_64__
    vc->user_regs.r8  = ctxt.r8;
    vc->user_regs.r9  = ctxt.r9;
    vc->user_regs.r10 = ctxt.r10;
    vc->user_regs.r11 = ctxt.r11;
    vc->user_regs.r12 = ctxt.r12;
    vc->user_regs.r13 = ctxt.r13;
    vc->user_regs.r14 = ctxt.r14;
    vc->user_regs.r15 = ctxt.r15;
#endif
    vc->debugreg[0] = ctxt.dr0;
    vc->debugreg[1] = ctxt.dr1;
    vc->debugreg[2] = ctxt.dr2;
    vc->debugreg[3] = ctxt.dr3;
    vc->debugreg[6] = ctxt.dr6;
    vc->debugreg[7] = ctxt.dr7;

    vc->flags = VGCF_i387_valid | VGCF_online;
    v->fpu_initialised = 1;

    /* Auxiliary processors should be woken immediately. */
    if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
        vcpu_wake(v);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                          1, HVMSR_PER_VCPU);
int hvm_vcpu_initialise(struct vcpu *v)
{
    int rc;

    if ( (rc = vlapic_init(v)) != 0 )
        return rc;

    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
    {
        vlapic_destroy(v);
        return rc;
    }

    /* Create ioreq event channel. */
    rc = alloc_unbound_xen_event_channel(v, 0);
    if ( rc < 0 )
    {
        hvm_funcs.vcpu_destroy(v);
        vlapic_destroy(v);
        return rc;
    }

    /* Register ioreq event channel. */
    v->arch.hvm_vcpu.xen_port = rc;
    spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
    if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
        get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
    spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);

    INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);

    if ( v->vcpu_id != 0 )
        return 0;

    pit_init(v, cpu_khz);
    rtc_init(v, RTC_PORT(0));
    pmtimer_init(v);
    hpet_init(v);

    /* Init guest TSC to start from zero. */
    hvm_set_guest_time(v, 0);

    return 0;
}
void hvm_vcpu_destroy(struct vcpu *v)
{
    vlapic_destroy(v);
    hvm_funcs.vcpu_destroy(v);

    /* Event channel is already freed by evtchn_destroy(). */
    /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
}

void hvm_vcpu_reset(struct vcpu *v)
{
    vcpu_pause(v);

    vlapic_reset(vcpu_vlapic(v));

    hvm_funcs.vcpu_initialise(v);

    set_bit(_VPF_down, &v->pause_flags);
    clear_bit(_VPF_blocked, &v->pause_flags);
    v->fpu_initialised = 0;
    v->fpu_dirtied     = 0;
    v->is_initialised  = 0;

    vcpu_unpause(v);
}
static void hvm_vcpu_down(void)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    int online_count = 0;

    gdprintk(XENLOG_INFO, "DOM%d/VCPU%d: going offline.\n",
             d->domain_id, v->vcpu_id);

    /* Doesn't halt us immediately, but we'll never return to guest context. */
    set_bit(_VPF_down, &v->pause_flags);
    vcpu_sleep_nosync(v);

    /* Any other VCPUs online? ... */
    LOCK_BIGLOCK(d);
    for_each_vcpu ( d, v )
        if ( !test_bit(_VPF_down, &v->pause_flags) )
            online_count++;
    UNLOCK_BIGLOCK(d);

    /* ... Shut down the domain if not. */
    if ( online_count == 0 )
    {
        gdprintk(XENLOG_INFO, "DOM%d: all CPUs offline -- powering off.\n",
                 d->domain_id);
        domain_shutdown(d, SHUTDOWN_poweroff);
    }
}
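
/*
 * Hand the current I/O request to the external device model: mark the
 * per-vcpu ioreq READY and notify the emulator over the vcpu's event
 * channel.  The response is collected in hvm_do_resume() once the device
 * model has moved the request to IORESP_READY.
 */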
void hvm_send_assist_req(struct vcpu *v)
{
    ioreq_t *p;

    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
        return; /* implicitly bins the i/o operation */

    p = &get_ioreq(v)->vp_ioreq;
    if ( unlikely(p->state != STATE_IOREQ_NONE) )
    {
        /* This indicates a bug in the device model. Crash the domain. */
        gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
        domain_crash_synchronous();
    }

    prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);

    /*
     * Following happens /after/ blocking and setting up ioreq contents.
     * prepare_wait_on_xen_event_channel() is an implicit barrier.
     */
    p->state = STATE_IOREQ_READY;
    notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
}
void hvm_hlt(unsigned long rflags)
{
    /*
     * If we halt with interrupts disabled, that's a pretty sure sign that we
     * want to shut down. In a real processor, NMIs are the only way to break
     * out of this.
     */
    if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
        return hvm_vcpu_down();

    do_sched_op_compat(SCHEDOP_block, 0);
}

void hvm_triple_fault(void)
{
    struct vcpu *v = current;
    gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
             "invoking HVM system reset.\n", v->vcpu_id);
    domain_shutdown(v->domain, SHUTDOWN_reboot);
}
/*
 * __hvm_copy():
 *  @buf  = hypervisor buffer
 *  @addr = guest address to copy to/from
 *  @size = number of bytes to copy
 *  @dir  = copy *to* guest (TRUE) or *from* guest (FALSE)?
 *  @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
 * Returns number of bytes failed to copy (0 == complete success).
 */
static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
{
    unsigned long gfn, mfn;
    char *p;
    int count, todo;

    todo = size;
    while ( todo > 0 )
    {
        count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);

        if ( virt )
            gfn = paging_gva_to_gfn(current, addr);
        else
            gfn = addr >> PAGE_SHIFT;

        mfn = get_mfn_from_gpfn(gfn);

        if ( mfn == INVALID_MFN )
            return todo;

        p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);

        if ( dir )
        {
            memcpy(p, buf, count); /* dir == TRUE:  *to* guest */
            mark_dirty(current->domain, mfn);
        }
        else
            memcpy(buf, p, count); /* dir == FALSE: *from* guest */

        unmap_domain_page(p);

        addr += count;
        buf  += count;
        todo -= count;
    }

    return 0;
}
int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
{
    return __hvm_copy(buf, paddr, size, 1, 0);
}

int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
{
    return __hvm_copy(buf, paddr, size, 0, 0);
}

int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
{
    return __hvm_copy(buf, vaddr, size, 1, 1);
}

int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
{
    return __hvm_copy(buf, vaddr, size, 0, 1);
}
/* HVM specific printbuf. Mostly used for hvmloader chit-chat. */
void hvm_print_line(struct vcpu *v, const char c)
{
    struct hvm_domain *hd = &v->domain->arch.hvm_domain;

    spin_lock(&hd->pbuf_lock);
    hd->pbuf[hd->pbuf_idx++] = c;
    if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
    {
        if ( c != '\n' )
            hd->pbuf[hd->pbuf_idx++] = '\n';
        hd->pbuf[hd->pbuf_idx] = '\0';
        printk(XENLOG_G_DEBUG "HVM%u: %s", v->domain->domain_id, hd->pbuf);
        hd->pbuf_idx = 0;
    }
    spin_unlock(&hd->pbuf_lock);
}
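
/*
 * CPUID as seen by the guest: host leaves are passed through, minus
 * features the HVM layer does not offer.  MWAIT and PSE36 are always
 * hidden, the APIC flag is hidden when the local APIC is hardware-disabled,
 * and PAE/NX/long-mode visibility depends on CONFIG_PAGING_LEVELS and the
 * HVM_PARAM_PAE_ENABLED parameter.
 */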
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
               unsigned int *ecx, unsigned int *edx)
{
    if ( !cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
    {
        cpuid(input, eax, ebx, ecx, edx);

        if ( input == 0x00000001 )
        {
            struct vcpu *v = current;

            clear_bit(X86_FEATURE_MWAIT & 31, ecx);

            if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
                clear_bit(X86_FEATURE_APIC & 31, edx);

#if CONFIG_PAGING_LEVELS >= 3
            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
#endif
                clear_bit(X86_FEATURE_PAE & 31, edx);
            clear_bit(X86_FEATURE_PSE36 & 31, edx);
        }
        else if ( input == 0x80000001 )
        {
#if CONFIG_PAGING_LEVELS >= 3
            struct vcpu *v = current;
            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
#endif
                clear_bit(X86_FEATURE_NX & 31, edx);
#ifdef __i386__
            /* Mask feature for Intel ia32e or AMD long mode. */
            clear_bit(X86_FEATURE_LAHF_LM & 31, ecx);

            clear_bit(X86_FEATURE_LM & 31, edx);
            clear_bit(X86_FEATURE_SYSCALL & 31, edx);
#endif
        }
    }
}
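
/*
 * Hypercall dispatch: the tables below map hypercall numbers to handlers.
 * Only the hypercalls listed is available to an HVM guest; anything else
 * returns -ENOSYS.  On 64-bit Xen a separate table is kept for 32-bit
 * guests, routing memory_op through a compat wrapper that translates the
 * 32-bit argument layout.
 */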
typedef unsigned long hvm_hypercall_t(
    unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

#define HYPERCALL(x)                                        \
    [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x
#define HYPERCALL_COMPAT32(x)                               \
    [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x ## _compat32
#if defined(__i386__)

static hvm_hypercall_t *hvm_hypercall_table[NR_hypercalls] = {
    HYPERCALL(memory_op),
    HYPERCALL(multicall),
    HYPERCALL(xen_version),
    HYPERCALL(event_channel_op),
    HYPERCALL(sched_op),
    HYPERCALL(hvm_op)
};

static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
{
    if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] )
    {
        if ( pregs->eax != __HYPERVISOR_grant_table_op )
            gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %d.\n",
                     current->domain->domain_id, current->vcpu_id, pregs->eax);
        pregs->eax = -ENOSYS;
        return;
    }

    pregs->eax = hvm_hypercall_table[pregs->eax](
        pregs->ebx, pregs->ecx, pregs->edx, pregs->esi, pregs->edi);
}

#else /* defined(__x86_64__) */
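
/*
 * A 32-bit guest lays out xen_add_to_physmap with 32-bit fields, so the
 * compat wrapper below unpacks the guest's structure and repacks it into
 * the native 64-bit layout before calling do_memory_op().  Only
 * XENMEM_add_to_physmap is handled; other memory_op commands from 32-bit
 * HVM guests are rejected with -ENOSYS.
 */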
static long do_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    extern long do_add_to_physmap(struct xen_add_to_physmap *xatp);
    long rc;

    switch ( cmd )
    {
    case XENMEM_add_to_physmap:
    {
        struct {
            domid_t domid;
            uint32_t space;
            uint32_t idx;
            uint32_t gpfn;
        } u;
        struct xen_add_to_physmap h;

        if ( copy_from_guest(&u, arg, 1) )
            return -EFAULT;

        h.domid = u.domid;
        h.space = u.space;
        h.idx = u.idx;
        h.gpfn = u.gpfn;

        this_cpu(guest_handles_in_xen_space) = 1;
        rc = do_memory_op(cmd, guest_handle_from_ptr(&h, void));
        this_cpu(guest_handles_in_xen_space) = 0;

        break;
    }

    default:
        gdprintk(XENLOG_WARNING, "memory_op %d.\n", cmd);
        rc = -ENOSYS;
        break;
    }

    return rc;
}
static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
    HYPERCALL(memory_op),
    HYPERCALL(xen_version),
    HYPERCALL(event_channel_op),
    HYPERCALL(sched_op),
    HYPERCALL(hvm_op)
};

static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
    HYPERCALL_COMPAT32(memory_op),
    HYPERCALL(xen_version),
    HYPERCALL(event_channel_op),
    HYPERCALL(sched_op),
    HYPERCALL(hvm_op)
};
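
/*
 * Dispatch according to the guest's execution mode: a 64-bit guest (4
 * paging levels) passes hypercall arguments in rdi/rsi/rdx/r10/r8, while a
 * 32-bit guest uses ebx/ecx/edx/esi/edi and goes through the compat table.
 */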
static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
{
    pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */
    if ( (pregs->rax >= NR_hypercalls) || !hvm_hypercall64_table[pregs->rax] )
    {
        if ( pregs->rax != __HYPERVISOR_grant_table_op )
            gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %ld.\n",
                     current->domain->domain_id, current->vcpu_id, pregs->rax);
        pregs->rax = -ENOSYS;
        return;
    }

    if ( current->arch.paging.mode->guest_levels == 4 )
    {
        pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
                                                       pregs->rsi,
                                                       pregs->rdx,
                                                       pregs->r10,
                                                       pregs->r8);
    }
    else
    {
        pregs->eax = hvm_hypercall32_table[pregs->eax]((uint32_t)pregs->ebx,
                                                       (uint32_t)pregs->ecx,
                                                       (uint32_t)pregs->edx,
                                                       (uint32_t)pregs->esi,
                                                       (uint32_t)pregs->edi);
    }
}

#endif /* defined(__x86_64__) */
int hvm_do_hypercall(struct cpu_user_regs *pregs)
{
    int flush, preempted;
    unsigned long old_eip;

    if ( unlikely(ring_3(pregs)) )
    {
        pregs->eax = -EPERM;
        return 0;
    }

    /*
     * NB. In future flush only on decrease_reservation.
     * For now we also need to flush when pages are added, as qemu-dm is not
     * yet capable of faulting pages into an existing valid mapcache bucket.
     */
    flush = ((uint32_t)pregs->eax == __HYPERVISOR_memory_op);

    /* Check for preemption: RIP will be modified from this dummy value. */
    old_eip = pregs->eip;
    pregs->eip = 0xF0F0F0FF;

    __hvm_do_hypercall(pregs);

    preempted = (pregs->eip != 0xF0F0F0FF);
    pregs->eip = old_eip;

    return (preempted ? HVM_HCALL_preempted :
            flush ? HVM_HCALL_invalidate : HVM_HCALL_completed);
}
void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3)
{
    v->arch.hvm_vcpu.hw_cr3 = guest_cr3;
    hvm_funcs.update_guest_cr3(v);
}

/* Initialise a hypercall transfer page for a VMX domain using
   paravirtualised drivers. */
void hvm_hypercall_page_initialise(struct domain *d,
                                   void *hypercall_page)
{
    hvm_funcs.init_hypercall_page(d, hypercall_page);
}
/*
 * Only called in HVM-domain BSP context; when booting, vcpuid is always
 * equal to apic_id.
 */
int hvm_bringup_ap(int vcpuid, int trampoline_vector)
{
    struct vcpu *v;
    struct domain *d = current->domain;
    struct vcpu_guest_context *ctxt;
    int rc = 0;

    BUG_ON(!is_hvm_domain(d));

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
    {
        gdprintk(XENLOG_ERR,
                 "Failed to allocate memory in hvm_bringup_ap.\n");
        return -ENOMEM;
    }

    hvm_init_ap_context(ctxt, vcpuid, trampoline_vector);

    /* Sync AP's TSC with BSP's. */
    v->arch.hvm_vcpu.cache_tsc_offset =
        v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);

    LOCK_BIGLOCK(d);
    rc = -EEXIST;
    if ( !v->is_initialised )
        rc = boot_vcpu(d, vcpuid, ctxt);
    UNLOCK_BIGLOCK(d);

    if ( rc != 0 )
    {
        gdprintk(XENLOG_ERR,
                 "AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc);
        goto out;
    }

    if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
        vcpu_wake(v);
    gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid);

 out:
    xfree(ctxt);
    return rc;
}
static int hvmop_set_pci_intx_level(
    XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
{
    struct xen_hvm_set_pci_intx_level op;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (op.domain > 0) || (op.bus > 0) || (op.device > 31) || (op.intx > 3) )
        return -EINVAL;

    d = rcu_lock_domain_by_id(op.domid);
    if ( d == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( !is_hvm_domain(d) )
        goto out;

    rc = 0;
    switch ( op.level )
    {
    case 0:
        hvm_pci_intx_deassert(d, op.device, op.intx);
        break;
    case 1:
        hvm_pci_intx_assert(d, op.device, op.intx);
        break;
    default:
        rc = -EINVAL;
        break;
    }

 out:
    rcu_unlock_domain(d);
    return rc;
}
static int hvmop_set_isa_irq_level(
    XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t) uop)
{
    struct xen_hvm_set_isa_irq_level op;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( op.isa_irq > 15 )
        return -EINVAL;

    d = rcu_lock_domain_by_id(op.domid);
    if ( d == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( !is_hvm_domain(d) )
        goto out;

    rc = 0;
    switch ( op.level )
    {
    case 0:
        hvm_isa_irq_deassert(d, op.isa_irq);
        break;
    case 1:
        hvm_isa_irq_assert(d, op.isa_irq);
        break;
    default:
        rc = -EINVAL;
        break;
    }

 out:
    rcu_unlock_domain(d);
    return rc;
}
static int hvmop_set_pci_link_route(
    XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t) uop)
{
    struct xen_hvm_set_pci_link_route op;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (op.link > 3) || (op.isa_irq > 15) )
        return -EINVAL;

    d = rcu_lock_domain_by_id(op.domid);
    if ( d == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( !is_hvm_domain(d) )
        goto out;

    rc = 0;
    hvm_set_pci_link_route(d, op.link, op.isa_irq);

 out:
    rcu_unlock_domain(d);
    return rc;
}
long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
{
    long rc = 0;

    switch ( op )
    {
    case HVMOP_set_param:
    case HVMOP_get_param:
    {
        struct xen_hvm_param a;
        struct hvm_ioreq_page *iorp;
        struct domain *d;
        struct vcpu *v;

        if ( copy_from_guest(&a, arg, 1) )
            return -EFAULT;

        if ( a.index >= HVM_NR_PARAMS )
            return -EINVAL;

        if ( a.domid == DOMID_SELF )
            d = rcu_lock_current_domain();
        else if ( IS_PRIV(current->domain) )
            d = rcu_lock_domain_by_id(a.domid);
        else
            return -EPERM;

        if ( d == NULL )
            return -ESRCH;

        rc = -EINVAL;
        if ( !is_hvm_domain(d) )
            goto param_fail;

        if ( op == HVMOP_set_param )
        {
            switch ( a.index )
            {
            case HVM_PARAM_IOREQ_PFN:
                iorp = &d->arch.hvm_domain.ioreq;
                rc = hvm_set_ioreq_page(d, iorp, a.value);
                spin_lock(&iorp->lock);
                if ( (rc == 0) && (iorp->va != NULL) )
                    /* Initialise evtchn port info if VCPUs already created. */
                    for_each_vcpu ( d, v )
                        get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
                spin_unlock(&iorp->lock);
                break;
            case HVM_PARAM_BUFIOREQ_PFN:
                iorp = &d->arch.hvm_domain.buf_ioreq;
                rc = hvm_set_ioreq_page(d, iorp, a.value);
                break;
            case HVM_PARAM_CALLBACK_IRQ:
                hvm_set_callback_via(d, a.value);
#if defined(__x86_64__)
                /*
                 * Since this operation is one of the very first executed
                 * by PV drivers on initialisation or after save/restore, it
                 * is a sensible point at which to sample the execution mode of
                 * the guest and latch 32- or 64-bit format for shared state.
                 */
                d->is_compat = (hvm_guest_x86_mode(current) == 4);
#endif
                break;
            }
            d->arch.hvm_domain.params[a.index] = a.value;
            rc = 0;
        }
        else
        {
            a.value = d->arch.hvm_domain.params[a.index];
            rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
        }

    param_fail:
        rcu_unlock_domain(d);
        break;
    }

    case HVMOP_set_pci_intx_level:
        rc = hvmop_set_pci_intx_level(
            guest_handle_cast(arg, xen_hvm_set_pci_intx_level_t));
        break;

    case HVMOP_set_isa_irq_level:
        rc = hvmop_set_isa_irq_level(
            guest_handle_cast(arg, xen_hvm_set_isa_irq_level_t));
        break;

    case HVMOP_set_pci_link_route:
        rc = hvmop_set_pci_link_route(
            guest_handle_cast(arg, xen_hvm_set_pci_link_route_t));
        break;

    default:
    {
        gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
        rc = -ENOSYS;
        break;
    }
    }

    return rc;
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */