xen/arch/x86/hvm/hvm.c @ 19615:13a4f4e6d0a3

x86 hvm: Correctly emulate task switches into vm86 mode.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 19 02:12:04 2009 +0100 (2009-05-19)
parents f71c5e3ac252
children 822ea2bf0c54
1 /*
2 * hvm.c: Common hardware virtual machine abstractions.
3 *
4 * Copyright (c) 2004, Intel Corporation.
5 * Copyright (c) 2005, International Business Machines Corporation.
6 * Copyright (c) 2008, Citrix Systems, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
22 #include <xen/config.h>
23 #include <xen/ctype.h>
24 #include <xen/init.h>
25 #include <xen/lib.h>
26 #include <xen/trace.h>
27 #include <xen/sched.h>
28 #include <xen/irq.h>
29 #include <xen/softirq.h>
30 #include <xen/domain.h>
31 #include <xen/domain_page.h>
32 #include <xen/hypercall.h>
33 #include <xen/guest_access.h>
34 #include <xen/event.h>
35 #include <xen/paging.h>
36 #include <asm/shadow.h>
37 #include <asm/current.h>
38 #include <asm/e820.h>
39 #include <asm/io.h>
40 #include <asm/regs.h>
41 #include <asm/cpufeature.h>
42 #include <asm/processor.h>
43 #include <asm/types.h>
44 #include <asm/msr.h>
45 #include <asm/mc146818rtc.h>
46 #include <asm/spinlock.h>
47 #include <asm/hvm/hvm.h>
48 #include <asm/hvm/vpt.h>
49 #include <asm/hvm/support.h>
50 #include <asm/hvm/cacheattr.h>
51 #include <asm/hvm/trace.h>
52 #include <public/sched.h>
53 #include <public/hvm/ioreq.h>
54 #include <public/version.h>
55 #include <public/memory.h>
57 int hvm_enabled __read_mostly;
59 unsigned int opt_hvm_debug_level __read_mostly;
60 integer_param("hvm_debug", opt_hvm_debug_level);
62 int opt_softtsc;
63 boolean_param("softtsc", opt_softtsc);
65 struct hvm_function_table hvm_funcs __read_mostly;
67 /* I/O permission bitmap is globally shared by all HVM guests. */
68 unsigned long __attribute__ ((__section__ (".bss.page_aligned")))
69 hvm_io_bitmap[3*PAGE_SIZE/BYTES_PER_LONG];
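/*
 * Note: three pages so the bitmap can be used directly as AMD SVM's 12kB I/O
 * permission map; Intel VMX uses only the first two 4kB pages (I/O bitmaps A
 * and B). One bit per port covers the 64k-port space in the first 8kB.
 */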
71 void hvm_enable(struct hvm_function_table *fns)
72 {
73 BUG_ON(hvm_enabled);
74 printk("HVM: %s enabled\n", fns->name);
76 /*
77 * Allow direct access to the PC debug ports 0x80 and 0xed (they are
78 * often used for I/O delays, but the vmexits simply slow things down).
79 */
80 memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
81 __clear_bit(0x80, hvm_io_bitmap);
82 __clear_bit(0xed, hvm_io_bitmap);
84 hvm_funcs = *fns;
85 hvm_enabled = 1;
87 if ( hvm_funcs.hap_supported )
88 printk("HVM: Hardware Assisted Paging detected.\n");
89 }
91 /*
92 * Need to re-inject a given event? We avoid re-injecting software exceptions
93 * and interrupts because the faulting/trapping instruction can simply be
94 * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
95 * INT3/INTO/INTn).
96 */
97 int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
98 {
99 switch ( type )
100 {
101 case X86_EVENTTYPE_EXT_INTR:
102 case X86_EVENTTYPE_NMI:
103 return 1;
104 case X86_EVENTTYPE_HW_EXCEPTION:
105 /*
106 * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
107 * check for these vectors, as they are really SW Exceptions. SVM has
108 * not updated RIP to point after the trapping instruction (INT3/INTO).
109 */
110 return (vector != 3) && (vector != 4);
111 default:
112 /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
113 break;
114 }
115 return 0;
116 }
118 /*
119 * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1.
120 * This means we can assume that @vec2 is contributory or a page fault.
121 */
122 uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
123 {
124 /* Exception during double-fault delivery always causes a triple fault. */
125 if ( vec1 == TRAP_double_fault )
126 {
127 hvm_triple_fault();
128 return TRAP_double_fault; /* dummy return */
129 }
131 /* Exception during page-fault delivery always causes a double fault. */
132 if ( vec1 == TRAP_page_fault )
133 return TRAP_double_fault;
135 /* Discard the first exception if it's benign or if we now have a #PF. */
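/* (0x7c01 == #DE(0), #TS(10), #NP(11), #SS(12), #GP(13), #PF(14): the
 * contributory exceptions plus page fault.) */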
136 if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) )
137 return vec2;
139 /* Cannot combine the exceptions: double fault. */
140 return TRAP_double_fault;
141 }
143 void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
144 {
145 u64 host_tsc;
147 rdtscll(host_tsc);
149 v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
150 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
151 }
153 u64 hvm_get_guest_tsc(struct vcpu *v)
154 {
155 u64 host_tsc;
157 if ( opt_softtsc )
158 host_tsc = hvm_get_guest_time(v);
159 else
160 rdtscll(host_tsc);
162 return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
163 }
165 void hvm_migrate_timers(struct vcpu *v)
166 {
167 rtc_migrate_timers(v);
168 pt_migrate(v);
169 }
171 void hvm_do_resume(struct vcpu *v)
172 {
173 ioreq_t *p;
175 pt_restore_timer(v);
177 /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
178 p = &get_ioreq(v)->vp_ioreq;
179 while ( p->state != STATE_IOREQ_NONE )
180 {
181 switch ( p->state )
182 {
183 case STATE_IORESP_READY: /* IORESP_READY -> NONE */
184 hvm_io_assist();
185 break;
186 case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
187 case STATE_IOREQ_INPROCESS:
188 wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
189 (p->state != STATE_IOREQ_READY) &&
190 (p->state != STATE_IOREQ_INPROCESS));
191 break;
192 default:
193 gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
194 domain_crash(v->domain);
195 return; /* bail */
196 }
197 }
198 }
200 static void hvm_init_ioreq_page(
201 struct domain *d, struct hvm_ioreq_page *iorp)
202 {
203 memset(iorp, 0, sizeof(*iorp));
204 spin_lock_init(&iorp->lock);
205 domain_pause(d);
206 }
208 static void hvm_destroy_ioreq_page(
209 struct domain *d, struct hvm_ioreq_page *iorp)
210 {
211 spin_lock(&iorp->lock);
213 ASSERT(d->is_dying);
215 if ( iorp->va != NULL )
216 {
217 unmap_domain_page_global(iorp->va);
218 put_page_and_type(iorp->page);
219 iorp->va = NULL;
220 }
222 spin_unlock(&iorp->lock);
223 }
225 static int hvm_set_ioreq_page(
226 struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
227 {
228 struct page_info *page;
229 p2m_type_t p2mt;
230 unsigned long mfn;
231 void *va;
233 mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
234 if ( !p2m_is_ram(p2mt) )
235 return -EINVAL;
236 ASSERT(mfn_valid(mfn));
238 page = mfn_to_page(mfn);
239 if ( !get_page_and_type(page, d, PGT_writable_page) )
240 return -EINVAL;
242 va = map_domain_page_global(mfn);
243 if ( va == NULL )
244 {
245 put_page_and_type(page);
246 return -ENOMEM;
247 }
249 spin_lock(&iorp->lock);
251 if ( (iorp->va != NULL) || d->is_dying )
252 {
253 spin_unlock(&iorp->lock);
254 unmap_domain_page_global(va);
255 put_page_and_type(mfn_to_page(mfn));
256 return -EINVAL;
257 }
259 iorp->va = va;
260 iorp->page = page;
262 spin_unlock(&iorp->lock);
264 domain_unpause(d);
266 return 0;
267 }
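/*
 * Handle guest writes to I/O port 0xe9 (registered as a port I/O handler in
 * hvm_domain_initialise() below): printable characters are accumulated in the
 * per-domain pbuf and flushed to the Xen log on '\n' or when the buffer fills.
 */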
269 static int hvm_print_line(
270 int dir, uint32_t port, uint32_t bytes, uint32_t *val)
271 {
272 struct vcpu *curr = current;
273 struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
274 char c = *val;
276 BUG_ON(bytes != 1);
278 /* Accept only printable characters, newline, and horizontal tab. */
279 if ( !isprint(c) && (c != '\n') && (c != '\t') )
280 return X86EMUL_OKAY;
282 spin_lock(&hd->pbuf_lock);
283 hd->pbuf[hd->pbuf_idx++] = c;
284 if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
285 {
286 if ( c != '\n' )
287 hd->pbuf[hd->pbuf_idx++] = '\n';
288 hd->pbuf[hd->pbuf_idx] = '\0';
289 printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf);
290 hd->pbuf_idx = 0;
291 }
292 spin_unlock(&hd->pbuf_lock);
294 return X86EMUL_OKAY;
295 }
297 int hvm_domain_initialise(struct domain *d)
298 {
299 int rc;
301 if ( !hvm_enabled )
302 {
303 gdprintk(XENLOG_WARNING, "Attempt to create an HVM guest "
304 "on a non-VT/AMD-V platform.\n");
305 return -EINVAL;
306 }
308 spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
309 spin_lock_init(&d->arch.hvm_domain.irq_lock);
310 spin_lock_init(&d->arch.hvm_domain.uc_lock);
312 INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
313 spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
315 hvm_init_guest_time(d);
317 d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
319 hvm_init_cacheattr_region_list(d);
321 rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
322 if ( rc != 0 )
323 goto fail1;
325 vpic_init(d);
327 rc = vioapic_init(d);
328 if ( rc != 0 )
329 goto fail1;
331 stdvga_init(d);
333 rtc_init(d);
335 hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
336 hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
338 register_portio_handler(d, 0xe9, 1, hvm_print_line);
340 rc = hvm_funcs.domain_initialise(d);
341 if ( rc != 0 )
342 goto fail2;
344 return 0;
346 fail2:
347 rtc_deinit(d);
348 stdvga_deinit(d);
349 vioapic_deinit(d);
350 fail1:
351 hvm_destroy_cacheattr_region_list(d);
352 return rc;
353 }
355 extern void msixtbl_pt_cleanup(struct domain *d);
357 void hvm_domain_relinquish_resources(struct domain *d)
358 {
359 hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
360 hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
362 msixtbl_pt_cleanup(d);
364 /* Stop all asynchronous timer actions. */
365 rtc_deinit(d);
366 if ( d->vcpu[0] != NULL )
367 {
368 pit_deinit(d);
369 pmtimer_deinit(d);
370 hpet_deinit(d);
371 }
372 }
374 void hvm_domain_destroy(struct domain *d)
375 {
376 hvm_funcs.domain_destroy(d);
377 rtc_deinit(d);
378 stdvga_deinit(d);
379 vioapic_deinit(d);
380 hvm_destroy_cacheattr_region_list(d);
381 }
383 static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
384 {
385 struct vcpu *v;
386 struct hvm_hw_cpu ctxt;
387 struct segment_register seg;
388 struct vcpu_guest_context *vc;
390 for_each_vcpu ( d, v )
391 {
392 /* We don't need to save state for a vcpu that is down; the restore
393 * code will leave it down if there is nothing saved. */
394 if ( test_bit(_VPF_down, &v->pause_flags) )
395 continue;
397 /* Architecture-specific vmcs/vmcb bits */
398 hvm_funcs.save_cpu_ctxt(v, &ctxt);
400 hvm_get_segment_register(v, x86_seg_idtr, &seg);
401 ctxt.idtr_limit = seg.limit;
402 ctxt.idtr_base = seg.base;
404 hvm_get_segment_register(v, x86_seg_gdtr, &seg);
405 ctxt.gdtr_limit = seg.limit;
406 ctxt.gdtr_base = seg.base;
408 hvm_get_segment_register(v, x86_seg_cs, &seg);
409 ctxt.cs_sel = seg.sel;
410 ctxt.cs_limit = seg.limit;
411 ctxt.cs_base = seg.base;
412 ctxt.cs_arbytes = seg.attr.bytes;
414 hvm_get_segment_register(v, x86_seg_ds, &seg);
415 ctxt.ds_sel = seg.sel;
416 ctxt.ds_limit = seg.limit;
417 ctxt.ds_base = seg.base;
418 ctxt.ds_arbytes = seg.attr.bytes;
420 hvm_get_segment_register(v, x86_seg_es, &seg);
421 ctxt.es_sel = seg.sel;
422 ctxt.es_limit = seg.limit;
423 ctxt.es_base = seg.base;
424 ctxt.es_arbytes = seg.attr.bytes;
426 hvm_get_segment_register(v, x86_seg_ss, &seg);
427 ctxt.ss_sel = seg.sel;
428 ctxt.ss_limit = seg.limit;
429 ctxt.ss_base = seg.base;
430 ctxt.ss_arbytes = seg.attr.bytes;
432 hvm_get_segment_register(v, x86_seg_fs, &seg);
433 ctxt.fs_sel = seg.sel;
434 ctxt.fs_limit = seg.limit;
435 ctxt.fs_base = seg.base;
436 ctxt.fs_arbytes = seg.attr.bytes;
438 hvm_get_segment_register(v, x86_seg_gs, &seg);
439 ctxt.gs_sel = seg.sel;
440 ctxt.gs_limit = seg.limit;
441 ctxt.gs_base = seg.base;
442 ctxt.gs_arbytes = seg.attr.bytes;
444 hvm_get_segment_register(v, x86_seg_tr, &seg);
445 ctxt.tr_sel = seg.sel;
446 ctxt.tr_limit = seg.limit;
447 ctxt.tr_base = seg.base;
448 ctxt.tr_arbytes = seg.attr.bytes;
450 hvm_get_segment_register(v, x86_seg_ldtr, &seg);
451 ctxt.ldtr_sel = seg.sel;
452 ctxt.ldtr_limit = seg.limit;
453 ctxt.ldtr_base = seg.base;
454 ctxt.ldtr_arbytes = seg.attr.bytes;
456 vc = &v->arch.guest_context;
458 if ( v->fpu_initialised )
459 memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
460 else
461 memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
463 ctxt.rax = vc->user_regs.eax;
464 ctxt.rbx = vc->user_regs.ebx;
465 ctxt.rcx = vc->user_regs.ecx;
466 ctxt.rdx = vc->user_regs.edx;
467 ctxt.rbp = vc->user_regs.ebp;
468 ctxt.rsi = vc->user_regs.esi;
469 ctxt.rdi = vc->user_regs.edi;
470 ctxt.rsp = vc->user_regs.esp;
471 ctxt.rip = vc->user_regs.eip;
472 ctxt.rflags = vc->user_regs.eflags;
473 #ifdef __x86_64__
474 ctxt.r8 = vc->user_regs.r8;
475 ctxt.r9 = vc->user_regs.r9;
476 ctxt.r10 = vc->user_regs.r10;
477 ctxt.r11 = vc->user_regs.r11;
478 ctxt.r12 = vc->user_regs.r12;
479 ctxt.r13 = vc->user_regs.r13;
480 ctxt.r14 = vc->user_regs.r14;
481 ctxt.r15 = vc->user_regs.r15;
482 #endif
483 ctxt.dr0 = vc->debugreg[0];
484 ctxt.dr1 = vc->debugreg[1];
485 ctxt.dr2 = vc->debugreg[2];
486 ctxt.dr3 = vc->debugreg[3];
487 ctxt.dr6 = vc->debugreg[6];
488 ctxt.dr7 = vc->debugreg[7];
490 if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
491 return 1;
492 }
493 return 0;
494 }
496 static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
497 {
498 int vcpuid, rc;
499 struct vcpu *v;
500 struct hvm_hw_cpu ctxt;
501 struct segment_register seg;
502 struct vcpu_guest_context *vc;
504 /* Which vcpu is this? */
505 vcpuid = hvm_load_instance(h);
506 if ( vcpuid > MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL )
507 {
508 gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
509 return -EINVAL;
510 }
511 vc = &v->arch.guest_context;
513 /* Need to init this vcpu before loading its contents */
514 domain_lock(d);
515 if ( !v->is_initialised )
516 if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
517 return rc;
518 domain_unlock(d);
520 if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
521 return -EINVAL;
523 /* Sanity check some control registers. */
524 if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
525 !(ctxt.cr0 & X86_CR0_ET) ||
526 ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
527 {
528 gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
529 ctxt.cr0);
530 return -EINVAL;
531 }
533 if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
534 {
535 gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
536 ctxt.cr4);
537 return -EINVAL;
538 }
540 if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA |
541 EFER_NX | EFER_SCE)) ||
542 ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
543 (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
544 (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
545 (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) ||
546 ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
547 {
548 gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
549 ctxt.msr_efer);
550 return -EINVAL;
551 }
553 /* Older Xen versions used to save the segment arbytes directly
554 * from the VMCS on Intel hosts. Detect this and rearrange them
555 * into the struct segment_register format. */
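/* (The old VMCS access-rights format keeps G/D.B/L/AVL in bits 15:12 with
 * bits 11:8 zero; segment_register packs the same flags into bits 11:8,
 * hence the shift by 4.) */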
556 #define UNFOLD_ARBYTES(_r) \
557 if ( (_r & 0xf000) && !(_r & 0x0f00) ) \
558 _r = ((_r & 0xff) | ((_r >> 4) & 0xf00))
559 UNFOLD_ARBYTES(ctxt.cs_arbytes);
560 UNFOLD_ARBYTES(ctxt.ds_arbytes);
561 UNFOLD_ARBYTES(ctxt.es_arbytes);
562 UNFOLD_ARBYTES(ctxt.fs_arbytes);
563 UNFOLD_ARBYTES(ctxt.gs_arbytes);
564 UNFOLD_ARBYTES(ctxt.ss_arbytes);
565 UNFOLD_ARBYTES(ctxt.tr_arbytes);
566 UNFOLD_ARBYTES(ctxt.ldtr_arbytes);
567 #undef UNFOLD_ARBYTES
569 /* Architecture-specific vmcs/vmcb bits */
570 if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
571 return -EINVAL;
573 seg.limit = ctxt.idtr_limit;
574 seg.base = ctxt.idtr_base;
575 hvm_set_segment_register(v, x86_seg_idtr, &seg);
577 seg.limit = ctxt.gdtr_limit;
578 seg.base = ctxt.gdtr_base;
579 hvm_set_segment_register(v, x86_seg_gdtr, &seg);
581 seg.sel = ctxt.cs_sel;
582 seg.limit = ctxt.cs_limit;
583 seg.base = ctxt.cs_base;
584 seg.attr.bytes = ctxt.cs_arbytes;
585 hvm_set_segment_register(v, x86_seg_cs, &seg);
587 seg.sel = ctxt.ds_sel;
588 seg.limit = ctxt.ds_limit;
589 seg.base = ctxt.ds_base;
590 seg.attr.bytes = ctxt.ds_arbytes;
591 hvm_set_segment_register(v, x86_seg_ds, &seg);
593 seg.sel = ctxt.es_sel;
594 seg.limit = ctxt.es_limit;
595 seg.base = ctxt.es_base;
596 seg.attr.bytes = ctxt.es_arbytes;
597 hvm_set_segment_register(v, x86_seg_es, &seg);
599 seg.sel = ctxt.ss_sel;
600 seg.limit = ctxt.ss_limit;
601 seg.base = ctxt.ss_base;
602 seg.attr.bytes = ctxt.ss_arbytes;
603 hvm_set_segment_register(v, x86_seg_ss, &seg);
605 seg.sel = ctxt.fs_sel;
606 seg.limit = ctxt.fs_limit;
607 seg.base = ctxt.fs_base;
608 seg.attr.bytes = ctxt.fs_arbytes;
609 hvm_set_segment_register(v, x86_seg_fs, &seg);
611 seg.sel = ctxt.gs_sel;
612 seg.limit = ctxt.gs_limit;
613 seg.base = ctxt.gs_base;
614 seg.attr.bytes = ctxt.gs_arbytes;
615 hvm_set_segment_register(v, x86_seg_gs, &seg);
617 seg.sel = ctxt.tr_sel;
618 seg.limit = ctxt.tr_limit;
619 seg.base = ctxt.tr_base;
620 seg.attr.bytes = ctxt.tr_arbytes;
621 hvm_set_segment_register(v, x86_seg_tr, &seg);
623 seg.sel = ctxt.ldtr_sel;
624 seg.limit = ctxt.ldtr_limit;
625 seg.base = ctxt.ldtr_base;
626 seg.attr.bytes = ctxt.ldtr_arbytes;
627 hvm_set_segment_register(v, x86_seg_ldtr, &seg);
629 memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
631 vc->user_regs.eax = ctxt.rax;
632 vc->user_regs.ebx = ctxt.rbx;
633 vc->user_regs.ecx = ctxt.rcx;
634 vc->user_regs.edx = ctxt.rdx;
635 vc->user_regs.ebp = ctxt.rbp;
636 vc->user_regs.esi = ctxt.rsi;
637 vc->user_regs.edi = ctxt.rdi;
638 vc->user_regs.esp = ctxt.rsp;
639 vc->user_regs.eip = ctxt.rip;
640 vc->user_regs.eflags = ctxt.rflags | 2;
641 #ifdef __x86_64__
642 vc->user_regs.r8 = ctxt.r8;
643 vc->user_regs.r9 = ctxt.r9;
644 vc->user_regs.r10 = ctxt.r10;
645 vc->user_regs.r11 = ctxt.r11;
646 vc->user_regs.r12 = ctxt.r12;
647 vc->user_regs.r13 = ctxt.r13;
648 vc->user_regs.r14 = ctxt.r14;
649 vc->user_regs.r15 = ctxt.r15;
650 #endif
651 vc->debugreg[0] = ctxt.dr0;
652 vc->debugreg[1] = ctxt.dr1;
653 vc->debugreg[2] = ctxt.dr2;
654 vc->debugreg[3] = ctxt.dr3;
655 vc->debugreg[6] = ctxt.dr6;
656 vc->debugreg[7] = ctxt.dr7;
658 vc->flags = VGCF_online;
659 v->fpu_initialised = 1;
661 /* Auxiliary processors should be woken immediately. */
662 if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
663 vcpu_wake(v);
665 return 0;
666 }
668 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
669 1, HVMSR_PER_VCPU);
671 int hvm_vcpu_initialise(struct vcpu *v)
672 {
673 int rc;
675 if ( (rc = vlapic_init(v)) != 0 )
676 goto fail1;
678 if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
679 goto fail2;
681 /* Create ioreq event channel. */
682 rc = alloc_unbound_xen_event_channel(v, 0);
683 if ( rc < 0 )
684 goto fail3;
686 /* Register ioreq event channel. */
687 v->arch.hvm_vcpu.xen_port = rc;
688 spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
689 if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
690 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
691 spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);
693 spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
694 INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
696 rc = hvm_vcpu_cacheattr_init(v);
697 if ( rc != 0 )
698 goto fail3;
700 tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
701 (void(*)(unsigned long))hvm_assert_evtchn_irq,
702 (unsigned long)v);
704 v->arch.guest_context.user_regs.eflags = 2;
706 if ( v->vcpu_id == 0 )
707 {
708 /* NB. All these really belong in hvm_domain_initialise(). */
709 pit_init(v, cpu_khz);
710 pmtimer_init(v);
711 hpet_init(v);
713 /* Init guest TSC to start from zero. */
714 hvm_set_guest_tsc(v, 0);
716 /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
717 v->is_initialised = 1;
718 clear_bit(_VPF_down, &v->pause_flags);
719 }
721 return 0;
723 fail3:
724 hvm_funcs.vcpu_destroy(v);
725 fail2:
726 vlapic_destroy(v);
727 fail1:
728 return rc;
729 }
731 void hvm_vcpu_destroy(struct vcpu *v)
732 {
733 tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
734 hvm_vcpu_cacheattr_destroy(v);
735 vlapic_destroy(v);
736 hvm_funcs.vcpu_destroy(v);
738 /* Event channel is already freed by evtchn_destroy(). */
739 /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
740 }
742 void hvm_vcpu_down(struct vcpu *v)
743 {
744 struct domain *d = v->domain;
745 int online_count = 0;
747 /* Doesn't halt us immediately, but we'll never return to guest context. */
748 set_bit(_VPF_down, &v->pause_flags);
749 vcpu_sleep_nosync(v);
751 /* Any other VCPUs online? ... */
752 domain_lock(d);
753 for_each_vcpu ( d, v )
754 if ( !test_bit(_VPF_down, &v->pause_flags) )
755 online_count++;
756 domain_unlock(d);
758 /* ... Shut down the domain if not. */
759 if ( online_count == 0 )
760 {
761 gdprintk(XENLOG_INFO, "All CPUs offline -- powering off.\n");
762 domain_shutdown(d, SHUTDOWN_poweroff);
763 }
764 }
766 void hvm_send_assist_req(struct vcpu *v)
767 {
768 ioreq_t *p;
770 if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
771 return; /* implicitly bins the i/o operation */
773 p = &get_ioreq(v)->vp_ioreq;
774 if ( unlikely(p->state != STATE_IOREQ_NONE) )
775 {
776 /* This indicates a bug in the device model. Crash the domain. */
777 gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
778 domain_crash(v->domain);
779 return;
780 }
782 prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
784 /*
785 * Following happens /after/ blocking and setting up ioreq contents.
786 * prepare_wait_on_xen_event_channel() is an implicit barrier.
787 */
788 p->state = STATE_IOREQ_READY;
789 notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
790 }
792 void hvm_hlt(unsigned long rflags)
793 {
794 struct vcpu *curr = current;
796 if ( hvm_event_pending(curr) )
797 return;
799 /*
800 * If we halt with interrupts disabled, that's a pretty sure sign that we
801 * want to shut down. In a real processor, NMIs are the only way to break
802 * out of this.
803 */
804 if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
805 return hvm_vcpu_down(curr);
807 do_sched_op_compat(SCHEDOP_block, 0);
809 HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr));
810 }
812 void hvm_triple_fault(void)
813 {
814 struct vcpu *v = current;
815 gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
816 "invoking HVM system reset.\n", v->vcpu_id);
817 domain_shutdown(v->domain, SHUTDOWN_reboot);
818 }
820 int hvm_set_efer(uint64_t value)
821 {
822 struct vcpu *v = current;
824 value &= ~EFER_LMA;
826 if ( (value & ~(EFER_FFXSE | EFER_LME | EFER_NX | EFER_SCE)) ||
827 ((sizeof(long) != 8) && (value & EFER_LME)) ||
828 (!cpu_has_nx && (value & EFER_NX)) ||
829 (!cpu_has_syscall && (value & EFER_SCE)) ||
830 (!cpu_has_ffxsr && (value & EFER_FFXSE)) )
831 {
832 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
833 "EFER: %"PRIx64"\n", value);
834 hvm_inject_exception(TRAP_gp_fault, 0, 0);
835 return X86EMUL_EXCEPTION;
836 }
838 if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
839 hvm_paging_enabled(v) )
840 {
841 gdprintk(XENLOG_WARNING,
842 "Trying to change EFER.LME with paging enabled\n");
843 hvm_inject_exception(TRAP_gp_fault, 0, 0);
844 return X86EMUL_EXCEPTION;
845 }
847 value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
848 v->arch.hvm_vcpu.guest_efer = value;
849 hvm_update_guest_efer(v);
851 return X86EMUL_OKAY;
852 }
854 extern void shadow_blow_tables_per_domain(struct domain *d);
855 extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
857 /* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
858 static bool_t domain_exit_uc_mode(struct vcpu *v)
859 {
860 struct domain *d = v->domain;
861 struct vcpu *vs;
863 for_each_vcpu ( d, vs )
864 {
865 if ( (vs == v) || !vs->is_initialised )
866 continue;
867 if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
868 mtrr_pat_not_equal(vs, v) )
869 return 0;
870 }
872 return 1;
873 }
875 static void local_flush_cache(void *info)
876 {
877 wbinvd();
878 }
880 static void hvm_set_uc_mode(struct vcpu *v, bool_t is_in_uc_mode)
881 {
882 v->domain->arch.hvm_domain.is_in_uc_mode = is_in_uc_mode;
883 shadow_blow_tables_per_domain(v->domain);
884 if ( hvm_funcs.set_uc_mode )
885 return hvm_funcs.set_uc_mode(v);
886 }
888 int hvm_set_cr0(unsigned long value)
889 {
890 struct vcpu *v = current;
891 p2m_type_t p2mt;
892 unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
894 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
896 if ( (u32)value != value )
897 {
898 HVM_DBG_LOG(DBG_LEVEL_1,
899 "Guest attempts to set upper 32 bits in CR0: %lx",
900 value);
901 goto gpf;
902 }
904 value &= ~HVM_CR0_GUEST_RESERVED_BITS;
906 /* ET is reserved and should always be 1. */
907 value |= X86_CR0_ET;
909 if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
910 goto gpf;
912 if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
913 {
914 if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
915 {
916 if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
917 {
918 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
919 goto gpf;
920 }
921 HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
922 v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
923 hvm_update_guest_efer(v);
924 }
926 if ( !paging_mode_hap(v->domain) )
927 {
928 /* The guest CR3 must point to a valid guest-physical frame. */
929 gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
930 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
931 if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
932 !get_page(mfn_to_page(mfn), v->domain))
933 {
934 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
935 v->arch.hvm_vcpu.guest_cr[3], mfn);
936 domain_crash(v->domain);
937 return X86EMUL_UNHANDLEABLE;
938 }
940 /* Now arch.guest_table points to the machine-physical frame. */
941 v->arch.guest_table = pagetable_from_pfn(mfn);
943 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
944 v->arch.hvm_vcpu.guest_cr[3], mfn);
945 }
946 }
947 else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
948 {
949 /* When CR0.PG is cleared, LMA is cleared immediately. */
950 if ( hvm_long_mode_enabled(v) )
951 {
952 v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA;
953 hvm_update_guest_efer(v);
954 }
956 if ( !paging_mode_hap(v->domain) )
957 {
958 put_page(pagetable_get_page(v->arch.guest_table));
959 v->arch.guest_table = pagetable_null();
960 }
961 }
963 if ( has_arch_pdevs(v->domain) )
964 {
965 if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
966 {
967 /* Entering no fill cache mode. */
968 spin_lock(&v->domain->arch.hvm_domain.uc_lock);
969 v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
971 if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
972 {
973 /* Flush physical caches. */
974 on_each_cpu(local_flush_cache, NULL, 1, 1);
975 hvm_set_uc_mode(v, 1);
976 }
977 spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
978 }
979 else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
980 (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
981 {
982 /* Exit from no fill cache mode. */
983 spin_lock(&v->domain->arch.hvm_domain.uc_lock);
984 v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
986 if ( domain_exit_uc_mode(v) )
987 hvm_set_uc_mode(v, 0);
989 spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
990 }
991 }
993 v->arch.hvm_vcpu.guest_cr[0] = value;
994 hvm_update_guest_cr(v, 0);
996 if ( (value ^ old_value) & X86_CR0_PG )
997 paging_update_paging_modes(v);
999 return X86EMUL_OKAY;
1001 gpf:
1002 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1003 return X86EMUL_EXCEPTION;
1004 }
1006 int hvm_set_cr3(unsigned long value)
1007 {
1008 unsigned long mfn;
1009 p2m_type_t p2mt;
1010 struct vcpu *v = current;
1012 if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
1013 (value != v->arch.hvm_vcpu.guest_cr[3]) )
1014 {
1015 /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
1016 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1017 mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
1018 if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
1019 !get_page(mfn_to_page(mfn), v->domain) )
1020 goto bad_cr3;
1022 put_page(pagetable_get_page(v->arch.guest_table));
1023 v->arch.guest_table = pagetable_from_pfn(mfn);
1025 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1026 }
1028 v->arch.hvm_vcpu.guest_cr[3] = value;
1029 paging_update_cr3(v);
1030 return X86EMUL_OKAY;
1032 bad_cr3:
1033 gdprintk(XENLOG_ERR, "Invalid CR3\n");
1034 domain_crash(v->domain);
1035 return X86EMUL_UNHANDLEABLE;
1036 }
1038 int hvm_set_cr4(unsigned long value)
1039 {
1040 struct vcpu *v = current;
1041 unsigned long old_cr;
1043 if ( value & HVM_CR4_GUEST_RESERVED_BITS )
1044 {
1045 HVM_DBG_LOG(DBG_LEVEL_1,
1046 "Guest attempts to set reserved bit in CR4: %lx",
1047 value);
1048 goto gpf;
1049 }
1051 if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
1052 {
1053 HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
1054 "EFER.LMA is set");
1055 goto gpf;
1056 }
1058 old_cr = v->arch.hvm_vcpu.guest_cr[4];
1059 v->arch.hvm_vcpu.guest_cr[4] = value;
1060 hvm_update_guest_cr(v, 4);
1062 /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
1063 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
1064 paging_update_paging_modes(v);
1066 return X86EMUL_OKAY;
1068 gpf:
1069 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1070 return X86EMUL_EXCEPTION;
1071 }
1073 int hvm_virtual_to_linear_addr(
1074 enum x86_segment seg,
1075 struct segment_register *reg,
1076 unsigned long offset,
1077 unsigned int bytes,
1078 enum hvm_access_type access_type,
1079 unsigned int addr_size,
1080 unsigned long *linear_addr)
1082 unsigned long addr = offset;
1083 uint32_t last_byte;
1085 if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
1087 /*
1088 * REAL MODE: Don't bother with segment access checks.
1089 * Certain of them are not done in native real mode anyway.
1090 */
1091 addr = (uint32_t)(addr + reg->base);
1093 else if ( addr_size != 64 )
1095 /*
1096 * COMPATIBILITY MODE: Apply segment checks and add base.
1097 */
1099 switch ( access_type )
1101 case hvm_access_read:
1102 if ( (reg->attr.fields.type & 0xa) == 0x8 )
1103 goto gpf; /* execute-only code segment */
1104 break;
1105 case hvm_access_write:
1106 if ( (reg->attr.fields.type & 0xa) != 0x2 )
1107 goto gpf; /* not a writable data segment */
1108 break;
1109 default:
1110 break;
1113 last_byte = offset + bytes - 1;
1115 /* Is this a grows-down data segment? Special limit check if so. */
1116 if ( (reg->attr.fields.type & 0xc) == 0x4 )
1118 /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
1119 if ( !reg->attr.fields.db )
1120 last_byte = (uint16_t)last_byte;
1122 /* Check first byte and last byte against respective bounds. */
1123 if ( (offset <= reg->limit) || (last_byte < offset) )
1124 goto gpf;
1126 else if ( (last_byte > reg->limit) || (last_byte < offset) )
1127 goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
1129 /*
1130 * Hardware truncates to 32 bits in compatibility mode.
1131 * It does not truncate to 16 bits in 16-bit address-size mode.
1132 */
1133 addr = (uint32_t)(addr + reg->base);
1135 else
1137 /*
1138 * LONG MODE: FS and GS add segment base. Addresses must be canonical.
1139 */
1141 if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
1142 addr += reg->base;
1144 if ( !is_canonical_address(addr) )
1145 goto gpf;
1148 *linear_addr = addr;
1149 return 1;
1151 gpf:
1152 return 0;
1155 static void *hvm_map_entry(unsigned long va)
1157 unsigned long gfn, mfn;
1158 p2m_type_t p2mt;
1159 uint32_t pfec;
1161 if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
1163 gdprintk(XENLOG_ERR, "Descriptor table entry "
1164 "straddles page boundary\n");
1165 domain_crash(current->domain);
1166 return NULL;
1169 /* We're mapping on behalf of the segment-load logic, which might
1170 * write the accessed flags in the descriptors (in 32-bit mode), but
1171 * we still treat it as a kernel-mode read (i.e. no access checks). */
1172 pfec = PFEC_page_present;
1173 gfn = paging_gva_to_gfn(current, va, &pfec);
1174 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
1175 if ( !p2m_is_ram(p2mt) )
1177 gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
1178 domain_crash(current->domain);
1179 return NULL;
1182 ASSERT(mfn_valid(mfn));
1184 paging_mark_dirty(current->domain, mfn);
1186 return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
1189 static void hvm_unmap_entry(void *p)
1191 if ( p )
1192 unmap_domain_page(p);
1195 static int hvm_load_segment_selector(
1196 enum x86_segment seg, uint16_t sel)
1198 struct segment_register desctab, cs, segr;
1199 struct desc_struct *pdesc, desc;
1200 u8 dpl, rpl, cpl;
1201 int fault_type = TRAP_invalid_tss;
1202 struct cpu_user_regs *regs = guest_cpu_user_regs();
1203 struct vcpu *v = current;
1205 if ( regs->eflags & EF_VM )
1207 segr.sel = sel;
1208 segr.base = (uint32_t)sel << 4;
1209 segr.limit = 0xffffu;
1210 segr.attr.bytes = 0xf3;
1211 hvm_set_segment_register(v, seg, &segr);
1212 return 0;
1215 /* NULL selector? */
1216 if ( (sel & 0xfffc) == 0 )
1218 if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
1219 goto fail;
1220 memset(&segr, 0, sizeof(segr));
1221 hvm_set_segment_register(v, seg, &segr);
1222 return 0;
1225 /* LDT descriptor must be in the GDT. */
1226 if ( (seg == x86_seg_ldtr) && (sel & 4) )
1227 goto fail;
1229 hvm_get_segment_register(v, x86_seg_cs, &cs);
1230 hvm_get_segment_register(
1231 v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);
1233 /* Check against descriptor table limit. */
1234 if ( ((sel & 0xfff8) + 7) > desctab.limit )
1235 goto fail;
1237 pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
1238 if ( pdesc == NULL )
1239 goto hvm_map_fail;
1241 do {
1242 desc = *pdesc;
1244 /* Segment present in memory? */
1245 if ( !(desc.b & (1u<<15)) )
1247 fault_type = TRAP_no_segment;
1248 goto unmap_and_fail;
1251 /* LDT descriptor is a system segment. All others are code/data. */
1252 if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
1253 goto unmap_and_fail;
1255 dpl = (desc.b >> 13) & 3;
1256 rpl = sel & 3;
1257 cpl = cs.sel & 3;
1259 switch ( seg )
1261 case x86_seg_cs:
1262 /* Code segment? */
1263 if ( !(desc.b & (1u<<11)) )
1264 goto unmap_and_fail;
1265 /* Non-conforming segment: check DPL against RPL. */
1266 if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
1267 goto unmap_and_fail;
1268 break;
1269 case x86_seg_ss:
1270 /* Writable data segment? */
1271 if ( (desc.b & (5u<<9)) != (1u<<9) )
1272 goto unmap_and_fail;
1273 if ( (dpl != cpl) || (dpl != rpl) )
1274 goto unmap_and_fail;
1275 break;
1276 case x86_seg_ldtr:
1277 /* LDT system segment? */
1278 if ( (desc.b & (15u<<8)) != (2u<<8) )
1279 goto unmap_and_fail;
1280 goto skip_accessed_flag;
1281 default:
1282 /* Readable code or data segment? */
1283 if ( (desc.b & (5u<<9)) == (4u<<9) )
1284 goto unmap_and_fail;
1285 /* Non-conforming segment: check DPL against RPL and CPL. */
1286 if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) )
1287 goto unmap_and_fail;
1288 break;
1290 } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
1291 (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );
1293 /* Force the Accessed flag in our local copy. */
1294 desc.b |= 0x100;
1296 skip_accessed_flag:
1297 hvm_unmap_entry(pdesc);
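/* Unpack the GDT/LDT descriptor: desc.a = limit[15:0], base[15:0];
 * desc.b = base[23:16], attribute bits, limit[19:16], base[31:24]. */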
1299 segr.base = (((desc.b << 0) & 0xff000000u) |
1300 ((desc.b << 16) & 0x00ff0000u) |
1301 ((desc.a >> 16) & 0x0000ffffu));
1302 segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) |
1303 ((desc.b >> 12) & 0x0f00u));
1304 segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
1305 if ( segr.attr.fields.g )
1306 segr.limit = (segr.limit << 12) | 0xfffu;
1307 segr.sel = sel;
1308 hvm_set_segment_register(v, seg, &segr);
1310 return 0;
1312 unmap_and_fail:
1313 hvm_unmap_entry(pdesc);
1314 fail:
1315 hvm_inject_exception(fault_type, sel & 0xfffc, 0);
1316 hvm_map_fail:
1317 return 1;
1320 void hvm_task_switch(
1321 uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
1322 int32_t errcode)
1324 struct vcpu *v = current;
1325 struct cpu_user_regs *regs = guest_cpu_user_regs();
1326 struct segment_register gdt, tr, prev_tr, segr;
1327 struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
1328 unsigned long eflags;
1329 int exn_raised, rc;
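/* In-memory layout of a 32-bit hardware TSS (104 bytes). */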
1330 struct {
1331 u16 back_link,__blh;
1332 u32 esp0;
1333 u16 ss0, _0;
1334 u32 esp1;
1335 u16 ss1, _1;
1336 u32 esp2;
1337 u16 ss2, _2;
1338 u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
1339 u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
1340 u16 trace, iomap;
1341 } tss = { 0 };
1343 hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
1344 hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
1346 if ( ((tss_sel & 0xfff8) + 7) > gdt.limit )
1348 hvm_inject_exception((taskswitch_reason == TSW_iret) ?
1349 TRAP_invalid_tss : TRAP_gp_fault,
1350 tss_sel & 0xfff8, 0);
1351 goto out;
1354 optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
1355 if ( optss_desc == NULL )
1356 goto out;
1358 nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
1359 if ( nptss_desc == NULL )
1360 goto out;
1362 tss_desc = *nptss_desc;
1363 tr.sel = tss_sel;
1364 tr.base = (((tss_desc.b << 0) & 0xff000000u) |
1365 ((tss_desc.b << 16) & 0x00ff0000u) |
1366 ((tss_desc.a >> 16) & 0x0000ffffu));
1367 tr.attr.bytes = (((tss_desc.b >> 8) & 0x00ffu) |
1368 ((tss_desc.b >> 12) & 0x0f00u));
1369 tr.limit = (tss_desc.b & 0x000f0000u) | (tss_desc.a & 0x0000ffffu);
1370 if ( tr.attr.fields.g )
1371 tr.limit = (tr.limit << 12) | 0xfffu;
1373 if ( !tr.attr.fields.p )
1375 hvm_inject_exception(TRAP_no_segment, tss_sel & 0xfff8, 0);
1376 goto out;
1379 if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
1381 hvm_inject_exception(
1382 (taskswitch_reason == TSW_iret) ? TRAP_invalid_tss : TRAP_gp_fault,
1383 tss_sel & 0xfff8, 0);
1384 goto out;
1387 if ( tr.limit < (sizeof(tss)-1) )
1389 hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
1390 goto out;
1393 rc = hvm_copy_from_guest_virt(
1394 &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
1395 if ( rc == HVMCOPY_bad_gva_to_gfn )
1396 goto out;
1398 eflags = regs->eflags;
1399 if ( taskswitch_reason == TSW_iret )
1400 eflags &= ~X86_EFLAGS_NT;
1402 tss.cr3 = v->arch.hvm_vcpu.guest_cr[3];
1403 tss.eip = regs->eip;
1404 tss.eflags = eflags;
1405 tss.eax = regs->eax;
1406 tss.ecx = regs->ecx;
1407 tss.edx = regs->edx;
1408 tss.ebx = regs->ebx;
1409 tss.esp = regs->esp;
1410 tss.ebp = regs->ebp;
1411 tss.esi = regs->esi;
1412 tss.edi = regs->edi;
1414 hvm_get_segment_register(v, x86_seg_es, &segr);
1415 tss.es = segr.sel;
1416 hvm_get_segment_register(v, x86_seg_cs, &segr);
1417 tss.cs = segr.sel;
1418 hvm_get_segment_register(v, x86_seg_ss, &segr);
1419 tss.ss = segr.sel;
1420 hvm_get_segment_register(v, x86_seg_ds, &segr);
1421 tss.ds = segr.sel;
1422 hvm_get_segment_register(v, x86_seg_fs, &segr);
1423 tss.fs = segr.sel;
1424 hvm_get_segment_register(v, x86_seg_gs, &segr);
1425 tss.gs = segr.sel;
1426 hvm_get_segment_register(v, x86_seg_ldtr, &segr);
1427 tss.ldt = segr.sel;
1429 rc = hvm_copy_to_guest_virt(
1430 prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
1431 if ( rc == HVMCOPY_bad_gva_to_gfn )
1432 goto out;
1434 rc = hvm_copy_from_guest_virt(
1435 &tss, tr.base, sizeof(tss), PFEC_page_present);
1436 if ( rc == HVMCOPY_bad_gva_to_gfn )
1437 goto out;
1439 if ( hvm_set_cr3(tss.cr3) )
1440 goto out;
1442 regs->eip = tss.eip;
1443 regs->eflags = tss.eflags | 2;
1444 regs->eax = tss.eax;
1445 regs->ecx = tss.ecx;
1446 regs->edx = tss.edx;
1447 regs->ebx = tss.ebx;
1448 regs->esp = tss.esp;
1449 regs->ebp = tss.ebp;
1450 regs->esi = tss.esi;
1451 regs->edi = tss.edi;
1453 if ( (taskswitch_reason == TSW_call_or_int) )
1455 regs->eflags |= X86_EFLAGS_NT;
1456 tss.back_link = prev_tr.sel;
1459 exn_raised = 0;
1460 if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt) ||
1461 hvm_load_segment_selector(x86_seg_es, tss.es) ||
1462 hvm_load_segment_selector(x86_seg_cs, tss.cs) ||
1463 hvm_load_segment_selector(x86_seg_ss, tss.ss) ||
1464 hvm_load_segment_selector(x86_seg_ds, tss.ds) ||
1465 hvm_load_segment_selector(x86_seg_fs, tss.fs) ||
1466 hvm_load_segment_selector(x86_seg_gs, tss.gs) )
1467 exn_raised = 1;
1469 rc = hvm_copy_to_guest_virt(
1470 tr.base, &tss, sizeof(tss), PFEC_page_present);
1471 if ( rc == HVMCOPY_bad_gva_to_gfn )
1472 exn_raised = 1;
1474 if ( (tss.trace & 1) && !exn_raised )
1475 hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
1477 tr.attr.fields.type = 0xb; /* busy 32-bit tss */
1478 hvm_set_segment_register(v, x86_seg_tr, &tr);
1480 v->arch.hvm_vcpu.guest_cr[0] |= X86_CR0_TS;
1481 hvm_update_guest_cr(v, 0);
1483 if ( (taskswitch_reason == TSW_iret) ||
1484 (taskswitch_reason == TSW_jmp) )
1485 clear_bit(41, optss_desc); /* clear B flag of old task */
1487 if ( taskswitch_reason != TSW_iret )
1488 set_bit(41, nptss_desc); /* set B flag of new task */
1490 if ( errcode >= 0 )
1492 struct segment_register reg;
1493 unsigned long linear_addr;
1494 regs->esp -= 4;
1495 hvm_get_segment_register(current, x86_seg_ss, &reg);
1496 /* Todo: do not ignore access faults here. */
1497 if ( hvm_virtual_to_linear_addr(x86_seg_ss, &reg, regs->esp,
1498 4, hvm_access_write, 32,
1499 &linear_addr) )
1500 hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4, 0);
1503 out:
1504 hvm_unmap_entry(optss_desc);
1505 hvm_unmap_entry(nptss_desc);
1508 #define HVMCOPY_from_guest (0u<<0)
1509 #define HVMCOPY_to_guest (1u<<0)
1510 #define HVMCOPY_no_fault (0u<<1)
1511 #define HVMCOPY_fault (1u<<1)
1512 #define HVMCOPY_phys (0u<<2)
1513 #define HVMCOPY_virt (1u<<2)
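/*
 * Flag bits for __hvm_copy(): direction of the copy, whether a failed
 * virtual-address translation injects #PF into the guest, and whether 'addr'
 * is a guest-virtual or guest-physical address. The hvm_copy_*() and
 * hvm_fetch_*() wrappers below simply fix particular combinations.
 */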
1514 static enum hvm_copy_result __hvm_copy(
1515 void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec)
1517 struct vcpu *curr = current;
1518 unsigned long gfn, mfn;
1519 p2m_type_t p2mt;
1520 char *p;
1521 int count, todo = size;
1523 while ( todo > 0 )
1525 count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
1527 if ( flags & HVMCOPY_virt )
1529 gfn = paging_gva_to_gfn(curr, addr, &pfec);
1530 if ( gfn == INVALID_GFN )
1532 if ( flags & HVMCOPY_fault )
1533 hvm_inject_exception(TRAP_page_fault, pfec, addr);
1534 return HVMCOPY_bad_gva_to_gfn;
1537 else
1539 gfn = addr >> PAGE_SHIFT;
1542 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
1544 if ( !p2m_is_ram(p2mt) )
1545 return HVMCOPY_bad_gfn_to_mfn;
1546 ASSERT(mfn_valid(mfn));
1548 p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
1550 if ( flags & HVMCOPY_to_guest )
1552 if ( p2mt == p2m_ram_ro )
1554 static unsigned long lastpage;
1555 if ( xchg(&lastpage, gfn) != gfn )
1556 gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
1557 " memory page. gfn=%#lx, mfn=%#lx\n",
1558 gfn, mfn);
1560 else
1562 memcpy(p, buf, count);
1563 paging_mark_dirty(curr->domain, mfn);
1566 else
1568 memcpy(buf, p, count);
1571 unmap_domain_page(p);
1573 addr += count;
1574 buf += count;
1575 todo -= count;
1578 return HVMCOPY_okay;
1581 enum hvm_copy_result hvm_copy_to_guest_phys(
1582 paddr_t paddr, void *buf, int size)
1584 return __hvm_copy(buf, paddr, size,
1585 HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_phys,
1586 0);
1589 enum hvm_copy_result hvm_copy_from_guest_phys(
1590 void *buf, paddr_t paddr, int size)
1592 return __hvm_copy(buf, paddr, size,
1593 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_phys,
1594 0);
1597 enum hvm_copy_result hvm_copy_to_guest_virt(
1598 unsigned long vaddr, void *buf, int size, uint32_t pfec)
1600 return __hvm_copy(buf, vaddr, size,
1601 HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_virt,
1602 PFEC_page_present | PFEC_write_access | pfec);
1605 enum hvm_copy_result hvm_copy_from_guest_virt(
1606 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1608 return __hvm_copy(buf, vaddr, size,
1609 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
1610 PFEC_page_present | pfec);
1613 enum hvm_copy_result hvm_fetch_from_guest_virt(
1614 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1616 if ( hvm_nx_enabled(current) )
1617 pfec |= PFEC_insn_fetch;
1618 return __hvm_copy(buf, vaddr, size,
1619 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
1620 PFEC_page_present | pfec);
1623 enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
1624 unsigned long vaddr, void *buf, int size, uint32_t pfec)
1626 return __hvm_copy(buf, vaddr, size,
1627 HVMCOPY_to_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1628 PFEC_page_present | PFEC_write_access | pfec);
1631 enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
1632 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1634 return __hvm_copy(buf, vaddr, size,
1635 HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1636 PFEC_page_present | pfec);
1639 enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
1640 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1642 if ( hvm_nx_enabled(current) )
1643 pfec |= PFEC_insn_fetch;
1644 return __hvm_copy(buf, vaddr, size,
1645 HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1646 PFEC_page_present | pfec);
1649 #ifdef __x86_64__
1650 DEFINE_PER_CPU(bool_t, hvm_64bit_hcall);
1651 #endif
1653 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
1655 int rc;
1657 #ifdef __x86_64__
1658 if ( !this_cpu(hvm_64bit_hcall) && is_compat_arg_xlat_range(to, len) )
1660 memcpy(to, from, len);
1661 return 0;
1663 #endif
1665 rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from,
1666 len, 0);
1667 return rc ? len : 0; /* fake a copy_to_user() return code */
1670 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
1672 int rc;
1674 #ifdef __x86_64__
1675 if ( !this_cpu(hvm_64bit_hcall) && is_compat_arg_xlat_range(from, len) )
1677 memcpy(to, from, len);
1678 return 0;
1680 #endif
1682 rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0);
1683 return rc ? len : 0; /* fake a copy_from_user() return code */
1686 #define bitmaskof(idx) (1U << ((idx) & 31))
1687 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
1688 unsigned int *ecx, unsigned int *edx)
1690 struct vcpu *v = current;
1692 if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
1693 return;
1695 if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
1696 return;
1698 domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
1700 switch ( input )
1702 case 0x1:
1703 /* Fix up VLAPIC details. */
1704 *ebx &= 0x00FFFFFFu;
1705 *ebx |= (v->vcpu_id * 2) << 24;
1706 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1707 __clear_bit(X86_FEATURE_APIC & 31, edx);
1708 break;
1709 case 0xb:
1710 /* Fix the x2APIC identifier. */
1711 *edx = v->vcpu_id * 2;
1712 break;
1716 void hvm_rdtsc_intercept(struct cpu_user_regs *regs)
1718 uint64_t tsc;
1719 struct vcpu *v = current;
1721 tsc = hvm_get_guest_tsc(v);
1722 regs->eax = (uint32_t)tsc;
1723 regs->edx = (uint32_t)(tsc >> 32);
1726 int hvm_msr_read_intercept(struct cpu_user_regs *regs)
1728 uint32_t ecx = regs->ecx;
1729 uint64_t msr_content = 0;
1730 struct vcpu *v = current;
1731 uint64_t *var_range_base, *fixed_range_base;
1732 int index, mtrr;
1733 uint32_t cpuid[4];
1735 var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
1736 fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
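/* fixed_ranges[] holds the eleven fixed-range MTRR MSRs in order:
 * [0] = MTRRfix64K_00000, [1-2] = MTRRfix16K_{80000,A0000},
 * [3-10] = MTRRfix4K_C0000...F8000 -- hence the +1/+3 index offsets below. */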
1738 hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
1739 mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
1741 switch ( ecx )
1743 case MSR_IA32_TSC:
1744 msr_content = hvm_get_guest_tsc(v);
1745 break;
1747 case MSR_IA32_APICBASE:
1748 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1749 break;
1751 case MSR_IA32_MCG_CAP:
1752 case MSR_IA32_MCG_STATUS:
1753 case MSR_IA32_MC0_STATUS:
1754 case MSR_IA32_MC1_STATUS:
1755 case MSR_IA32_MC2_STATUS:
1756 case MSR_IA32_MC3_STATUS:
1757 case MSR_IA32_MC4_STATUS:
1758 case MSR_IA32_MC5_STATUS:
1759 /* No point in letting the guest see real MCEs */
1760 msr_content = 0;
1761 break;
1763 case MSR_IA32_CR_PAT:
1764 msr_content = v->arch.hvm_vcpu.pat_cr;
1765 break;
1767 case MSR_MTRRcap:
1768 if ( !mtrr )
1769 goto gp_fault;
1770 msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
1771 break;
1772 case MSR_MTRRdefType:
1773 if ( !mtrr )
1774 goto gp_fault;
1775 msr_content = v->arch.hvm_vcpu.mtrr.def_type
1776 | (v->arch.hvm_vcpu.mtrr.enabled << 10);
1777 break;
1778 case MSR_MTRRfix64K_00000:
1779 if ( !mtrr )
1780 goto gp_fault;
1781 msr_content = fixed_range_base[0];
1782 break;
1783 case MSR_MTRRfix16K_80000:
1784 case MSR_MTRRfix16K_A0000:
1785 if ( !mtrr )
1786 goto gp_fault;
1787 index = regs->ecx - MSR_MTRRfix16K_80000;
1788 msr_content = fixed_range_base[index + 1];
1789 break;
1790 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1791 if ( !mtrr )
1792 goto gp_fault;
1793 index = regs->ecx - MSR_MTRRfix4K_C0000;
1794 msr_content = fixed_range_base[index + 3];
1795 break;
1796 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1797 if ( !mtrr )
1798 goto gp_fault;
1799 index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
1800 msr_content = var_range_base[index];
1801 break;
1803 case MSR_K8_ENABLE_C1E:
1804 /* There's no point in letting the guest see C-States.
1805 * Further, this AMD-only register may be accessed if this HVM guest
1806 * has been migrated to an Intel host. This fixes a guest crash
1807 * in this case.
1808 */
1809 msr_content = 0;
1810 break;
1812 default:
1813 return hvm_funcs.msr_read_intercept(regs);
1816 regs->eax = (uint32_t)msr_content;
1817 regs->edx = (uint32_t)(msr_content >> 32);
1818 return X86EMUL_OKAY;
1820 gp_fault:
1821 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1822 return X86EMUL_EXCEPTION;
1825 int hvm_msr_write_intercept(struct cpu_user_regs *regs)
1827 extern bool_t mtrr_var_range_msr_set(
1828 struct mtrr_state *v, u32 msr, u64 msr_content);
1829 extern bool_t mtrr_fix_range_msr_set(
1830 struct mtrr_state *v, int row, u64 msr_content);
1831 extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
1832 extern bool_t pat_msr_set(u64 *pat, u64 msr);
1834 uint32_t ecx = regs->ecx;
1835 uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
1836 struct vcpu *v = current;
1837 int index, mtrr;
1838 uint32_t cpuid[4];
1840 hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
1841 mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
1843 switch ( ecx )
1845 case MSR_IA32_TSC:
1846 hvm_set_guest_tsc(v, msr_content);
1847 pt_reset(v);
1848 break;
1850 case MSR_IA32_APICBASE:
1851 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1852 break;
1854 case MSR_IA32_CR_PAT:
1855 if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
1856 goto gp_fault;
1857 break;
1859 case MSR_MTRRcap:
1860 if ( !mtrr )
1861 goto gp_fault;
1862 goto gp_fault;
1863 case MSR_MTRRdefType:
1864 if ( !mtrr )
1865 goto gp_fault;
1866 if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
1867 goto gp_fault;
1868 break;
1869 case MSR_MTRRfix64K_00000:
1870 if ( !mtrr )
1871 goto gp_fault;
1872 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
1873 goto gp_fault;
1874 break;
1875 case MSR_MTRRfix16K_80000:
1876 case MSR_MTRRfix16K_A0000:
1877 if ( !mtrr )
1878 goto gp_fault;
1879 index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
1880 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1881 index, msr_content) )
1882 goto gp_fault;
1883 break;
1884 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1885 if ( !mtrr )
1886 goto gp_fault;
1887 index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
1888 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1889 index, msr_content) )
1890 goto gp_fault;
1891 break;
1892 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1893 if ( !mtrr )
1894 goto gp_fault;
1895 if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1896 regs->ecx, msr_content) )
1897 goto gp_fault;
1898 break;
1900 default:
1901 return hvm_funcs.msr_write_intercept(regs);
1904 return X86EMUL_OKAY;
1906 gp_fault:
1907 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1908 return X86EMUL_EXCEPTION;
1911 enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
1913 unsigned long intr_shadow;
1915 ASSERT(v == current);
1917 if ( (intack.source != hvm_intsrc_nmi) &&
1918 !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
1919 return hvm_intblk_rflags_ie;
1921 intr_shadow = hvm_funcs.get_interrupt_shadow(v);
1923 if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) )
1924 return hvm_intblk_shadow;
1926 if ( intack.source == hvm_intsrc_nmi )
1927 return ((intr_shadow & HVM_INTR_SHADOW_NMI) ?
1928 hvm_intblk_nmi_iret : hvm_intblk_none);
1930 if ( intack.source == hvm_intsrc_lapic )
1932 uint32_t tpr = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xF0;
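/* The LAPIC delivers the interrupt only if the vector's priority class
 * (bits 7:4) is strictly higher than the TPR's priority class. */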
1933 if ( (tpr >> 4) >= (intack.vector >> 4) )
1934 return hvm_intblk_tpr;
1937 return hvm_intblk_none;
1940 static long hvm_grant_table_op(
1941 unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
1943 if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) )
1944 return -ENOSYS; /* all other commands need auditing */
1945 return do_grant_table_op(cmd, uop, count);
1948 static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE(void) arg)
1950 long rc = do_memory_op(cmd, arg);
1951 if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
1952 current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
1953 return rc;
1956 static long hvm_vcpu_op(
1957 int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
1959 long rc;
1961 switch ( cmd )
1963 case VCPUOP_register_runstate_memory_area:
1964 case VCPUOP_get_runstate_info:
1965 rc = do_vcpu_op(cmd, vcpuid, arg);
1966 break;
1967 default:
1968 rc = -ENOSYS;
1969 break;
1972 return rc;
1975 typedef unsigned long hvm_hypercall_t(
1976 unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
1978 #define HYPERCALL(x) \
1979 [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x
1981 #if defined(__i386__)
1983 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
1984 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
1985 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
1986 [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
1987 HYPERCALL(xen_version),
1988 HYPERCALL(event_channel_op),
1989 HYPERCALL(sched_op),
1990 HYPERCALL(hvm_op)
1991 };
1993 #else /* defined(__x86_64__) */
1995 static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
1997 long rc = compat_memory_op(cmd, arg);
1998 if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
1999 current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
2000 return rc;
2003 static long hvm_vcpu_op_compat32(
2004 int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
2006 long rc;
2008 switch ( cmd )
2010 case VCPUOP_register_runstate_memory_area:
2011 case VCPUOP_get_runstate_info:
2012 rc = compat_vcpu_op(cmd, vcpuid, arg);
2013 break;
2014 default:
2015 rc = -ENOSYS;
2016 break;
2019 return rc;
2022 static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
2023 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
2024 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
2025 [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
2026 HYPERCALL(xen_version),
2027 HYPERCALL(event_channel_op),
2028 HYPERCALL(sched_op),
2029 HYPERCALL(hvm_op)
2030 };
2032 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
2033 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
2034 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
2035 [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
2036 HYPERCALL(xen_version),
2037 HYPERCALL(event_channel_op),
2038 HYPERCALL(sched_op),
2039 HYPERCALL(hvm_op)
2040 };
2042 #endif /* defined(__x86_64__) */
2044 int hvm_do_hypercall(struct cpu_user_regs *regs)
2046 struct vcpu *curr = current;
2047 struct segment_register sreg;
2048 int mode = hvm_guest_x86_mode(curr);
2049 uint32_t eax = regs->eax;
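/*
 * Note the unusual switch below: ring-3 callers in 16/32/64-bit protected
 * mode, and any unrecognised mode, reach the 'default:' label nested inside
 * the DPL check and get -EPERM; mode 0 (real mode) and ring-0 callers fall
 * through to the hypercall dispatch.
 */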
2051 switch ( mode )
2053 #ifdef __x86_64__
2054 case 8:
2055 #endif
2056 case 4:
2057 case 2:
2058 hvm_get_segment_register(curr, x86_seg_ss, &sreg);
2059 if ( unlikely(sreg.attr.fields.dpl == 3) )
2061 default:
2062 regs->eax = -EPERM;
2063 return HVM_HCALL_completed;
2065 case 0:
2066 break;
2069 if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
2070 return viridian_hypercall(regs);
2072 if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
2074 regs->eax = -ENOSYS;
2075 return HVM_HCALL_completed;
2078 this_cpu(hc_preempted) = 0;
2080 #ifdef __x86_64__
2081 if ( mode == 8 )
2083 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%lx, %lx, %lx, %lx, %lx)", eax,
2084 regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);
2086 this_cpu(hvm_64bit_hcall) = 1;
2087 regs->rax = hvm_hypercall64_table[eax](regs->rdi,
2088 regs->rsi,
2089 regs->rdx,
2090 regs->r10,
2091 regs->r8);
2092 this_cpu(hvm_64bit_hcall) = 0;
2093 }
2094 else
2095 #endif
2096 {
2097 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%x, %x, %x, %x, %x)", eax,
2098 (uint32_t)regs->ebx, (uint32_t)regs->ecx,
2099 (uint32_t)regs->edx, (uint32_t)regs->esi,
2100 (uint32_t)regs->edi);
2102 regs->eax = hvm_hypercall32_table[eax]((uint32_t)regs->ebx,
2103 (uint32_t)regs->ecx,
2104 (uint32_t)regs->edx,
2105 (uint32_t)regs->esi,
2106 (uint32_t)regs->edi);
2107 }
2109 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u -> %lx",
2110 eax, (unsigned long)regs->eax);
2112 if ( this_cpu(hc_preempted) )
2113 return HVM_HCALL_preempted;
2115 if ( unlikely(curr->domain->arch.hvm_domain.qemu_mapcache_invalidate) &&
2116 test_and_clear_bool(curr->domain->arch.hvm_domain.
2117 qemu_mapcache_invalidate) )
2118 return HVM_HCALL_invalidate;
2120 return HVM_HCALL_completed;
2121 }
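/*
 * Rough guest-side picture of the ABI enforced above: the hypercall number
 * goes in eax and up to five arguments in ebx/ecx/edx/esi/edi (32-bit mode)
 * or rdi/rsi/rdx/r10/r8 (64-bit mode); the call itself is normally made
 * through a stub in the hypercall page (VMCALL on VMX, VMMCALL on SVM) set
 * up by hvm_hypercall_page_initialise() below. Callers running with
 * SS.DPL == 3 (user mode) are refused with -EPERM above.
 */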
2123 static void hvm_latch_shinfo_size(struct domain *d)
2124 {
2125 /*
2126 * Called from operations which are among the very first executed by
2127 * PV drivers on initialisation or after save/restore. These are sensible
2128 * points at which to sample the execution mode of the guest and latch
2129 * 32- or 64-bit format for shared state.
2130 */
2131 if ( current->domain == d )
2132 d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
2133 }
2135 /* Initialise a hypercall transfer page for an HVM domain using
2136 paravirtualised drivers. */
2137 void hvm_hypercall_page_initialise(struct domain *d,
2138 void *hypercall_page)
2139 {
2140 hvm_latch_shinfo_size(d);
2141 hvm_funcs.init_hypercall_page(d, hypercall_page);
2142 }
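/*
 * The vendor hook fills the page with one stub per hypercall (VMCALL on VMX,
 * VMMCALL on SVM). The guest typically discovers and installs the page via
 * the Xen CPUID leaves and the hypercall-page MSR; that path is not shown in
 * this file.
 */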
2144 static int hvmop_set_pci_intx_level(
2145 XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
2146 {
2147 struct xen_hvm_set_pci_intx_level op;
2148 struct domain *d;
2149 int rc;
2151 if ( copy_from_guest(&op, uop, 1) )
2152 return -EFAULT;
2154 if ( (op.domain > 0) || (op.bus > 0) || (op.device > 31) || (op.intx > 3) )
2155 return -EINVAL;
2157 d = rcu_lock_domain_by_id(op.domid);
2158 if ( d == NULL )
2159 return -ESRCH;
2161 rc = -EPERM;
2162 if ( !IS_PRIV_FOR(current->domain, d) )
2163 goto out;
2165 rc = -EINVAL;
2166 if ( !is_hvm_domain(d) )
2167 goto out;
2169 rc = xsm_hvm_set_pci_intx_level(d);
2170 if ( rc )
2171 goto out;
2173 rc = 0;
2174 switch ( op.level )
2175 {
2176 case 0:
2177 hvm_pci_intx_deassert(d, op.device, op.intx);
2178 break;
2179 case 1:
2180 hvm_pci_intx_assert(d, op.device, op.intx);
2181 break;
2182 default:
2183 rc = -EINVAL;
2184 break;
2185 }
2187 out:
2188 rcu_unlock_domain(d);
2189 return rc;
2190 }
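/*
 * hvmop_set_pci_intx_level() above and the similar HVMOP helpers further
 * down all follow the same shape: copy the argument structure in from guest
 * memory, range-check its fields, take an RCU reference on the target
 * domain, apply the IS_PRIV_FOR and XSM checks, and only then touch the
 * target's virtual interrupt state.
 */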
2192 void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
2193 {
2194 struct domain *d = v->domain;
2195 struct vcpu_guest_context *ctxt;
2196 struct segment_register reg;
2198 BUG_ON(vcpu_runnable(v));
2200 domain_lock(d);
2202 if ( v->is_initialised )
2203 goto out;
2205 if ( !paging_mode_hap(d) )
2206 {
2207 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
2208 put_page(pagetable_get_page(v->arch.guest_table));
2209 v->arch.guest_table = pagetable_null();
2210 }
2212 ctxt = &v->arch.guest_context;
2213 memset(ctxt, 0, sizeof(*ctxt));
2214 ctxt->flags = VGCF_online;
2215 ctxt->user_regs.eflags = 2;
2216 ctxt->user_regs.edx = 0x00000f00;
2217 ctxt->user_regs.eip = ip;
2219 v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
2220 hvm_update_guest_cr(v, 0);
2222 v->arch.hvm_vcpu.guest_cr[2] = 0;
2223 hvm_update_guest_cr(v, 2);
2225 v->arch.hvm_vcpu.guest_cr[3] = 0;
2226 hvm_update_guest_cr(v, 3);
2228 v->arch.hvm_vcpu.guest_cr[4] = 0;
2229 hvm_update_guest_cr(v, 4);
2231 v->arch.hvm_vcpu.guest_efer = 0;
2232 hvm_update_guest_efer(v);
2234 reg.sel = cs;
2235 reg.base = (uint32_t)reg.sel << 4;
2236 reg.limit = 0xffff;
2237 reg.attr.bytes = 0x09b;
2238 hvm_set_segment_register(v, x86_seg_cs, &reg);
2240 reg.sel = reg.base = 0;
2241 reg.limit = 0xffff;
2242 reg.attr.bytes = 0x093;
2243 hvm_set_segment_register(v, x86_seg_ds, &reg);
2244 hvm_set_segment_register(v, x86_seg_es, &reg);
2245 hvm_set_segment_register(v, x86_seg_fs, &reg);
2246 hvm_set_segment_register(v, x86_seg_gs, &reg);
2247 hvm_set_segment_register(v, x86_seg_ss, &reg);
2249 reg.attr.bytes = 0x82; /* LDT */
2250 hvm_set_segment_register(v, x86_seg_ldtr, &reg);
2252 reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
2253 hvm_set_segment_register(v, x86_seg_tr, &reg);
2255 reg.attr.bytes = 0;
2256 hvm_set_segment_register(v, x86_seg_gdtr, &reg);
2257 hvm_set_segment_register(v, x86_seg_idtr, &reg);
2259 /* Sync AP's TSC with BSP's. */
2260 v->arch.hvm_vcpu.cache_tsc_offset =
2261 v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
2262 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
2264 paging_update_paging_modes(v);
2266 v->arch.flags |= TF_kernel_mode;
2267 v->is_initialised = 1;
2268 clear_bit(_VPF_down, &v->pause_flags);
2270 out:
2271 domain_unlock(d);
2272 }
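/*
 * The state set up above is the architectural real-mode reset state: flat
 * 64KiB segments based at selector<<4, protection and paging disabled (CR0
 * holds only ET), EFER clear, and execution resuming at the cs:ip supplied
 * by the caller; hvm_s3_suspend() below uses f000:fff0, the x86 reset
 * vector.
 */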
2274 static void hvm_s3_suspend(struct domain *d)
2275 {
2276 struct vcpu *v;
2278 domain_pause(d);
2279 domain_lock(d);
2281 if ( d->is_dying || (d->vcpu[0] == NULL) ||
2282 test_and_set_bool(d->arch.hvm_domain.is_s3_suspended) )
2283 {
2284 domain_unlock(d);
2285 domain_unpause(d);
2286 return;
2287 }
2289 for_each_vcpu ( d, v )
2290 {
2291 vlapic_reset(vcpu_vlapic(v));
2292 vcpu_reset(v);
2293 }
2295 vpic_reset(d);
2296 vioapic_reset(d);
2297 pit_reset(d);
2298 rtc_reset(d);
2299 pmtimer_reset(d);
2300 hpet_reset(d);
2302 hvm_vcpu_reset_state(d->vcpu[0], 0xf000, 0xfff0);
2304 domain_unlock(d);
2305 }
2307 static void hvm_s3_resume(struct domain *d)
2308 {
2309 if ( test_and_clear_bool(d->arch.hvm_domain.is_s3_suspended) )
2310 domain_unpause(d);
2311 }
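/*
 * S3 is modelled as a platform reset: every vcpu and each emulated platform
 * device (PIC, IO-APIC, PIT, RTC, PM timer, HPET) is reset, the BSP is
 * parked at the reset vector, and the domain stays paused until another
 * (privileged) domain writes 0 to HVM_PARAM_ACPI_S_STATE, which lands in
 * hvm_s3_resume() above.
 */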
2313 static int hvmop_set_isa_irq_level(
2314 XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t) uop)
2315 {
2316 struct xen_hvm_set_isa_irq_level op;
2317 struct domain *d;
2318 int rc;
2320 if ( copy_from_guest(&op, uop, 1) )
2321 return -EFAULT;
2323 if ( op.isa_irq > 15 )
2324 return -EINVAL;
2326 d = rcu_lock_domain_by_id(op.domid);
2327 if ( d == NULL )
2328 return -ESRCH;
2330 rc = -EPERM;
2331 if ( !IS_PRIV_FOR(current->domain, d) )
2332 goto out;
2334 rc = -EINVAL;
2335 if ( !is_hvm_domain(d) )
2336 goto out;
2338 rc = xsm_hvm_set_isa_irq_level(d);
2339 if ( rc )
2340 goto out;
2342 rc = 0;
2343 switch ( op.level )
2344 {
2345 case 0:
2346 hvm_isa_irq_deassert(d, op.isa_irq);
2347 break;
2348 case 1:
2349 hvm_isa_irq_assert(d, op.isa_irq);
2350 break;
2351 default:
2352 rc = -EINVAL;
2353 break;
2354 }
2356 out:
2357 rcu_unlock_domain(d);
2358 return rc;
2359 }
2361 static int hvmop_set_pci_link_route(
2362 XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t) uop)
2363 {
2364 struct xen_hvm_set_pci_link_route op;
2365 struct domain *d;
2366 int rc;
2368 if ( copy_from_guest(&op, uop, 1) )
2369 return -EFAULT;
2371 if ( (op.link > 3) || (op.isa_irq > 15) )
2372 return -EINVAL;
2374 d = rcu_lock_domain_by_id(op.domid);
2375 if ( d == NULL )
2376 return -ESRCH;
2378 rc = -EPERM;
2379 if ( !IS_PRIV_FOR(current->domain, d) )
2380 goto out;
2382 rc = -EINVAL;
2383 if ( !is_hvm_domain(d) )
2384 goto out;
2386 rc = xsm_hvm_set_pci_link_route(d);
2387 if ( rc )
2388 goto out;
2390 rc = 0;
2391 hvm_set_pci_link_route(d, op.link, op.isa_irq);
2393 out:
2394 rcu_unlock_domain(d);
2395 return rc;
2396 }
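/*
 * op.link selects one of the four PCI interrupt links (INTA#..INTD#) of the
 * emulated PCI-ISA bridge and op.isa_irq the ISA IRQ it is routed to,
 * mirroring how guest firmware would program a physical bridge's PIRQ
 * routing registers.
 */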
2398 static int hvmop_flush_tlb_all(void)
2399 {
2400 struct domain *d = current->domain;
2401 struct vcpu *v;
2403 if ( !is_hvm_domain(d) )
2404 return -EINVAL;
2406 /* Avoid deadlock if more than one vcpu tries this at the same time. */
2407 if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
2408 return -EAGAIN;
2410 /* Pause all other vcpus. */
2411 for_each_vcpu ( d, v )
2412 if ( v != current )
2413 vcpu_pause_nosync(v);
2415 /* Now that all VCPUs are signalled to deschedule, we wait... */
2416 for_each_vcpu ( d, v )
2417 if ( v != current )
2418 while ( !vcpu_runnable(v) && v->is_running )
2419 cpu_relax();
2421 /* All other vcpus are paused, safe to unlock now. */
2422 spin_unlock(&d->hypercall_deadlock_mutex);
2424 /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
2425 for_each_vcpu ( d, v )
2426 paging_update_cr3(v);
2428 /* Flush all dirty TLBs. */
2429 flush_tlb_mask(d->domain_dirty_cpumask);
2431 /* Done. */
2432 for_each_vcpu ( d, v )
2433 if ( v != current )
2434 vcpu_unpause(v);
2436 return 0;
2437 }
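/*
 * The trylock above stops two vcpus of the same guest from trying to pause
 * each other at the same time (each would wait forever for the other to
 * deschedule). The -EAGAIN is converted into a hypercall continuation at the
 * bottom of do_hvm_op() below, so the guest transparently retries.
 */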
2439 long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
2441 {
2442 struct domain *curr_d = current->domain;
2443 long rc = 0;
2445 switch ( op )
2446 {
2447 case HVMOP_set_param:
2448 case HVMOP_get_param:
2449 {
2450 struct xen_hvm_param a;
2451 struct hvm_ioreq_page *iorp;
2452 struct domain *d;
2453 struct vcpu *v;
2455 if ( copy_from_guest(&a, arg, 1) )
2456 return -EFAULT;
2458 if ( a.index >= HVM_NR_PARAMS )
2459 return -EINVAL;
2461 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2462 if ( rc != 0 )
2463 return rc;
2465 rc = -EINVAL;
2466 if ( !is_hvm_domain(d) )
2467 goto param_fail;
2469 rc = xsm_hvm_param(d, op);
2470 if ( rc )
2471 goto param_fail;
2473 if ( op == HVMOP_set_param )
2474 {
2475 rc = 0;
2477 switch ( a.index )
2478 {
2479 case HVM_PARAM_IOREQ_PFN:
2480 iorp = &d->arch.hvm_domain.ioreq;
2481 if ( (rc = hvm_set_ioreq_page(d, iorp, a.value)) != 0 )
2482 break;
2483 spin_lock(&iorp->lock);
2484 if ( iorp->va != NULL )
2485 /* Initialise evtchn port info if VCPUs already created. */
2486 for_each_vcpu ( d, v )
2487 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
2488 spin_unlock(&iorp->lock);
2489 break;
2490 case HVM_PARAM_BUFIOREQ_PFN:
2491 iorp = &d->arch.hvm_domain.buf_ioreq;
2492 rc = hvm_set_ioreq_page(d, iorp, a.value);
2493 break;
2494 case HVM_PARAM_CALLBACK_IRQ:
2495 hvm_set_callback_via(d, a.value);
2496 hvm_latch_shinfo_size(d);
2497 break;
2498 case HVM_PARAM_TIMER_MODE:
2499 if ( a.value > HVMPTM_one_missed_tick_pending )
2500 rc = -EINVAL;
2501 break;
2502 case HVM_PARAM_VIRIDIAN:
2503 if ( a.value > 1 )
2504 rc = -EINVAL;
2505 break;
2506 case HVM_PARAM_IDENT_PT:
2507 /* Not reflexive, as we must domain_pause(). */
2508 rc = -EPERM;
2509 if ( curr_d == d )
2510 break;
2512 rc = -EINVAL;
2513 if ( d->arch.hvm_domain.params[a.index] != 0 )
2514 break;
2516 rc = 0;
2517 if ( !paging_mode_hap(d) )
2518 break;
2520 /*
2521 * Update GUEST_CR3 in each VMCS to point at identity map.
2522 * All foreign updates to guest state must synchronise on
2523 * the domctl_lock.
2524 */
2525 rc = -EAGAIN;
2526 if ( !domctl_lock_acquire() )
2527 break;
2529 rc = 0;
2530 domain_pause(d);
2531 d->arch.hvm_domain.params[a.index] = a.value;
2532 for_each_vcpu ( d, v )
2533 paging_update_cr3(v);
2534 domain_unpause(d);
2536 domctl_lock_release();
2537 break;
2538 case HVM_PARAM_DM_DOMAIN:
2539 /* Not reflexive, as we must domain_pause(). */
2540 rc = -EPERM;
2541 if ( curr_d == d )
2542 break;
2544 if ( a.value == DOMID_SELF )
2545 a.value = curr_d->domain_id;
2547 rc = 0;
2548 domain_pause(d); /* safe to change per-vcpu xen_port */
2549 iorp = &d->arch.hvm_domain.ioreq;
2550 for_each_vcpu ( d, v )
2551 {
2552 int old_port, new_port;
2553 new_port = alloc_unbound_xen_event_channel(v, a.value);
2554 if ( new_port < 0 )
2555 {
2556 rc = new_port;
2557 break;
2558 }
2559 /* xchg() ensures that only we call free_xen_event_channel(). */
2560 old_port = xchg(&v->arch.hvm_vcpu.xen_port, new_port);
2561 free_xen_event_channel(v, old_port);
2562 spin_lock(&iorp->lock);
2563 if ( iorp->va != NULL )
2564 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
2565 spin_unlock(&iorp->lock);
2566 }
2567 domain_unpause(d);
2568 break;
2569 case HVM_PARAM_ACPI_S_STATE:
2570 /* Not reflexive, as we must domain_pause(). */
2571 rc = -EPERM;
2572 if ( curr_d == d )
2573 break;
2575 rc = 0;
2576 if ( a.value == 3 )
2577 hvm_s3_suspend(d);
2578 else if ( a.value == 0 )
2579 hvm_s3_resume(d);
2580 else
2581 rc = -EINVAL;
2583 break;
2584 }
2586 if ( rc == 0 )
2587 d->arch.hvm_domain.params[a.index] = a.value;
2588 }
2589 else
2590 {
2591 switch ( a.index )
2592 {
2593 case HVM_PARAM_ACPI_S_STATE:
2594 a.value = d->arch.hvm_domain.is_s3_suspended ? 3 : 0;
2595 break;
2596 default:
2597 a.value = d->arch.hvm_domain.params[a.index];
2598 break;
2599 }
2600 rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
2601 }
2603 HVM_DBG_LOG(DBG_LEVEL_HCALL, "%s param %u = %"PRIx64,
2604 op == HVMOP_set_param ? "set" : "get",
2605 a.index, a.value);
2607 param_fail:
2608 rcu_unlock_domain(d);
2609 break;
2610 }
2612 case HVMOP_set_pci_intx_level:
2613 rc = hvmop_set_pci_intx_level(
2614 guest_handle_cast(arg, xen_hvm_set_pci_intx_level_t));
2615 break;
2617 case HVMOP_set_isa_irq_level:
2618 rc = hvmop_set_isa_irq_level(
2619 guest_handle_cast(arg, xen_hvm_set_isa_irq_level_t));
2620 break;
2622 case HVMOP_set_pci_link_route:
2623 rc = hvmop_set_pci_link_route(
2624 guest_handle_cast(arg, xen_hvm_set_pci_link_route_t));
2625 break;
2627 case HVMOP_flush_tlbs:
2628 rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS;
2629 break;
2631 case HVMOP_track_dirty_vram:
2632 {
2633 struct xen_hvm_track_dirty_vram a;
2634 struct domain *d;
2636 if ( copy_from_guest(&a, arg, 1) )
2637 return -EFAULT;
2639 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2640 if ( rc != 0 )
2641 return rc;
2643 rc = -EINVAL;
2644 if ( !is_hvm_domain(d) )
2645 goto param_fail2;
2647 rc = xsm_hvm_param(d, op);
2648 if ( rc )
2649 goto param_fail2;
2651 rc = -ESRCH;
2652 if ( d->is_dying )
2653 goto param_fail2;
2655 rc = -EINVAL;
2656 if ( !shadow_mode_enabled(d) )
2657 goto param_fail2;
2658 if ( d->vcpu[0] == NULL )
2659 goto param_fail2;
2661 rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
2663 param_fail2:
2664 rcu_unlock_domain(d);
2665 break;
2666 }
2668 case HVMOP_modified_memory:
2669 {
2670 struct xen_hvm_modified_memory a;
2671 struct domain *d;
2672 unsigned long pfn;
2674 if ( copy_from_guest(&a, arg, 1) )
2675 return -EFAULT;
2677 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2678 if ( rc != 0 )
2679 return rc;
2681 rc = -EINVAL;
2682 if ( !is_hvm_domain(d) )
2683 goto param_fail3;
2685 rc = xsm_hvm_param(d, op);
2686 if ( rc )
2687 goto param_fail3;
2689 rc = -EINVAL;
2690 if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
2691 ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
2692 ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
2693 goto param_fail3;
2695 rc = 0;
2696 if ( !paging_mode_log_dirty(d) )
2697 goto param_fail3;
2699 for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
2700 {
2701 p2m_type_t t;
2702 mfn_t mfn = gfn_to_mfn(d, pfn, &t);
2703 if ( mfn_x(mfn) != INVALID_MFN )
2704 {
2705 paging_mark_dirty(d, mfn_x(mfn));
2706 /* These are most probably not page tables any more, */
2707 /* so don't take a long time and don't die either. */
2708 sh_remove_shadows(d->vcpu[0], mfn, 1, 0);
2709 }
2710 }
2712 param_fail3:
2713 rcu_unlock_domain(d);
2714 break;
2715 }
2717 case HVMOP_set_mem_type:
2718 {
2719 struct xen_hvm_set_mem_type a;
2720 struct domain *d;
2721 unsigned long pfn;
2723 /* Map interface memory types to internal p2m types. */
2724 p2m_type_t memtype[] = {
2725 p2m_ram_rw, /* HVMMEM_ram_rw */
2726 p2m_ram_ro, /* HVMMEM_ram_ro */
2727 p2m_mmio_dm /* HVMMEM_mmio_dm */
2728 };
2730 if ( copy_from_guest(&a, arg, 1) )
2731 return -EFAULT;
2733 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2734 if ( rc != 0 )
2735 return rc;
2737 rc = -EINVAL;
2738 if ( !is_hvm_domain(d) )
2739 goto param_fail4;
2741 rc = -EINVAL;
2742 if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
2743 ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
2744 ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
2745 goto param_fail4;
2747 if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
2748 goto param_fail4;
2750 rc = 0;
2752 for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
2753 {
2754 p2m_type_t t;
2755 mfn_t mfn;
2756 mfn = gfn_to_mfn(d, pfn, &t);
2757 p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
2758 }
2760 param_fail4:
2761 rcu_unlock_domain(d);
2762 break;
2763 }
2765 default:
2766 {
2767 gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
2768 rc = -ENOSYS;
2769 break;
2770 }
2771 }
2773 if ( rc == -EAGAIN )
2774 rc = hypercall_create_continuation(
2775 __HYPERVISOR_hvm_op, "lh", op, arg);
2777 return rc;
2778 }
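/*
 * Any handler above that bails out with -EAGAIN (for example the
 * HVM_PARAM_IDENT_PT path when the domctl lock is contended, or a contended
 * TLB flush) is restarted via a hypercall continuation instead of returning
 * the error to the caller.
 */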
2780 int hvm_debug_op(struct vcpu *v, int32_t op)
2781 {
2782 int rc;
2784 switch ( op )
2785 {
2786 case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
2787 case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
2788 rc = -ENOSYS;
2789 if ( !cpu_has_monitor_trap_flag )
2790 break;
2791 rc = 0;
2792 vcpu_pause(v);
2793 v->arch.hvm_vcpu.single_step =
2794 (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON);
2795 vcpu_unpause(v); /* guest will latch new state */
2796 break;
2797 default:
2798 rc = -ENOSYS;
2799 break;
2800 }
2802 return rc;
2803 }
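/*
 * Single-stepping relies on the hardware monitor trap flag (a VMX feature),
 * hence the cpu_has_monitor_trap_flag gate above; without it the caller gets
 * -ENOSYS and has to fall back to other debugging methods.
 */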
2806 /*
2807 * Local variables:
2808 * mode: C
2809 * c-set-style: "BSD"
2810 * c-basic-offset: 4
2811 * tab-width: 4
2812 * indent-tabs-mode: nil
2813 * End:
2814 */