ia64/xen-unstable

view xen/arch/x86/hvm/hvm.c @ 19765:f2a3b7188906

x86, hvm: set vcpu->is_initialised after restore/migration

After restore/migration, the xenctx command for auxiliary vcpus
fails with a message "xc_vcpu_getcontext: No data available".

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jun 16 13:39:00 2009 +0100 (2009-06-16)
parents 775afcdc2759
children cb6f8a34b59a
line source
1 /*
2 * hvm.c: Common hardware virtual machine abstractions.
3 *
4 * Copyright (c) 2004, Intel Corporation.
5 * Copyright (c) 2005, International Business Machines Corporation.
6 * Copyright (c) 2008, Citrix Systems, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
22 #include <xen/config.h>
23 #include <xen/ctype.h>
24 #include <xen/init.h>
25 #include <xen/lib.h>
26 #include <xen/trace.h>
27 #include <xen/sched.h>
28 #include <xen/irq.h>
29 #include <xen/softirq.h>
30 #include <xen/domain.h>
31 #include <xen/domain_page.h>
32 #include <xen/hypercall.h>
33 #include <xen/guest_access.h>
34 #include <xen/event.h>
35 #include <xen/paging.h>
36 #include <asm/shadow.h>
37 #include <asm/hap.h>
38 #include <asm/current.h>
39 #include <asm/e820.h>
40 #include <asm/io.h>
41 #include <asm/regs.h>
42 #include <asm/cpufeature.h>
43 #include <asm/processor.h>
44 #include <asm/types.h>
45 #include <asm/msr.h>
46 #include <asm/mc146818rtc.h>
47 #include <asm/spinlock.h>
48 #include <asm/hvm/hvm.h>
49 #include <asm/hvm/vpt.h>
50 #include <asm/hvm/support.h>
51 #include <asm/hvm/cacheattr.h>
52 #include <asm/hvm/trace.h>
53 #include <public/sched.h>
54 #include <public/hvm/ioreq.h>
55 #include <public/version.h>
56 #include <public/memory.h>
58 int hvm_enabled __read_mostly;
60 unsigned int opt_hvm_debug_level __read_mostly;
61 integer_param("hvm_debug", opt_hvm_debug_level);
63 int opt_softtsc;
64 boolean_param("softtsc", opt_softtsc);
66 struct hvm_function_table hvm_funcs __read_mostly;
68 /* I/O permission bitmap is globally shared by all HVM guests. */
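/* (Three pages: SVM's IOPM is 12 Kbytes; VMX uses only the first two pages as I/O bitmaps A and B.) */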
69 unsigned long __attribute__ ((__section__ (".bss.page_aligned")))
70 hvm_io_bitmap[3*PAGE_SIZE/BYTES_PER_LONG];
72 void hvm_enable(struct hvm_function_table *fns)
73 {
74 BUG_ON(hvm_enabled);
75 printk("HVM: %s enabled\n", fns->name);
77 /*
78 * Allow direct access to the PC debug ports 0x80 and 0xed (they are
79 * often used for I/O delays, but the vmexits simply slow things down).
80 */
81 memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
82 __clear_bit(0x80, hvm_io_bitmap);
83 __clear_bit(0xed, hvm_io_bitmap);
85 hvm_funcs = *fns;
86 hvm_enabled = 1;
88 if ( hvm_funcs.hap_supported )
89 printk("HVM: Hardware Assisted Paging detected.\n");
90 }
92 /*
93 * Need to re-inject a given event? We avoid re-injecting software exceptions
94 * and interrupts because the faulting/trapping instruction can simply be
95 * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
96 * INT3/INTO/INTn).
97 */
98 int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
99 {
100 switch ( type )
101 {
102 case X86_EVENTTYPE_EXT_INTR:
103 case X86_EVENTTYPE_NMI:
104 return 1;
105 case X86_EVENTTYPE_HW_EXCEPTION:
106 /*
107 * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
108 * check for these vectors, as they are really SW Exceptions. SVM has
109 * not updated RIP to point after the trapping instruction (INT3/INTO).
110 */
111 return (vector != 3) && (vector != 4);
112 default:
113 /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
114 break;
115 }
116 return 0;
117 }
119 /*
120 * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1.
121 * This means we can assume that @vec2 is contributory or a page fault.
122 */
123 uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
124 {
125 /* Exception during double-fault delivery always causes a triple fault. */
126 if ( vec1 == TRAP_double_fault )
127 {
128 hvm_triple_fault();
129 return TRAP_double_fault; /* dummy return */
130 }
132 /* Exception during page-fault delivery always causes a double fault. */
133 if ( vec1 == TRAP_page_fault )
134 return TRAP_double_fault;
136 /* Discard the first exception if it's benign or if we now have a #PF. */
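/* 0x7c01 == (1<<#DE)|(1<<#TS)|(1<<#NP)|(1<<#SS)|(1<<#GP)|(1<<#PF): the contributory exceptions plus page faults. */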
137 if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) )
138 return vec2;
140 /* Cannot combine the exceptions: double fault. */
141 return TRAP_double_fault;
142 }
144 void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
145 {
146 u64 host_tsc;
148 rdtscll(host_tsc);
150 v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
151 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
152 }
154 u64 hvm_get_guest_tsc(struct vcpu *v)
155 {
156 u64 host_tsc;
158 if ( opt_softtsc )
159 host_tsc = hvm_get_guest_time(v);
160 else
161 rdtscll(host_tsc);
163 return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
164 }
166 void hvm_migrate_timers(struct vcpu *v)
167 {
168 rtc_migrate_timers(v);
169 pt_migrate(v);
170 }
172 void hvm_do_resume(struct vcpu *v)
173 {
174 ioreq_t *p;
176 pt_restore_timer(v);
178 /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
179 p = &get_ioreq(v)->vp_ioreq;
180 while ( p->state != STATE_IOREQ_NONE )
181 {
182 switch ( p->state )
183 {
184 case STATE_IORESP_READY: /* IORESP_READY -> NONE */
185 hvm_io_assist();
186 break;
187 case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
188 case STATE_IOREQ_INPROCESS:
189 wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
190 (p->state != STATE_IOREQ_READY) &&
191 (p->state != STATE_IOREQ_INPROCESS));
192 break;
193 default:
194 gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
195 domain_crash(v->domain);
196 return; /* bail */
197 }
198 }
199 }
201 static void hvm_init_ioreq_page(
202 struct domain *d, struct hvm_ioreq_page *iorp)
203 {
204 memset(iorp, 0, sizeof(*iorp));
205 spin_lock_init(&iorp->lock);
206 domain_pause(d);
207 }
209 static void hvm_destroy_ioreq_page(
210 struct domain *d, struct hvm_ioreq_page *iorp)
211 {
212 spin_lock(&iorp->lock);
214 ASSERT(d->is_dying);
216 if ( iorp->va != NULL )
217 {
218 unmap_domain_page_global(iorp->va);
219 put_page_and_type(iorp->page);
220 iorp->va = NULL;
221 }
223 spin_unlock(&iorp->lock);
224 }
226 static int hvm_set_ioreq_page(
227 struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
228 {
229 struct page_info *page;
230 p2m_type_t p2mt;
231 unsigned long mfn;
232 void *va;
234 mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
235 if ( !p2m_is_ram(p2mt) )
236 return -EINVAL;
237 ASSERT(mfn_valid(mfn));
239 page = mfn_to_page(mfn);
240 if ( !get_page_and_type(page, d, PGT_writable_page) )
241 return -EINVAL;
243 va = map_domain_page_global(mfn);
244 if ( va == NULL )
245 {
246 put_page_and_type(page);
247 return -ENOMEM;
248 }
250 spin_lock(&iorp->lock);
252 if ( (iorp->va != NULL) || d->is_dying )
253 {
254 spin_unlock(&iorp->lock);
255 unmap_domain_page_global(va);
256 put_page_and_type(mfn_to_page(mfn));
257 return -EINVAL;
258 }
260 iorp->va = va;
261 iorp->page = page;
263 spin_unlock(&iorp->lock);
265 domain_unpause(d);
267 return 0;
268 }
270 static int hvm_print_line(
271 int dir, uint32_t port, uint32_t bytes, uint32_t *val)
272 {
273 struct vcpu *curr = current;
274 struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
275 char c = *val;
277 BUG_ON(bytes != 1);
279 /* Accept only printable characters, newline, and horizontal tab. */
280 if ( !isprint(c) && (c != '\n') && (c != '\t') )
281 return X86EMUL_OKAY;
283 spin_lock(&hd->pbuf_lock);
284 hd->pbuf[hd->pbuf_idx++] = c;
285 if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
286 {
287 if ( c != '\n' )
288 hd->pbuf[hd->pbuf_idx++] = '\n';
289 hd->pbuf[hd->pbuf_idx] = '\0';
290 printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf);
291 hd->pbuf_idx = 0;
292 }
293 spin_unlock(&hd->pbuf_lock);
295 return X86EMUL_OKAY;
296 }
298 int hvm_domain_initialise(struct domain *d)
299 {
300 int rc;
302 if ( !hvm_enabled )
303 {
304 gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
305 "on a non-VT/AMDV platform.\n");
306 return -EINVAL;
307 }
309 spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
310 spin_lock_init(&d->arch.hvm_domain.irq_lock);
311 spin_lock_init(&d->arch.hvm_domain.uc_lock);
313 INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
314 spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
316 hvm_init_guest_time(d);
318 d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
320 hvm_init_cacheattr_region_list(d);
322 rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
323 if ( rc != 0 )
324 goto fail1;
326 vpic_init(d);
328 rc = vioapic_init(d);
329 if ( rc != 0 )
330 goto fail1;
332 stdvga_init(d);
334 rtc_init(d);
336 hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
337 hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
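/* Port 0xe9 is the conventional Bochs/QEMU debug-output port; hvm_print_line() forwards it to the Xen console log. */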
339 register_portio_handler(d, 0xe9, 1, hvm_print_line);
341 rc = hvm_funcs.domain_initialise(d);
342 if ( rc != 0 )
343 goto fail2;
345 return 0;
347 fail2:
348 rtc_deinit(d);
349 stdvga_deinit(d);
350 vioapic_deinit(d);
351 fail1:
352 hvm_destroy_cacheattr_region_list(d);
353 return rc;
354 }
356 extern void msixtbl_pt_cleanup(struct domain *d);
358 void hvm_domain_relinquish_resources(struct domain *d)
359 {
360 hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
361 hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
363 msixtbl_pt_cleanup(d);
365 /* Stop all asynchronous timer actions. */
366 rtc_deinit(d);
367 if ( d->vcpu[0] != NULL )
368 {
369 pit_deinit(d);
370 pmtimer_deinit(d);
371 hpet_deinit(d);
372 }
373 }
375 void hvm_domain_destroy(struct domain *d)
376 {
377 hvm_funcs.domain_destroy(d);
378 rtc_deinit(d);
379 stdvga_deinit(d);
380 vioapic_deinit(d);
381 hvm_destroy_cacheattr_region_list(d);
382 }
384 static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
385 {
386 struct vcpu *v;
387 struct hvm_hw_cpu ctxt;
388 struct segment_register seg;
389 struct vcpu_guest_context *vc;
391 for_each_vcpu ( d, v )
392 {
393 /* We don't need to save state for a vcpu that is down; the restore
394 * code will leave it down if there is nothing saved. */
395 if ( test_bit(_VPF_down, &v->pause_flags) )
396 continue;
398 /* Architecture-specific vmcs/vmcb bits */
399 hvm_funcs.save_cpu_ctxt(v, &ctxt);
401 hvm_get_segment_register(v, x86_seg_idtr, &seg);
402 ctxt.idtr_limit = seg.limit;
403 ctxt.idtr_base = seg.base;
405 hvm_get_segment_register(v, x86_seg_gdtr, &seg);
406 ctxt.gdtr_limit = seg.limit;
407 ctxt.gdtr_base = seg.base;
409 hvm_get_segment_register(v, x86_seg_cs, &seg);
410 ctxt.cs_sel = seg.sel;
411 ctxt.cs_limit = seg.limit;
412 ctxt.cs_base = seg.base;
413 ctxt.cs_arbytes = seg.attr.bytes;
415 hvm_get_segment_register(v, x86_seg_ds, &seg);
416 ctxt.ds_sel = seg.sel;
417 ctxt.ds_limit = seg.limit;
418 ctxt.ds_base = seg.base;
419 ctxt.ds_arbytes = seg.attr.bytes;
421 hvm_get_segment_register(v, x86_seg_es, &seg);
422 ctxt.es_sel = seg.sel;
423 ctxt.es_limit = seg.limit;
424 ctxt.es_base = seg.base;
425 ctxt.es_arbytes = seg.attr.bytes;
427 hvm_get_segment_register(v, x86_seg_ss, &seg);
428 ctxt.ss_sel = seg.sel;
429 ctxt.ss_limit = seg.limit;
430 ctxt.ss_base = seg.base;
431 ctxt.ss_arbytes = seg.attr.bytes;
433 hvm_get_segment_register(v, x86_seg_fs, &seg);
434 ctxt.fs_sel = seg.sel;
435 ctxt.fs_limit = seg.limit;
436 ctxt.fs_base = seg.base;
437 ctxt.fs_arbytes = seg.attr.bytes;
439 hvm_get_segment_register(v, x86_seg_gs, &seg);
440 ctxt.gs_sel = seg.sel;
441 ctxt.gs_limit = seg.limit;
442 ctxt.gs_base = seg.base;
443 ctxt.gs_arbytes = seg.attr.bytes;
445 hvm_get_segment_register(v, x86_seg_tr, &seg);
446 ctxt.tr_sel = seg.sel;
447 ctxt.tr_limit = seg.limit;
448 ctxt.tr_base = seg.base;
449 ctxt.tr_arbytes = seg.attr.bytes;
451 hvm_get_segment_register(v, x86_seg_ldtr, &seg);
452 ctxt.ldtr_sel = seg.sel;
453 ctxt.ldtr_limit = seg.limit;
454 ctxt.ldtr_base = seg.base;
455 ctxt.ldtr_arbytes = seg.attr.bytes;
457 vc = &v->arch.guest_context;
459 if ( v->fpu_initialised )
460 memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
461 else
462 memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
464 ctxt.rax = vc->user_regs.eax;
465 ctxt.rbx = vc->user_regs.ebx;
466 ctxt.rcx = vc->user_regs.ecx;
467 ctxt.rdx = vc->user_regs.edx;
468 ctxt.rbp = vc->user_regs.ebp;
469 ctxt.rsi = vc->user_regs.esi;
470 ctxt.rdi = vc->user_regs.edi;
471 ctxt.rsp = vc->user_regs.esp;
472 ctxt.rip = vc->user_regs.eip;
473 ctxt.rflags = vc->user_regs.eflags;
474 #ifdef __x86_64__
475 ctxt.r8 = vc->user_regs.r8;
476 ctxt.r9 = vc->user_regs.r9;
477 ctxt.r10 = vc->user_regs.r10;
478 ctxt.r11 = vc->user_regs.r11;
479 ctxt.r12 = vc->user_regs.r12;
480 ctxt.r13 = vc->user_regs.r13;
481 ctxt.r14 = vc->user_regs.r14;
482 ctxt.r15 = vc->user_regs.r15;
483 #endif
484 ctxt.dr0 = vc->debugreg[0];
485 ctxt.dr1 = vc->debugreg[1];
486 ctxt.dr2 = vc->debugreg[2];
487 ctxt.dr3 = vc->debugreg[3];
488 ctxt.dr6 = vc->debugreg[6];
489 ctxt.dr7 = vc->debugreg[7];
491 if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
492 return 1;
493 }
494 return 0;
495 }
497 static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
498 {
499 int vcpuid, rc;
500 struct vcpu *v;
501 struct hvm_hw_cpu ctxt;
502 struct segment_register seg;
503 struct vcpu_guest_context *vc;
505 /* Which vcpu is this? */
506 vcpuid = hvm_load_instance(h);
507 if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL )
508 {
509 gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
510 return -EINVAL;
511 }
512 vc = &v->arch.guest_context;
514 /* Need to init this vcpu before loading its contents */
515 rc = 0;
516 domain_lock(d);
517 if ( !v->is_initialised )
518 rc = boot_vcpu(d, vcpuid, vc);
519 domain_unlock(d);
520 if ( rc != 0 )
521 return rc;
523 if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
524 return -EINVAL;
526 /* Sanity check some control registers. */
527 if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
528 !(ctxt.cr0 & X86_CR0_ET) ||
529 ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
530 {
531 gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
532 ctxt.cr0);
533 return -EINVAL;
534 }
536 if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
537 {
538 gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
539 ctxt.cr4);
540 return -EINVAL;
541 }
543 if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA |
544 EFER_NX | EFER_SCE)) ||
545 ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
546 (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
547 (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
548 (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) ||
549 ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
550 {
551 gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
552 ctxt.msr_efer);
553 return -EINVAL;
554 }
556 /* Older Xen versions used to save the segment arbytes directly
557 * from the VMCS on Intel hosts. Detect this and rearrange them
558 * into the struct segment_register format. */
559 #define UNFOLD_ARBYTES(_r) \
560 if ( (_r & 0xf000) && !(_r & 0x0f00) ) \
561 _r = ((_r & 0xff) | ((_r >> 4) & 0xf00))
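/* Example: the old-format arbytes of a flat 32-bit code segment, 0xc09b, become 0x0c9b. */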
562 UNFOLD_ARBYTES(ctxt.cs_arbytes);
563 UNFOLD_ARBYTES(ctxt.ds_arbytes);
564 UNFOLD_ARBYTES(ctxt.es_arbytes);
565 UNFOLD_ARBYTES(ctxt.fs_arbytes);
566 UNFOLD_ARBYTES(ctxt.gs_arbytes);
567 UNFOLD_ARBYTES(ctxt.ss_arbytes);
568 UNFOLD_ARBYTES(ctxt.tr_arbytes);
569 UNFOLD_ARBYTES(ctxt.ldtr_arbytes);
570 #undef UNFOLD_ARBYTES
572 /* Architecture-specific vmcs/vmcb bits */
573 if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
574 return -EINVAL;
576 seg.limit = ctxt.idtr_limit;
577 seg.base = ctxt.idtr_base;
578 hvm_set_segment_register(v, x86_seg_idtr, &seg);
580 seg.limit = ctxt.gdtr_limit;
581 seg.base = ctxt.gdtr_base;
582 hvm_set_segment_register(v, x86_seg_gdtr, &seg);
584 seg.sel = ctxt.cs_sel;
585 seg.limit = ctxt.cs_limit;
586 seg.base = ctxt.cs_base;
587 seg.attr.bytes = ctxt.cs_arbytes;
588 hvm_set_segment_register(v, x86_seg_cs, &seg);
590 seg.sel = ctxt.ds_sel;
591 seg.limit = ctxt.ds_limit;
592 seg.base = ctxt.ds_base;
593 seg.attr.bytes = ctxt.ds_arbytes;
594 hvm_set_segment_register(v, x86_seg_ds, &seg);
596 seg.sel = ctxt.es_sel;
597 seg.limit = ctxt.es_limit;
598 seg.base = ctxt.es_base;
599 seg.attr.bytes = ctxt.es_arbytes;
600 hvm_set_segment_register(v, x86_seg_es, &seg);
602 seg.sel = ctxt.ss_sel;
603 seg.limit = ctxt.ss_limit;
604 seg.base = ctxt.ss_base;
605 seg.attr.bytes = ctxt.ss_arbytes;
606 hvm_set_segment_register(v, x86_seg_ss, &seg);
608 seg.sel = ctxt.fs_sel;
609 seg.limit = ctxt.fs_limit;
610 seg.base = ctxt.fs_base;
611 seg.attr.bytes = ctxt.fs_arbytes;
612 hvm_set_segment_register(v, x86_seg_fs, &seg);
614 seg.sel = ctxt.gs_sel;
615 seg.limit = ctxt.gs_limit;
616 seg.base = ctxt.gs_base;
617 seg.attr.bytes = ctxt.gs_arbytes;
618 hvm_set_segment_register(v, x86_seg_gs, &seg);
620 seg.sel = ctxt.tr_sel;
621 seg.limit = ctxt.tr_limit;
622 seg.base = ctxt.tr_base;
623 seg.attr.bytes = ctxt.tr_arbytes;
624 hvm_set_segment_register(v, x86_seg_tr, &seg);
626 seg.sel = ctxt.ldtr_sel;
627 seg.limit = ctxt.ldtr_limit;
628 seg.base = ctxt.ldtr_base;
629 seg.attr.bytes = ctxt.ldtr_arbytes;
630 hvm_set_segment_register(v, x86_seg_ldtr, &seg);
632 memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
634 vc->user_regs.eax = ctxt.rax;
635 vc->user_regs.ebx = ctxt.rbx;
636 vc->user_regs.ecx = ctxt.rcx;
637 vc->user_regs.edx = ctxt.rdx;
638 vc->user_regs.ebp = ctxt.rbp;
639 vc->user_regs.esi = ctxt.rsi;
640 vc->user_regs.edi = ctxt.rdi;
641 vc->user_regs.esp = ctxt.rsp;
642 vc->user_regs.eip = ctxt.rip;
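/* Bit 1 of EFLAGS is reserved and always reads as 1, hence the '| 2'. */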
643 vc->user_regs.eflags = ctxt.rflags | 2;
644 #ifdef __x86_64__
645 vc->user_regs.r8 = ctxt.r8;
646 vc->user_regs.r9 = ctxt.r9;
647 vc->user_regs.r10 = ctxt.r10;
648 vc->user_regs.r11 = ctxt.r11;
649 vc->user_regs.r12 = ctxt.r12;
650 vc->user_regs.r13 = ctxt.r13;
651 vc->user_regs.r14 = ctxt.r14;
652 vc->user_regs.r15 = ctxt.r15;
653 #endif
654 vc->debugreg[0] = ctxt.dr0;
655 vc->debugreg[1] = ctxt.dr1;
656 vc->debugreg[2] = ctxt.dr2;
657 vc->debugreg[3] = ctxt.dr3;
658 vc->debugreg[6] = ctxt.dr6;
659 vc->debugreg[7] = ctxt.dr7;
661 vc->flags = VGCF_online;
662 v->fpu_initialised = 1;
664 /* Auxiliary processors should be woken immediately. */
665 v->is_initialised = 1;
666 clear_bit(_VPF_down, &v->pause_flags);
667 vcpu_wake(v);
669 return 0;
670 }
672 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
673 1, HVMSR_PER_VCPU);
675 int hvm_vcpu_initialise(struct vcpu *v)
676 {
677 int rc;
679 if ( (rc = vlapic_init(v)) != 0 )
680 goto fail1;
682 if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
683 goto fail2;
685 /* Create ioreq event channel. */
686 rc = alloc_unbound_xen_event_channel(v, 0);
687 if ( rc < 0 )
688 goto fail3;
690 /* Register ioreq event channel. */
691 v->arch.hvm_vcpu.xen_port = rc;
692 spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
693 if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
694 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
695 spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);
697 spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
698 INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
700 rc = hvm_vcpu_cacheattr_init(v);
701 if ( rc != 0 )
702 goto fail3;
704 tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
705 (void(*)(unsigned long))hvm_assert_evtchn_irq,
706 (unsigned long)v);
708 v->arch.guest_context.user_regs.eflags = 2;
710 if ( v->vcpu_id == 0 )
711 {
712 /* NB. All these really belong in hvm_domain_initialise(). */
713 pit_init(v, cpu_khz);
714 pmtimer_init(v);
715 hpet_init(v);
717 /* Init guest TSC to start from zero. */
718 hvm_set_guest_tsc(v, 0);
720 /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
721 v->is_initialised = 1;
722 clear_bit(_VPF_down, &v->pause_flags);
723 }
725 return 0;
727 fail3:
728 hvm_funcs.vcpu_destroy(v);
729 fail2:
730 vlapic_destroy(v);
731 fail1:
732 return rc;
733 }
735 void hvm_vcpu_destroy(struct vcpu *v)
736 {
737 tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
738 hvm_vcpu_cacheattr_destroy(v);
739 vlapic_destroy(v);
740 hvm_funcs.vcpu_destroy(v);
742 /* Event channel is already freed by evtchn_destroy(). */
743 /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
744 }
746 void hvm_vcpu_down(struct vcpu *v)
747 {
748 struct domain *d = v->domain;
749 int online_count = 0;
751 /* Doesn't halt us immediately, but we'll never return to guest context. */
752 set_bit(_VPF_down, &v->pause_flags);
753 vcpu_sleep_nosync(v);
755 /* Any other VCPUs online? ... */
756 domain_lock(d);
757 for_each_vcpu ( d, v )
758 if ( !test_bit(_VPF_down, &v->pause_flags) )
759 online_count++;
760 domain_unlock(d);
762 /* ... Shut down the domain if not. */
763 if ( online_count == 0 )
764 {
765 gdprintk(XENLOG_INFO, "All CPUs offline -- powering off.\n");
766 domain_shutdown(d, SHUTDOWN_poweroff);
767 }
768 }
770 void hvm_send_assist_req(struct vcpu *v)
771 {
772 ioreq_t *p;
774 if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
775 return; /* implicitly bins the i/o operation */
777 p = &get_ioreq(v)->vp_ioreq;
778 if ( unlikely(p->state != STATE_IOREQ_NONE) )
779 {
780 /* This indicates a bug in the device model. Crash the domain. */
781 gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
782 domain_crash(v->domain);
783 return;
784 }
786 prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
788 /*
789 * Following happens /after/ blocking and setting up ioreq contents.
790 * prepare_wait_on_xen_event_channel() is an implicit barrier.
791 */
792 p->state = STATE_IOREQ_READY;
793 notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
794 }
796 void hvm_hlt(unsigned long rflags)
797 {
798 struct vcpu *curr = current;
800 if ( hvm_event_pending(curr) )
801 return;
803 /*
804 * If we halt with interrupts disabled, that's a pretty sure sign that we
805 * want to shut down. In a real processor, NMIs are the only way to break
806 * out of this.
807 */
808 if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
809 return hvm_vcpu_down(curr);
811 do_sched_op_compat(SCHEDOP_block, 0);
813 HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr));
814 }
816 void hvm_triple_fault(void)
817 {
818 struct vcpu *v = current;
819 gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
820 "invoking HVM system reset.\n", v->vcpu_id);
821 domain_shutdown(v->domain, SHUTDOWN_reboot);
822 }
824 int hvm_set_efer(uint64_t value)
825 {
826 struct vcpu *v = current;
828 value &= ~EFER_LMA;
830 if ( (value & ~(EFER_FFXSE | EFER_LME | EFER_NX | EFER_SCE)) ||
831 ((sizeof(long) != 8) && (value & EFER_LME)) ||
832 (!cpu_has_nx && (value & EFER_NX)) ||
833 (!cpu_has_syscall && (value & EFER_SCE)) ||
834 (!cpu_has_ffxsr && (value & EFER_FFXSE)) )
835 {
836 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
837 "EFER: %"PRIx64"\n", value);
838 hvm_inject_exception(TRAP_gp_fault, 0, 0);
839 return X86EMUL_EXCEPTION;
840 }
842 if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
843 hvm_paging_enabled(v) )
844 {
845 gdprintk(XENLOG_WARNING,
846 "Trying to change EFER.LME with paging enabled\n");
847 hvm_inject_exception(TRAP_gp_fault, 0, 0);
848 return X86EMUL_EXCEPTION;
849 }
851 value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
852 v->arch.hvm_vcpu.guest_efer = value;
853 hvm_update_guest_efer(v);
855 return X86EMUL_OKAY;
856 }
858 extern void shadow_blow_tables_per_domain(struct domain *d);
859 extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
861 /* Exit UC mode only if all VCPUs agree on MTRR/PAT and none is in no-fill cache mode. */
862 static bool_t domain_exit_uc_mode(struct vcpu *v)
863 {
864 struct domain *d = v->domain;
865 struct vcpu *vs;
867 for_each_vcpu ( d, vs )
868 {
869 if ( (vs == v) || !vs->is_initialised )
870 continue;
871 if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
872 mtrr_pat_not_equal(vs, v) )
873 return 0;
874 }
876 return 1;
877 }
879 static void local_flush_cache(void *info)
880 {
881 wbinvd();
882 }
884 static void hvm_set_uc_mode(struct vcpu *v, bool_t is_in_uc_mode)
885 {
886 v->domain->arch.hvm_domain.is_in_uc_mode = is_in_uc_mode;
887 shadow_blow_tables_per_domain(v->domain);
888 if ( hvm_funcs.set_uc_mode )
889 return hvm_funcs.set_uc_mode(v);
890 }
892 int hvm_set_cr0(unsigned long value)
893 {
894 struct vcpu *v = current;
895 p2m_type_t p2mt;
896 unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
898 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
900 if ( (u32)value != value )
901 {
902 HVM_DBG_LOG(DBG_LEVEL_1,
903 "Guest attempts to set upper 32 bits in CR0: %lx",
904 value);
905 goto gpf;
906 }
908 value &= ~HVM_CR0_GUEST_RESERVED_BITS;
910 /* ET is reserved and should always be 1. */
911 value |= X86_CR0_ET;
913 if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
914 goto gpf;
916 if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
917 {
918 if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
919 {
920 if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
921 {
922 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
923 goto gpf;
924 }
925 HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
926 v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
927 hvm_update_guest_efer(v);
928 }
930 if ( !paging_mode_hap(v->domain) )
931 {
932 /* The guest CR3 must point to a valid guest-physical page. */
933 gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
934 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
935 if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
936 !get_page(mfn_to_page(mfn), v->domain))
937 {
938 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
939 v->arch.hvm_vcpu.guest_cr[3], mfn);
940 domain_crash(v->domain);
941 return X86EMUL_UNHANDLEABLE;
942 }
944 /* Now arch.guest_table points to the machine-physical frame. */
945 v->arch.guest_table = pagetable_from_pfn(mfn);
947 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
948 v->arch.hvm_vcpu.guest_cr[3], mfn);
949 }
950 }
951 else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
952 {
953 /* When CR0.PG is cleared, LMA is cleared immediately. */
954 if ( hvm_long_mode_enabled(v) )
955 {
956 v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA;
957 hvm_update_guest_efer(v);
958 }
960 if ( !paging_mode_hap(v->domain) )
961 {
962 put_page(pagetable_get_page(v->arch.guest_table));
963 v->arch.guest_table = pagetable_null();
964 }
965 }
967 if ( has_arch_pdevs(v->domain) )
968 {
969 if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
970 {
971 /* Entering no fill cache mode. */
972 spin_lock(&v->domain->arch.hvm_domain.uc_lock);
973 v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
975 if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
976 {
977 /* Flush physical caches. */
978 on_each_cpu(local_flush_cache, NULL, 1);
979 hvm_set_uc_mode(v, 1);
980 }
981 spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
982 }
983 else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
984 (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
985 {
986 /* Exit from no fill cache mode. */
987 spin_lock(&v->domain->arch.hvm_domain.uc_lock);
988 v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
990 if ( domain_exit_uc_mode(v) )
991 hvm_set_uc_mode(v, 0);
993 spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
994 }
995 }
997 v->arch.hvm_vcpu.guest_cr[0] = value;
998 hvm_update_guest_cr(v, 0);
1000 if ( (value ^ old_value) & X86_CR0_PG )
1001 paging_update_paging_modes(v);
1003 return X86EMUL_OKAY;
1005 gpf:
1006 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1007 return X86EMUL_EXCEPTION;
1008 }
1010 int hvm_set_cr3(unsigned long value)
1011 {
1012 unsigned long mfn;
1013 p2m_type_t p2mt;
1014 struct vcpu *v = current;
1016 if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
1017 (value != v->arch.hvm_vcpu.guest_cr[3]) )
1018 {
1019 /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
1020 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1021 mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
1022 if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
1023 !get_page(mfn_to_page(mfn), v->domain) )
1024 goto bad_cr3;
1026 put_page(pagetable_get_page(v->arch.guest_table));
1027 v->arch.guest_table = pagetable_from_pfn(mfn);
1029 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1030 }
1032 v->arch.hvm_vcpu.guest_cr[3] = value;
1033 paging_update_cr3(v);
1034 return X86EMUL_OKAY;
1036 bad_cr3:
1037 gdprintk(XENLOG_ERR, "Invalid CR3\n");
1038 domain_crash(v->domain);
1039 return X86EMUL_UNHANDLEABLE;
1040 }
1042 int hvm_set_cr4(unsigned long value)
1043 {
1044 struct vcpu *v = current;
1045 unsigned long old_cr;
1047 if ( value & HVM_CR4_GUEST_RESERVED_BITS )
1048 {
1049 HVM_DBG_LOG(DBG_LEVEL_1,
1050 "Guest attempts to set reserved bit in CR4: %lx",
1051 value);
1052 goto gpf;
1053 }
1055 if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
1056 {
1057 HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
1058 "EFER.LMA is set");
1059 goto gpf;
1060 }
1062 old_cr = v->arch.hvm_vcpu.guest_cr[4];
1063 v->arch.hvm_vcpu.guest_cr[4] = value;
1064 hvm_update_guest_cr(v, 4);
1066 /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
1067 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
1068 paging_update_paging_modes(v);
1070 return X86EMUL_OKAY;
1072 gpf:
1073 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1074 return X86EMUL_EXCEPTION;
1075 }
1077 int hvm_virtual_to_linear_addr(
1078 enum x86_segment seg,
1079 struct segment_register *reg,
1080 unsigned long offset,
1081 unsigned int bytes,
1082 enum hvm_access_type access_type,
1083 unsigned int addr_size,
1084 unsigned long *linear_addr)
1086 unsigned long addr = offset;
1087 uint32_t last_byte;
1089 if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
1091 /*
1092 * REAL MODE: Don't bother with segment access checks.
1093 * Certain of them are not done in native real mode anyway.
1094 */
1095 addr = (uint32_t)(addr + reg->base);
1097 else if ( addr_size != 64 )
1099 /*
1100 * COMPATIBILITY MODE: Apply segment checks and add base.
1101 */
1103 switch ( access_type )
1105 case hvm_access_read:
1106 if ( (reg->attr.fields.type & 0xa) == 0x8 )
1107 goto gpf; /* execute-only code segment */
1108 break;
1109 case hvm_access_write:
1110 if ( (reg->attr.fields.type & 0xa) != 0x2 )
1111 goto gpf; /* not a writable data segment */
1112 break;
1113 default:
1114 break;
1117 last_byte = offset + bytes - 1;
1119 /* Is this a grows-down data segment? Special limit check if so. */
1120 if ( (reg->attr.fields.type & 0xc) == 0x4 )
1122 /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
1123 if ( !reg->attr.fields.db )
1124 last_byte = (uint16_t)last_byte;
1126 /* Check first byte and last byte against respective bounds. */
1127 if ( (offset <= reg->limit) || (last_byte < offset) )
1128 goto gpf;
1130 else if ( (last_byte > reg->limit) || (last_byte < offset) )
1131 goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
1133 /*
1134 * Hardware truncates to 32 bits in compatibility mode.
1135 * It does not truncate to 16 bits in 16-bit address-size mode.
1136 */
1137 addr = (uint32_t)(addr + reg->base);
1139 else
1141 /*
1142 * LONG MODE: FS and GS add segment base. Addresses must be canonical.
1143 */
1145 if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
1146 addr += reg->base;
1148 if ( !is_canonical_address(addr) )
1149 goto gpf;
1152 *linear_addr = addr;
1153 return 1;
1155 gpf:
1156 return 0;
1159 static void *hvm_map_entry(unsigned long va)
1160 {
1161 unsigned long gfn, mfn;
1162 p2m_type_t p2mt;
1163 uint32_t pfec;
1165 if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
1167 gdprintk(XENLOG_ERR, "Descriptor table entry "
1168 "straddles page boundary\n");
1169 domain_crash(current->domain);
1170 return NULL;
1171 }
1173 /* We're mapping on behalf of the segment-load logic, which might
1174 * write the accessed flags in the descriptors (in 32-bit mode), but
1175 * we still treat it as a kernel-mode read (i.e. no access checks). */
1176 pfec = PFEC_page_present;
1177 gfn = paging_gva_to_gfn(current, va, &pfec);
1178 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
1179 if ( !p2m_is_ram(p2mt) )
1181 gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
1182 domain_crash(current->domain);
1183 return NULL;
1184 }
1186 ASSERT(mfn_valid(mfn));
1188 paging_mark_dirty(current->domain, mfn);
1190 return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
1191 }
1193 static void hvm_unmap_entry(void *p)
1194 {
1195 if ( p )
1196 unmap_domain_page(p);
1197 }
1199 static int hvm_load_segment_selector(
1200 enum x86_segment seg, uint16_t sel)
1202 struct segment_register desctab, cs, segr;
1203 struct desc_struct *pdesc, desc;
1204 u8 dpl, rpl, cpl;
1205 int fault_type = TRAP_invalid_tss;
1206 struct cpu_user_regs *regs = guest_cpu_user_regs();
1207 struct vcpu *v = current;
1209 if ( regs->eflags & EF_VM )
1211 segr.sel = sel;
1212 segr.base = (uint32_t)sel << 4;
1213 segr.limit = 0xffffu;
1214 segr.attr.bytes = 0xf3;
1215 hvm_set_segment_register(v, seg, &segr);
1216 return 0;
1219 /* NULL selector? */
1220 if ( (sel & 0xfffc) == 0 )
1222 if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
1223 goto fail;
1224 memset(&segr, 0, sizeof(segr));
1225 hvm_set_segment_register(v, seg, &segr);
1226 return 0;
1229 /* LDT descriptor must be in the GDT. */
1230 if ( (seg == x86_seg_ldtr) && (sel & 4) )
1231 goto fail;
1233 hvm_get_segment_register(v, x86_seg_cs, &cs);
1234 hvm_get_segment_register(
1235 v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);
1237 /* Check against descriptor table limit. */
1238 if ( ((sel & 0xfff8) + 7) > desctab.limit )
1239 goto fail;
1241 pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
1242 if ( pdesc == NULL )
1243 goto hvm_map_fail;
1245 do {
1246 desc = *pdesc;
1248 /* Segment present in memory? */
1249 if ( !(desc.b & (1u<<15)) )
1251 fault_type = TRAP_no_segment;
1252 goto unmap_and_fail;
1255 /* LDT descriptor is a system segment. All others are code/data. */
1256 if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
1257 goto unmap_and_fail;
1259 dpl = (desc.b >> 13) & 3;
1260 rpl = sel & 3;
1261 cpl = cs.sel & 3;
1263 switch ( seg )
1265 case x86_seg_cs:
1266 /* Code segment? */
1267 if ( !(desc.b & (1u<<11)) )
1268 goto unmap_and_fail;
1269 /* Non-conforming segment: check DPL against RPL. */
1270 if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
1271 goto unmap_and_fail;
1272 break;
1273 case x86_seg_ss:
1274 /* Writable data segment? */
1275 if ( (desc.b & (5u<<9)) != (1u<<9) )
1276 goto unmap_and_fail;
1277 if ( (dpl != cpl) || (dpl != rpl) )
1278 goto unmap_and_fail;
1279 break;
1280 case x86_seg_ldtr:
1281 /* LDT system segment? */
1282 if ( (desc.b & (15u<<8)) != (2u<<8) )
1283 goto unmap_and_fail;
1284 goto skip_accessed_flag;
1285 default:
1286 /* Readable code or data segment? */
1287 if ( (desc.b & (5u<<9)) == (4u<<9) )
1288 goto unmap_and_fail;
1289 /* Non-conforming segment: check DPL against RPL and CPL. */
1290 if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) )
1291 goto unmap_and_fail;
1292 break;
1294 } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
1295 (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );
1297 /* Force the Accessed flag in our local copy. */
1298 desc.b |= 0x100;
1300 skip_accessed_flag:
1301 hvm_unmap_entry(pdesc);
1303 segr.base = (((desc.b << 0) & 0xff000000u) |
1304 ((desc.b << 16) & 0x00ff0000u) |
1305 ((desc.a >> 16) & 0x0000ffffu));
1306 segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) |
1307 ((desc.b >> 12) & 0x0f00u));
1308 segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
1309 if ( segr.attr.fields.g )
1310 segr.limit = (segr.limit << 12) | 0xfffu;
1311 segr.sel = sel;
1312 hvm_set_segment_register(v, seg, &segr);
1314 return 0;
1316 unmap_and_fail:
1317 hvm_unmap_entry(pdesc);
1318 fail:
1319 hvm_inject_exception(fault_type, sel & 0xfffc, 0);
1320 hvm_map_fail:
1321 return 1;
1324 void hvm_task_switch(
1325 uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
1326 int32_t errcode)
1328 struct vcpu *v = current;
1329 struct cpu_user_regs *regs = guest_cpu_user_regs();
1330 struct segment_register gdt, tr, prev_tr, segr;
1331 struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
1332 unsigned long eflags;
1333 int exn_raised, rc;
1334 struct {
1335 u16 back_link,__blh;
1336 u32 esp0;
1337 u16 ss0, _0;
1338 u32 esp1;
1339 u16 ss1, _1;
1340 u32 esp2;
1341 u16 ss2, _2;
1342 u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
1343 u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
1344 u16 trace, iomap;
1345 } tss = { 0 };
1347 hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
1348 hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
1350 if ( ((tss_sel & 0xfff8) + 7) > gdt.limit )
1352 hvm_inject_exception((taskswitch_reason == TSW_iret) ?
1353 TRAP_invalid_tss : TRAP_gp_fault,
1354 tss_sel & 0xfff8, 0);
1355 goto out;
1358 optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
1359 if ( optss_desc == NULL )
1360 goto out;
1362 nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
1363 if ( nptss_desc == NULL )
1364 goto out;
1366 tss_desc = *nptss_desc;
1367 tr.sel = tss_sel;
1368 tr.base = (((tss_desc.b << 0) & 0xff000000u) |
1369 ((tss_desc.b << 16) & 0x00ff0000u) |
1370 ((tss_desc.a >> 16) & 0x0000ffffu));
1371 tr.attr.bytes = (((tss_desc.b >> 8) & 0x00ffu) |
1372 ((tss_desc.b >> 12) & 0x0f00u));
1373 tr.limit = (tss_desc.b & 0x000f0000u) | (tss_desc.a & 0x0000ffffu);
1374 if ( tr.attr.fields.g )
1375 tr.limit = (tr.limit << 12) | 0xfffu;
1377 if ( !tr.attr.fields.p )
1379 hvm_inject_exception(TRAP_no_segment, tss_sel & 0xfff8, 0);
1380 goto out;
1383 if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
1385 hvm_inject_exception(
1386 (taskswitch_reason == TSW_iret) ? TRAP_invalid_tss : TRAP_gp_fault,
1387 tss_sel & 0xfff8, 0);
1388 goto out;
1391 if ( tr.limit < (sizeof(tss)-1) )
1393 hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
1394 goto out;
1397 rc = hvm_copy_from_guest_virt(
1398 &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
1399 if ( rc == HVMCOPY_bad_gva_to_gfn )
1400 goto out;
1402 eflags = regs->eflags;
1403 if ( taskswitch_reason == TSW_iret )
1404 eflags &= ~X86_EFLAGS_NT;
1406 tss.cr3 = v->arch.hvm_vcpu.guest_cr[3];
1407 tss.eip = regs->eip;
1408 tss.eflags = eflags;
1409 tss.eax = regs->eax;
1410 tss.ecx = regs->ecx;
1411 tss.edx = regs->edx;
1412 tss.ebx = regs->ebx;
1413 tss.esp = regs->esp;
1414 tss.ebp = regs->ebp;
1415 tss.esi = regs->esi;
1416 tss.edi = regs->edi;
1418 hvm_get_segment_register(v, x86_seg_es, &segr);
1419 tss.es = segr.sel;
1420 hvm_get_segment_register(v, x86_seg_cs, &segr);
1421 tss.cs = segr.sel;
1422 hvm_get_segment_register(v, x86_seg_ss, &segr);
1423 tss.ss = segr.sel;
1424 hvm_get_segment_register(v, x86_seg_ds, &segr);
1425 tss.ds = segr.sel;
1426 hvm_get_segment_register(v, x86_seg_fs, &segr);
1427 tss.fs = segr.sel;
1428 hvm_get_segment_register(v, x86_seg_gs, &segr);
1429 tss.gs = segr.sel;
1430 hvm_get_segment_register(v, x86_seg_ldtr, &segr);
1431 tss.ldt = segr.sel;
1433 rc = hvm_copy_to_guest_virt(
1434 prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
1435 if ( rc == HVMCOPY_bad_gva_to_gfn )
1436 goto out;
1438 rc = hvm_copy_from_guest_virt(
1439 &tss, tr.base, sizeof(tss), PFEC_page_present);
1440 if ( rc == HVMCOPY_bad_gva_to_gfn )
1441 goto out;
1443 if ( hvm_set_cr3(tss.cr3) )
1444 goto out;
1446 regs->eip = tss.eip;
1447 regs->eflags = tss.eflags | 2;
1448 regs->eax = tss.eax;
1449 regs->ecx = tss.ecx;
1450 regs->edx = tss.edx;
1451 regs->ebx = tss.ebx;
1452 regs->esp = tss.esp;
1453 regs->ebp = tss.ebp;
1454 regs->esi = tss.esi;
1455 regs->edi = tss.edi;
1457 if ( (taskswitch_reason == TSW_call_or_int) )
1459 regs->eflags |= X86_EFLAGS_NT;
1460 tss.back_link = prev_tr.sel;
1463 exn_raised = 0;
1464 if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt) ||
1465 hvm_load_segment_selector(x86_seg_es, tss.es) ||
1466 hvm_load_segment_selector(x86_seg_cs, tss.cs) ||
1467 hvm_load_segment_selector(x86_seg_ss, tss.ss) ||
1468 hvm_load_segment_selector(x86_seg_ds, tss.ds) ||
1469 hvm_load_segment_selector(x86_seg_fs, tss.fs) ||
1470 hvm_load_segment_selector(x86_seg_gs, tss.gs) )
1471 exn_raised = 1;
1473 rc = hvm_copy_to_guest_virt(
1474 tr.base, &tss, sizeof(tss), PFEC_page_present);
1475 if ( rc == HVMCOPY_bad_gva_to_gfn )
1476 exn_raised = 1;
1478 if ( (tss.trace & 1) && !exn_raised )
1479 hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
1481 tr.attr.fields.type = 0xb; /* busy 32-bit tss */
1482 hvm_set_segment_register(v, x86_seg_tr, &tr);
1484 v->arch.hvm_vcpu.guest_cr[0] |= X86_CR0_TS;
1485 hvm_update_guest_cr(v, 0);
1487 if ( (taskswitch_reason == TSW_iret) ||
1488 (taskswitch_reason == TSW_jmp) )
1489 clear_bit(41, optss_desc); /* clear B flag of old task */
1491 if ( taskswitch_reason != TSW_iret )
1492 set_bit(41, nptss_desc); /* set B flag of new task */
1494 if ( errcode >= 0 )
1496 struct segment_register reg;
1497 unsigned long linear_addr;
1498 regs->esp -= 4;
1499 hvm_get_segment_register(current, x86_seg_ss, &reg);
1500 /* Todo: do not ignore access faults here. */
1501 if ( hvm_virtual_to_linear_addr(x86_seg_ss, &reg, regs->esp,
1502 4, hvm_access_write, 32,
1503 &linear_addr) )
1504 hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4, 0);
1507 out:
1508 hvm_unmap_entry(optss_desc);
1509 hvm_unmap_entry(nptss_desc);
1512 #define HVMCOPY_from_guest (0u<<0)
1513 #define HVMCOPY_to_guest (1u<<0)
1514 #define HVMCOPY_no_fault (0u<<1)
1515 #define HVMCOPY_fault (1u<<1)
1516 #define HVMCOPY_phys (0u<<2)
1517 #define HVMCOPY_virt (1u<<2)
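/* __hvm_copy() flag bits: bit 0 selects the copy direction, bit 1 selects whether a failed virtual-address translation injects #PF, bit 2 selects whether 'addr' is a guest virtual or guest physical address. */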
1518 static enum hvm_copy_result __hvm_copy(
1519 void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec)
1521 struct vcpu *curr = current;
1522 unsigned long gfn, mfn;
1523 p2m_type_t p2mt;
1524 char *p;
1525 int count, todo = size;
1527 while ( todo > 0 )
1529 count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
1531 if ( flags & HVMCOPY_virt )
1533 gfn = paging_gva_to_gfn(curr, addr, &pfec);
1534 if ( gfn == INVALID_GFN )
1536 if ( flags & HVMCOPY_fault )
1537 hvm_inject_exception(TRAP_page_fault, pfec, addr);
1538 return HVMCOPY_bad_gva_to_gfn;
1541 else
1543 gfn = addr >> PAGE_SHIFT;
1546 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
1548 if ( !p2m_is_ram(p2mt) )
1549 return HVMCOPY_bad_gfn_to_mfn;
1550 ASSERT(mfn_valid(mfn));
1552 p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
1554 if ( flags & HVMCOPY_to_guest )
1556 if ( p2mt == p2m_ram_ro )
1558 static unsigned long lastpage;
1559 if ( xchg(&lastpage, gfn) != gfn )
1560 gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
1561 " memory page. gfn=%#lx, mfn=%#lx\n",
1562 gfn, mfn);
1564 else
1566 memcpy(p, buf, count);
1567 paging_mark_dirty(curr->domain, mfn);
1570 else
1572 memcpy(buf, p, count);
1575 unmap_domain_page(p);
1577 addr += count;
1578 buf += count;
1579 todo -= count;
1582 return HVMCOPY_okay;
1585 enum hvm_copy_result hvm_copy_to_guest_phys(
1586 paddr_t paddr, void *buf, int size)
1588 return __hvm_copy(buf, paddr, size,
1589 HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_phys,
1590 0);
1593 enum hvm_copy_result hvm_copy_from_guest_phys(
1594 void *buf, paddr_t paddr, int size)
1596 return __hvm_copy(buf, paddr, size,
1597 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_phys,
1598 0);
1601 enum hvm_copy_result hvm_copy_to_guest_virt(
1602 unsigned long vaddr, void *buf, int size, uint32_t pfec)
1604 return __hvm_copy(buf, vaddr, size,
1605 HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_virt,
1606 PFEC_page_present | PFEC_write_access | pfec);
1609 enum hvm_copy_result hvm_copy_from_guest_virt(
1610 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1612 return __hvm_copy(buf, vaddr, size,
1613 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
1614 PFEC_page_present | pfec);
1617 enum hvm_copy_result hvm_fetch_from_guest_virt(
1618 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1620 if ( hvm_nx_enabled(current) )
1621 pfec |= PFEC_insn_fetch;
1622 return __hvm_copy(buf, vaddr, size,
1623 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
1624 PFEC_page_present | pfec);
1627 enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
1628 unsigned long vaddr, void *buf, int size, uint32_t pfec)
1630 return __hvm_copy(buf, vaddr, size,
1631 HVMCOPY_to_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1632 PFEC_page_present | PFEC_write_access | pfec);
1635 enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
1636 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1638 return __hvm_copy(buf, vaddr, size,
1639 HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1640 PFEC_page_present | pfec);
1643 enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
1644 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1646 if ( hvm_nx_enabled(current) )
1647 pfec |= PFEC_insn_fetch;
1648 return __hvm_copy(buf, vaddr, size,
1649 HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1650 PFEC_page_present | pfec);
1653 #ifdef __x86_64__
1654 DEFINE_PER_CPU(bool_t, hvm_64bit_hcall);
1655 #endif
1657 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
1659 int rc;
1661 #ifdef __x86_64__
1662 if ( !this_cpu(hvm_64bit_hcall) && is_compat_arg_xlat_range(to, len) )
1664 memcpy(to, from, len);
1665 return 0;
1667 #endif
1669 rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from,
1670 len, 0);
1671 return rc ? len : 0; /* fake a copy_to_user() return code */
1674 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
1676 int rc;
1678 #ifdef __x86_64__
1679 if ( !this_cpu(hvm_64bit_hcall) && is_compat_arg_xlat_range(from, len) )
1681 memcpy(to, from, len);
1682 return 0;
1684 #endif
1686 rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0);
1687 return rc ? len : 0; /* fake a copy_from_user() return code */
1690 #define bitmaskof(idx) (1U << ((idx) & 31))
1691 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
1692 unsigned int *ecx, unsigned int *edx)
1694 struct vcpu *v = current;
1696 if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
1697 return;
1699 if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
1700 return;
1702 domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
1704 switch ( input )
1706 case 0x1:
1707 /* Fix up VLAPIC details. */
1708 *ebx &= 0x00FFFFFFu;
1709 *ebx |= (v->vcpu_id * 2) << 24;
1710 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1711 __clear_bit(X86_FEATURE_APIC & 31, edx);
1712 break;
1713 case 0xb:
1714 /* Fix the x2APIC identifier. */
1715 *edx = v->vcpu_id * 2;
1716 break;
1720 void hvm_rdtsc_intercept(struct cpu_user_regs *regs)
1721 {
1722 uint64_t tsc;
1723 struct vcpu *v = current;
1725 tsc = hvm_get_guest_tsc(v);
1726 regs->eax = (uint32_t)tsc;
1727 regs->edx = (uint32_t)(tsc >> 32);
1728 }
1730 int hvm_msr_read_intercept(struct cpu_user_regs *regs)
1732 uint32_t ecx = regs->ecx;
1733 uint64_t msr_content = 0;
1734 struct vcpu *v = current;
1735 uint64_t *var_range_base, *fixed_range_base;
1736 int index, mtrr;
1737 uint32_t cpuid[4];
1739 var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
1740 fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
1742 hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
1743 mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
1745 switch ( ecx )
1747 case MSR_IA32_TSC:
1748 msr_content = hvm_get_guest_tsc(v);
1749 break;
1751 case MSR_IA32_APICBASE:
1752 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1753 break;
1755 case MSR_IA32_MCG_CAP:
1756 case MSR_IA32_MCG_STATUS:
1757 case MSR_IA32_MC0_STATUS:
1758 case MSR_IA32_MC1_STATUS:
1759 case MSR_IA32_MC2_STATUS:
1760 case MSR_IA32_MC3_STATUS:
1761 case MSR_IA32_MC4_STATUS:
1762 case MSR_IA32_MC5_STATUS:
1763 /* No point in letting the guest see real MCEs */
1764 msr_content = 0;
1765 break;
1767 case MSR_IA32_CR_PAT:
1768 msr_content = v->arch.hvm_vcpu.pat_cr;
1769 break;
1771 case MSR_MTRRcap:
1772 if ( !mtrr )
1773 goto gp_fault;
1774 msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
1775 break;
1776 case MSR_MTRRdefType:
1777 if ( !mtrr )
1778 goto gp_fault;
1779 msr_content = v->arch.hvm_vcpu.mtrr.def_type
1780 | (v->arch.hvm_vcpu.mtrr.enabled << 10);
1781 break;
1782 case MSR_MTRRfix64K_00000:
1783 if ( !mtrr )
1784 goto gp_fault;
1785 msr_content = fixed_range_base[0];
1786 break;
1787 case MSR_MTRRfix16K_80000:
1788 case MSR_MTRRfix16K_A0000:
1789 if ( !mtrr )
1790 goto gp_fault;
1791 index = regs->ecx - MSR_MTRRfix16K_80000;
1792 msr_content = fixed_range_base[index + 1];
1793 break;
1794 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1795 if ( !mtrr )
1796 goto gp_fault;
1797 index = regs->ecx - MSR_MTRRfix4K_C0000;
1798 msr_content = fixed_range_base[index + 3];
1799 break;
1800 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1801 if ( !mtrr )
1802 goto gp_fault;
1803 index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
1804 msr_content = var_range_base[index];
1805 break;
1807 case MSR_K8_ENABLE_C1E:
1808 /* There's no point in letting the guest see C-States.
1809 * Further, this AMD-only register may be accessed if this HVM guest
1810 * has been migrated to an Intel host. This fixes a guest crash
1811 * in this case.
1812 */
1813 msr_content = 0;
1814 break;
1816 default:
1817 return hvm_funcs.msr_read_intercept(regs);
1820 regs->eax = (uint32_t)msr_content;
1821 regs->edx = (uint32_t)(msr_content >> 32);
1822 return X86EMUL_OKAY;
1824 gp_fault:
1825 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1826 return X86EMUL_EXCEPTION;
1829 int hvm_msr_write_intercept(struct cpu_user_regs *regs)
1831 extern bool_t mtrr_var_range_msr_set(
1832 struct mtrr_state *v, u32 msr, u64 msr_content);
1833 extern bool_t mtrr_fix_range_msr_set(
1834 struct mtrr_state *v, int row, u64 msr_content);
1835 extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
1836 extern bool_t pat_msr_set(u64 *pat, u64 msr);
1838 uint32_t ecx = regs->ecx;
1839 uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
1840 struct vcpu *v = current;
1841 int index, mtrr;
1842 uint32_t cpuid[4];
1844 hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
1845 mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
1847 switch ( ecx )
1849 case MSR_IA32_TSC:
1850 hvm_set_guest_tsc(v, msr_content);
1851 pt_reset(v);
1852 break;
1854 case MSR_IA32_APICBASE:
1855 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1856 break;
1858 case MSR_IA32_CR_PAT:
1859 if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
1860 goto gp_fault;
1861 break;
1863 case MSR_MTRRcap:
1864 if ( !mtrr )
1865 goto gp_fault;
1866 goto gp_fault;
1867 case MSR_MTRRdefType:
1868 if ( !mtrr )
1869 goto gp_fault;
1870 if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
1871 goto gp_fault;
1872 break;
1873 case MSR_MTRRfix64K_00000:
1874 if ( !mtrr )
1875 goto gp_fault;
1876 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
1877 goto gp_fault;
1878 break;
1879 case MSR_MTRRfix16K_80000:
1880 case MSR_MTRRfix16K_A0000:
1881 if ( !mtrr )
1882 goto gp_fault;
1883 index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
1884 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1885 index, msr_content) )
1886 goto gp_fault;
1887 break;
1888 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1889 if ( !mtrr )
1890 goto gp_fault;
1891 index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
1892 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1893 index, msr_content) )
1894 goto gp_fault;
1895 break;
1896 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1897 if ( !mtrr )
1898 goto gp_fault;
1899 if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1900 regs->ecx, msr_content) )
1901 goto gp_fault;
1902 break;
1904 default:
1905 return hvm_funcs.msr_write_intercept(regs);
1908 return X86EMUL_OKAY;
1910 gp_fault:
1911 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1912 return X86EMUL_EXCEPTION;
1915 enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
1917 unsigned long intr_shadow;
1919 ASSERT(v == current);
1921 if ( (intack.source != hvm_intsrc_nmi) &&
1922 !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
1923 return hvm_intblk_rflags_ie;
1925 intr_shadow = hvm_funcs.get_interrupt_shadow(v);
1927 if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) )
1928 return hvm_intblk_shadow;
1930 if ( intack.source == hvm_intsrc_nmi )
1931 return ((intr_shadow & HVM_INTR_SHADOW_NMI) ?
1932 hvm_intblk_nmi_iret : hvm_intblk_none);
1934 if ( intack.source == hvm_intsrc_lapic )
1936 uint32_t tpr = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xF0;
1937 if ( (tpr >> 4) >= (intack.vector >> 4) )
1938 return hvm_intblk_tpr;
1941 return hvm_intblk_none;
1944 static long hvm_grant_table_op(
1945 unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
1947 if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) )
1948 return -ENOSYS; /* all other commands need auditing */
1949 return do_grant_table_op(cmd, uop, count);
1952 static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE(void) arg)
1954 long rc = do_memory_op(cmd, arg);
1955 if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
1956 current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
1957 return rc;
1960 static long hvm_vcpu_op(
1961 int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
1963 long rc;
1965 switch ( cmd )
1967 case VCPUOP_register_runstate_memory_area:
1968 case VCPUOP_get_runstate_info:
1969 rc = do_vcpu_op(cmd, vcpuid, arg);
1970 break;
1971 default:
1972 rc = -ENOSYS;
1973 break;
1976 return rc;
1979 typedef unsigned long hvm_hypercall_t(
1980 unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
1982 #define HYPERCALL(x) \
1983 [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x
1985 #if defined(__i386__)
1987 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
1988 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
1989 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
1990 [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
1991 HYPERCALL(xen_version),
1992 HYPERCALL(event_channel_op),
1993 HYPERCALL(sched_op),
1994 HYPERCALL(hvm_op)
1995 };
1997 #else /* defined(__x86_64__) */
1999 static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
2001 long rc = compat_memory_op(cmd, arg);
2002 if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
2003 current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
2004 return rc;
2007 static long hvm_vcpu_op_compat32(
2008 int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
2010 long rc;
2012 switch ( cmd )
2014 case VCPUOP_register_runstate_memory_area:
2015 case VCPUOP_get_runstate_info:
2016 rc = compat_vcpu_op(cmd, vcpuid, arg);
2017 break;
2018 default:
2019 rc = -ENOSYS;
2020 break;
2023 return rc;
2026 static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
2027 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
2028 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
2029 [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
2030 HYPERCALL(xen_version),
2031 HYPERCALL(event_channel_op),
2032 HYPERCALL(sched_op),
2033 HYPERCALL(hvm_op)
2034 };
2036 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
2037 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
2038 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
2039 [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
2040 HYPERCALL(xen_version),
2041 HYPERCALL(event_channel_op),
2042 HYPERCALL(sched_op),
2043 HYPERCALL(hvm_op)
2044 };
2046 #endif /* defined(__x86_64__) */
2048 int hvm_do_hypercall(struct cpu_user_regs *regs)
2050 struct vcpu *curr = current;
2051 struct segment_register sreg;
2052 int mode = hvm_guest_x86_mode(curr);
2053 uint32_t eax = regs->eax;
2055 switch ( mode )
2057 #ifdef __x86_64__
2058 case 8:
2059 #endif
2060 case 4:
2061 case 2:
2062 hvm_get_segment_register(curr, x86_seg_ss, &sreg);
2063 if ( unlikely(sreg.attr.fields.dpl == 3) )
2065 default:
2066 regs->eax = -EPERM;
2067 return HVM_HCALL_completed;
2069 case 0:
2070 break;
2073 if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
2074 return viridian_hypercall(regs);
2076 if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
2078 regs->eax = -ENOSYS;
2079 return HVM_HCALL_completed;
2082 this_cpu(hc_preempted) = 0;
2084 #ifdef __x86_64__
2085 if ( mode == 8 )
2086 {
2087 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%lx, %lx, %lx, %lx, %lx)", eax,
2088 regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);
2090 this_cpu(hvm_64bit_hcall) = 1;
2091 regs->rax = hvm_hypercall64_table[eax](regs->rdi,
2092 regs->rsi,
2093 regs->rdx,
2094 regs->r10,
2095 regs->r8);
2096 this_cpu(hvm_64bit_hcall) = 0;
2097 }
2098 else
2099 #endif
2100 {
2101 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%x, %x, %x, %x, %x)", eax,
2102 (uint32_t)regs->ebx, (uint32_t)regs->ecx,
2103 (uint32_t)regs->edx, (uint32_t)regs->esi,
2104 (uint32_t)regs->edi);
2106 regs->eax = hvm_hypercall32_table[eax]((uint32_t)regs->ebx,
2107 (uint32_t)regs->ecx,
2108 (uint32_t)regs->edx,
2109 (uint32_t)regs->esi,
2110 (uint32_t)regs->edi);
2111 }
2113 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u -> %lx",
2114 eax, (unsigned long)regs->eax);
2116 if ( this_cpu(hc_preempted) )
2117 return HVM_HCALL_preempted;
2119 if ( unlikely(curr->domain->arch.hvm_domain.qemu_mapcache_invalidate) &&
2120 test_and_clear_bool(curr->domain->arch.hvm_domain.
2121 qemu_mapcache_invalidate) )
2122 return HVM_HCALL_invalidate;
2124 return HVM_HCALL_completed;
2125 }
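/*
 * Register convention used above: a 64-bit guest passes the hypercall number
 * in rax and the arguments in rdi/rsi/rdx/r10/r8; a 32-bit (or real/compat
 * mode) guest uses eax and ebx/ecx/edx/esi/edi. Hypercalls are refused from
 * guest user space (ss.dpl == 3) and, for Viridian-enabled domains, numbers
 * with bit 31 set are diverted to viridian_hypercall(). HVM_HCALL_preempted
 * means a continuation was set up and the guest will re-issue the hypercall;
 * HVM_HCALL_invalidate additionally asks the device model to flush its map
 * cache.
 */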
2127 static void hvm_latch_shinfo_size(struct domain *d)
2128 {
2129 /*
2130 * Called from operations which are among the very first executed by
2131 * PV drivers on initialisation or after save/restore. These are sensible
2132 * points at which to sample the execution mode of the guest and latch
2133 * 32- or 64-bit format for shared state.
2134 */
2135 if ( current->domain == d )
2136 d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
2137 }
2139 /* Initialise a hypercall transfer page for an HVM domain using
2140 paravirtualised drivers. */
2141 void hvm_hypercall_page_initialise(struct domain *d,
2142 void *hypercall_page)
2143 {
2144 hvm_latch_shinfo_size(d);
2145 hvm_funcs.init_hypercall_page(d, hypercall_page);
2146 }
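/*
 * hvm_funcs.init_hypercall_page() fills the page with the vendor-specific
 * transfer stubs (VMCALL on VMX, VMMCALL on SVM), one stub per hypercall
 * slot, so guest PV drivers can issue hypercalls without knowing which
 * vendor's virtualisation extensions are in use.
 */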
2148 static int hvmop_set_pci_intx_level(
2149 XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
2150 {
2151 struct xen_hvm_set_pci_intx_level op;
2152 struct domain *d;
2153 int rc;
2155 if ( copy_from_guest(&op, uop, 1) )
2156 return -EFAULT;
2158 if ( (op.domain > 0) || (op.bus > 0) || (op.device > 31) || (op.intx > 3) )
2159 return -EINVAL;
2161 d = rcu_lock_domain_by_id(op.domid);
2162 if ( d == NULL )
2163 return -ESRCH;
2165 rc = -EPERM;
2166 if ( !IS_PRIV_FOR(current->domain, d) )
2167 goto out;
2169 rc = -EINVAL;
2170 if ( !is_hvm_domain(d) )
2171 goto out;
2173 rc = xsm_hvm_set_pci_intx_level(d);
2174 if ( rc )
2175 goto out;
2177 rc = 0;
2178 switch ( op.level )
2179 {
2180 case 0:
2181 hvm_pci_intx_deassert(d, op.device, op.intx);
2182 break;
2183 case 1:
2184 hvm_pci_intx_assert(d, op.device, op.intx);
2185 break;
2186 default:
2187 rc = -EINVAL;
2188 break;
2189 }
2191 out:
2192 rcu_unlock_domain(d);
2193 return rc;
2194 }
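/*
 * The hvmop_set_* helpers here all follow the same pattern: copy the argument
 * block in, sanity-check it, take an RCU reference on the target domain,
 * require the caller to be privileged for that domain (IS_PRIV_FOR) and to
 * pass the XSM hook, perform the operation, then drop the RCU lock. They are
 * used by the device model/toolstack to assert and route interrupts for the
 * emulated platform.
 */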
2196 void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
2197 {
2198 struct domain *d = v->domain;
2199 struct vcpu_guest_context *ctxt;
2200 struct segment_register reg;
2202 BUG_ON(vcpu_runnable(v));
2204 domain_lock(d);
2206 if ( v->is_initialised )
2207 goto out;
2209 if ( !paging_mode_hap(d) )
2210 {
2211 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
2212 put_page(pagetable_get_page(v->arch.guest_table));
2213 v->arch.guest_table = pagetable_null();
2214 }
2216 ctxt = &v->arch.guest_context;
2217 memset(ctxt, 0, sizeof(*ctxt));
2218 ctxt->flags = VGCF_online;
2219 ctxt->user_regs.eflags = 2;
2220 ctxt->user_regs.edx = 0x00000f00;
2221 ctxt->user_regs.eip = ip;
2223 v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
2224 hvm_update_guest_cr(v, 0);
2226 v->arch.hvm_vcpu.guest_cr[2] = 0;
2227 hvm_update_guest_cr(v, 2);
2229 v->arch.hvm_vcpu.guest_cr[3] = 0;
2230 hvm_update_guest_cr(v, 3);
2232 v->arch.hvm_vcpu.guest_cr[4] = 0;
2233 hvm_update_guest_cr(v, 4);
2235 v->arch.hvm_vcpu.guest_efer = 0;
2236 hvm_update_guest_efer(v);
2238 reg.sel = cs;
2239 reg.base = (uint32_t)reg.sel << 4;
2240 reg.limit = 0xffff;
2241 reg.attr.bytes = 0x09b;
2242 hvm_set_segment_register(v, x86_seg_cs, &reg);
2244 reg.sel = reg.base = 0;
2245 reg.limit = 0xffff;
2246 reg.attr.bytes = 0x093;
2247 hvm_set_segment_register(v, x86_seg_ds, &reg);
2248 hvm_set_segment_register(v, x86_seg_es, &reg);
2249 hvm_set_segment_register(v, x86_seg_fs, &reg);
2250 hvm_set_segment_register(v, x86_seg_gs, &reg);
2251 hvm_set_segment_register(v, x86_seg_ss, &reg);
2253 reg.attr.bytes = 0x82; /* LDT */
2254 hvm_set_segment_register(v, x86_seg_ldtr, &reg);
2256 reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
2257 hvm_set_segment_register(v, x86_seg_tr, &reg);
2259 reg.attr.bytes = 0;
2260 hvm_set_segment_register(v, x86_seg_gdtr, &reg);
2261 hvm_set_segment_register(v, x86_seg_idtr, &reg);
2263 /* Sync AP's TSC with BSP's. */
2264 v->arch.hvm_vcpu.cache_tsc_offset =
2265 v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
2266 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
2268 paging_update_paging_modes(v);
2270 v->arch.flags |= TF_kernel_mode;
2271 v->is_initialised = 1;
2272 clear_bit(_VPF_down, &v->pause_flags);
2274 out:
2275 domain_unlock(d);
2276 }
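/*
 * The state programmed above mirrors an x86 reset/INIT: real mode, 64KiB
 * segments based at selector<<4, CS:IP supplied by the caller (the BSP gets
 * 0xf000:0xfff0 from hvm_s3_suspend(); APs are normally started through the
 * emulated INIT/SIPI sequence), EFLAGS with only the reserved bit set, and
 * EDX holding a power-on CPU signature value. The TSC offset is copied from
 * vcpu0 so all vcpus share a consistent time base after reset.
 */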
2278 static void hvm_s3_suspend(struct domain *d)
2279 {
2280 struct vcpu *v;
2282 domain_pause(d);
2283 domain_lock(d);
2285 if ( d->is_dying || (d->vcpu[0] == NULL) ||
2286 test_and_set_bool(d->arch.hvm_domain.is_s3_suspended) )
2287 {
2288 domain_unlock(d);
2289 domain_unpause(d);
2290 return;
2291 }
2293 for_each_vcpu ( d, v )
2294 {
2295 vlapic_reset(vcpu_vlapic(v));
2296 vcpu_reset(v);
2297 }
2299 vpic_reset(d);
2300 vioapic_reset(d);
2301 pit_reset(d);
2302 rtc_reset(d);
2303 pmtimer_reset(d);
2304 hpet_reset(d);
2306 hvm_vcpu_reset_state(d->vcpu[0], 0xf000, 0xfff0);
2308 domain_unlock(d);
2309 }
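/*
 * ACPI S3 ("suspend to RAM") handling: every vcpu and the emulated platform
 * devices (PIC, IO-APIC, PIT, RTC, PM timer, HPET) are reset, and vcpu0 is
 * put back at the real-mode reset vector 0xf000:0xfff0 so that firmware runs
 * its wakeup path once hvm_s3_resume() unpauses the domain. The
 * is_s3_suspended flag makes repeated suspend/resume requests harmless.
 */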
2311 static void hvm_s3_resume(struct domain *d)
2312 {
2313 if ( test_and_clear_bool(d->arch.hvm_domain.is_s3_suspended) )
2314 domain_unpause(d);
2315 }
2317 static int hvmop_set_isa_irq_level(
2318 XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t) uop)
2319 {
2320 struct xen_hvm_set_isa_irq_level op;
2321 struct domain *d;
2322 int rc;
2324 if ( copy_from_guest(&op, uop, 1) )
2325 return -EFAULT;
2327 if ( op.isa_irq > 15 )
2328 return -EINVAL;
2330 d = rcu_lock_domain_by_id(op.domid);
2331 if ( d == NULL )
2332 return -ESRCH;
2334 rc = -EPERM;
2335 if ( !IS_PRIV_FOR(current->domain, d) )
2336 goto out;
2338 rc = -EINVAL;
2339 if ( !is_hvm_domain(d) )
2340 goto out;
2342 rc = xsm_hvm_set_isa_irq_level(d);
2343 if ( rc )
2344 goto out;
2346 rc = 0;
2347 switch ( op.level )
2348 {
2349 case 0:
2350 hvm_isa_irq_deassert(d, op.isa_irq);
2351 break;
2352 case 1:
2353 hvm_isa_irq_assert(d, op.isa_irq);
2354 break;
2355 default:
2356 rc = -EINVAL;
2357 break;
2358 }
2360 out:
2361 rcu_unlock_domain(d);
2362 return rc;
2363 }
2365 static int hvmop_set_pci_link_route(
2366 XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t) uop)
2367 {
2368 struct xen_hvm_set_pci_link_route op;
2369 struct domain *d;
2370 int rc;
2372 if ( copy_from_guest(&op, uop, 1) )
2373 return -EFAULT;
2375 if ( (op.link > 3) || (op.isa_irq > 15) )
2376 return -EINVAL;
2378 d = rcu_lock_domain_by_id(op.domid);
2379 if ( d == NULL )
2380 return -ESRCH;
2382 rc = -EPERM;
2383 if ( !IS_PRIV_FOR(current->domain, d) )
2384 goto out;
2386 rc = -EINVAL;
2387 if ( !is_hvm_domain(d) )
2388 goto out;
2390 rc = xsm_hvm_set_pci_link_route(d);
2391 if ( rc )
2392 goto out;
2394 rc = 0;
2395 hvm_set_pci_link_route(d, op.link, op.isa_irq);
2397 out:
2398 rcu_unlock_domain(d);
2399 return rc;
2400 }
2402 static int hvmop_flush_tlb_all(void)
2403 {
2404 struct domain *d = current->domain;
2405 struct vcpu *v;
2407 if ( !is_hvm_domain(d) )
2408 return -EINVAL;
2410 /* Avoid deadlock if more than one vcpu tries this at the same time. */
2411 if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
2412 return -EAGAIN;
2414 /* Pause all other vcpus. */
2415 for_each_vcpu ( d, v )
2416 if ( v != current )
2417 vcpu_pause_nosync(v);
2419 /* Now that all VCPUs are signalled to deschedule, we wait... */
2420 for_each_vcpu ( d, v )
2421 if ( v != current )
2422 while ( !vcpu_runnable(v) && v->is_running )
2423 cpu_relax();
2425 /* All other vcpus are paused, safe to unlock now. */
2426 spin_unlock(&d->hypercall_deadlock_mutex);
2428 /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
2429 for_each_vcpu ( d, v )
2430 paging_update_cr3(v);
2432 /* Flush all dirty TLBs. */
2433 flush_tlb_mask(&d->domain_dirty_cpumask);
2435 /* Done. */
2436 for_each_vcpu ( d, v )
2437 if ( v != current )
2438 vcpu_unpause(v);
2440 return 0;
2441 }
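/*
 * The trylock on hypercall_deadlock_mutex is what prevents two vcpus from
 * attempting this flush concurrently: each would pause the other and then
 * spin forever waiting for it to deschedule. A loser simply gets -EAGAIN,
 * which do_hvm_op() turns into a hypercall continuation, so the request is
 * retried until the lock is free.
 */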
2443 long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
2445 {
2446 struct domain *curr_d = current->domain;
2447 long rc = 0;
2449 switch ( op )
2450 {
2451 case HVMOP_set_param:
2452 case HVMOP_get_param:
2453 {
2454 struct xen_hvm_param a;
2455 struct hvm_ioreq_page *iorp;
2456 struct domain *d;
2457 struct vcpu *v;
2459 if ( copy_from_guest(&a, arg, 1) )
2460 return -EFAULT;
2462 if ( a.index >= HVM_NR_PARAMS )
2463 return -EINVAL;
2465 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2466 if ( rc != 0 )
2467 return rc;
2469 rc = -EINVAL;
2470 if ( !is_hvm_domain(d) )
2471 goto param_fail;
2473 rc = xsm_hvm_param(d, op);
2474 if ( rc )
2475 goto param_fail;
2477 if ( op == HVMOP_set_param )
2478 {
2479 rc = 0;
2481 switch ( a.index )
2482 {
2483 case HVM_PARAM_IOREQ_PFN:
2484 iorp = &d->arch.hvm_domain.ioreq;
2485 if ( (rc = hvm_set_ioreq_page(d, iorp, a.value)) != 0 )
2486 break;
2487 spin_lock(&iorp->lock);
2488 if ( iorp->va != NULL )
2489 /* Initialise evtchn port info if VCPUs already created. */
2490 for_each_vcpu ( d, v )
2491 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
2492 spin_unlock(&iorp->lock);
2493 break;
2494 case HVM_PARAM_BUFIOREQ_PFN:
2495 iorp = &d->arch.hvm_domain.buf_ioreq;
2496 rc = hvm_set_ioreq_page(d, iorp, a.value);
2497 break;
2498 case HVM_PARAM_CALLBACK_IRQ:
2499 hvm_set_callback_via(d, a.value);
2500 hvm_latch_shinfo_size(d);
2501 break;
2502 case HVM_PARAM_TIMER_MODE:
2503 if ( a.value > HVMPTM_one_missed_tick_pending )
2504 rc = -EINVAL;
2505 break;
2506 case HVM_PARAM_VIRIDIAN:
2507 if ( a.value > 1 )
2508 rc = -EINVAL;
2509 break;
2510 case HVM_PARAM_IDENT_PT:
2511 /* Not reflexive, as we must domain_pause(). */
2512 rc = -EPERM;
2513 if ( curr_d == d )
2514 break;
2516 rc = -EINVAL;
2517 if ( d->arch.hvm_domain.params[a.index] != 0 )
2518 break;
2520 rc = 0;
2521 if ( !paging_mode_hap(d) )
2522 break;
2524 /*
2525 * Update GUEST_CR3 in each VMCS to point at identity map.
2526 * All foreign updates to guest state must synchronise on
2527 * the domctl_lock.
2528 */
2529 rc = -EAGAIN;
2530 if ( !domctl_lock_acquire() )
2531 break;
2533 rc = 0;
2534 domain_pause(d);
2535 d->arch.hvm_domain.params[a.index] = a.value;
2536 for_each_vcpu ( d, v )
2537 paging_update_cr3(v);
2538 domain_unpause(d);
2540 domctl_lock_release();
2541 break;
2542 case HVM_PARAM_DM_DOMAIN:
2543 /* Not reflexive, as we must domain_pause(). */
2544 rc = -EPERM;
2545 if ( curr_d == d )
2546 break;
2548 if ( a.value == DOMID_SELF )
2549 a.value = curr_d->domain_id;
2551 rc = 0;
2552 domain_pause(d); /* safe to change per-vcpu xen_port */
2553 iorp = &d->arch.hvm_domain.ioreq;
2554 for_each_vcpu ( d, v )
2555 {
2556 int old_port, new_port;
2557 new_port = alloc_unbound_xen_event_channel(v, a.value);
2558 if ( new_port < 0 )
2559 {
2560 rc = new_port;
2561 break;
2562 }
2563 /* xchg() ensures that only we free_xen_event_channel() */
2564 old_port = xchg(&v->arch.hvm_vcpu.xen_port, new_port);
2565 free_xen_event_channel(v, old_port);
2566 spin_lock(&iorp->lock);
2567 if ( iorp->va != NULL )
2568 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
2569 spin_unlock(&iorp->lock);
2570 }
2571 domain_unpause(d);
2572 break;
2573 case HVM_PARAM_ACPI_S_STATE:
2574 /* Not reflexive, as we must domain_pause(). */
2575 rc = -EPERM;
2576 if ( curr_d == d )
2577 break;
2579 rc = 0;
2580 if ( a.value == 3 )
2581 hvm_s3_suspend(d);
2582 else if ( a.value == 0 )
2583 hvm_s3_resume(d);
2584 else
2585 rc = -EINVAL;
2587 break;
2588 }
2590 if ( rc == 0 )
2591 d->arch.hvm_domain.params[a.index] = a.value;
2592 }
2593 else
2594 {
2595 switch ( a.index )
2596 {
2597 case HVM_PARAM_ACPI_S_STATE:
2598 a.value = d->arch.hvm_domain.is_s3_suspended ? 3 : 0;
2599 break;
2600 default:
2601 a.value = d->arch.hvm_domain.params[a.index];
2602 break;
2603 }
2604 rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
2605 }
2607 HVM_DBG_LOG(DBG_LEVEL_HCALL, "%s param %u = %"PRIx64,
2608 op == HVMOP_set_param ? "set" : "get",
2609 a.index, a.value);
2611 param_fail:
2612 rcu_unlock_domain(d);
2613 break;
2614 }
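/*
 * HVMOP_set_param/HVMOP_get_param are the main configuration channel between
 * the toolstack/device model and an HVM domain, and a few parameters are also
 * read back by the guest itself. Illustrative sketch only, built from the
 * public interface (struct xen_hvm_param, HVM_PARAM_STORE_EVTCHN);
 * HYPERVISOR_hvm_op stands for whatever hypercall wrapper the guest OS
 * provides, and store_evtchn is a hypothetical variable:
 *
 *     struct xen_hvm_param a = {
 *         .domid = DOMID_SELF,
 *         .index = HVM_PARAM_STORE_EVTCHN,
 *     };
 *     if ( HYPERVISOR_hvm_op(HVMOP_get_param, &a) == 0 )
 *         store_evtchn = a.value;
 */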
2616 case HVMOP_set_pci_intx_level:
2617 rc = hvmop_set_pci_intx_level(
2618 guest_handle_cast(arg, xen_hvm_set_pci_intx_level_t));
2619 break;
2621 case HVMOP_set_isa_irq_level:
2622 rc = hvmop_set_isa_irq_level(
2623 guest_handle_cast(arg, xen_hvm_set_isa_irq_level_t));
2624 break;
2626 case HVMOP_set_pci_link_route:
2627 rc = hvmop_set_pci_link_route(
2628 guest_handle_cast(arg, xen_hvm_set_pci_link_route_t));
2629 break;
2631 case HVMOP_flush_tlbs:
2632 rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS;
2633 break;
2635 case HVMOP_track_dirty_vram:
2636 {
2637 struct xen_hvm_track_dirty_vram a;
2638 struct domain *d;
2640 if ( copy_from_guest(&a, arg, 1) )
2641 return -EFAULT;
2643 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2644 if ( rc != 0 )
2645 return rc;
2647 rc = -EINVAL;
2648 if ( !is_hvm_domain(d) )
2649 goto param_fail2;
2651 rc = xsm_hvm_param(d, op);
2652 if ( rc )
2653 goto param_fail2;
2655 rc = -ESRCH;
2656 if ( d->is_dying )
2657 goto param_fail2;
2659 rc = -EINVAL;
2660 if ( d->vcpu[0] == NULL )
2661 goto param_fail2;
2663 if ( shadow_mode_enabled(d) )
2664 rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
2665 else
2666 rc = hap_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
2668 param_fail2:
2669 rcu_unlock_domain(d);
2670 break;
2671 }
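/*
 * HVMOP_track_dirty_vram lets the device model discover which pages of the
 * emulated VGA framebuffer the guest has written, so only the dirty portions
 * need redrawing; the request is simply dispatched to the shadow or HAP
 * implementation depending on the paging mode in use.
 */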
2673 case HVMOP_modified_memory:
2674 {
2675 struct xen_hvm_modified_memory a;
2676 struct domain *d;
2677 unsigned long pfn;
2679 if ( copy_from_guest(&a, arg, 1) )
2680 return -EFAULT;
2682 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2683 if ( rc != 0 )
2684 return rc;
2686 rc = -EINVAL;
2687 if ( !is_hvm_domain(d) )
2688 goto param_fail3;
2690 rc = xsm_hvm_param(d, op);
2691 if ( rc )
2692 goto param_fail3;
2694 rc = -EINVAL;
2695 if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
2696 ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
2697 ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
2698 goto param_fail3;
2700 rc = 0;
2701 if ( !paging_mode_log_dirty(d) )
2702 goto param_fail3;
2704 for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
2705 {
2706 p2m_type_t t;
2707 mfn_t mfn = gfn_to_mfn(d, pfn, &t);
2708 if ( mfn_x(mfn) != INVALID_MFN )
2709 {
2710 paging_mark_dirty(d, mfn_x(mfn));
2711 /* These are most probably not page tables any more, */
2712 /* so don't take a long time and don't die either. */
2713 sh_remove_shadows(d->vcpu[0], mfn, 1, 0);
2714 }
2715 }
2717 param_fail3:
2718 rcu_unlock_domain(d);
2719 break;
2720 }
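/*
 * HVMOP_modified_memory is used (e.g. by the device model during live
 * migration) to report guest pages written behind the hypervisor's back, such
 * as DMA targets: when log-dirty mode is active the pages are marked dirty
 * and any shadows of them are dropped cheaply.
 */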
2722 case HVMOP_set_mem_type:
2723 {
2724 struct xen_hvm_set_mem_type a;
2725 struct domain *d;
2726 unsigned long pfn;
2728 /* Interface types to internal p2m types */
2729 p2m_type_t memtype[] = {
2730 p2m_ram_rw, /* HVMMEM_ram_rw */
2731 p2m_ram_ro, /* HVMMEM_ram_ro */
2732 p2m_mmio_dm /* HVMMEM_mmio_dm */
2733 };
2735 if ( copy_from_guest(&a, arg, 1) )
2736 return -EFAULT;
2738 rc = rcu_lock_target_domain_by_id(a.domid, &d);
2739 if ( rc != 0 )
2740 return rc;
2742 rc = -EINVAL;
2743 if ( !is_hvm_domain(d) )
2744 goto param_fail4;
2746 rc = -EINVAL;
2747 if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
2748 ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
2749 ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
2750 goto param_fail4;
2752 if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
2753 goto param_fail4;
2755 rc = 0;
2757 for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
2758 {
2759 p2m_type_t t;
2760 mfn_t mfn;
2761 mfn = gfn_to_mfn(d, pfn, &t);
2762 p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
2763 }
2765 param_fail4:
2766 rcu_unlock_domain(d);
2767 break;
2768 }
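/*
 * HVMOP_set_mem_type rewrites the p2m type of a pfn range using the memtype[]
 * mapping above (HVMMEM_ram_rw / HVMMEM_ram_ro / HVMMEM_mmio_dm). Unlike the
 * other handlers, the per-page outcome inside the loop is not reported back;
 * the types are changed page by page on a best-effort basis.
 */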
2770 default:
2771 {
2772 gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
2773 rc = -ENOSYS;
2774 break;
2775 }
2776 }
2778 if ( rc == -EAGAIN )
2779 rc = hypercall_create_continuation(
2780 __HYPERVISOR_hvm_op, "lh", op, arg);
2782 return rc;
2783 }
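/*
 * Any handler above may return -EAGAIN (e.g. hvmop_flush_tlb_all() when it
 * loses the race for hypercall_deadlock_mutex, or HVM_PARAM_IDENT_PT when the
 * domctl lock cannot be taken); in that case the operation is restarted via a
 * hypercall continuation rather than reporting the error to the caller.
 */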
2785 int hvm_debug_op(struct vcpu *v, int32_t op)
2786 {
2787 int rc;
2789 switch ( op )
2790 {
2791 case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
2792 case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
2793 rc = -ENOSYS;
2794 if ( !cpu_has_monitor_trap_flag )
2795 break;
2796 rc = 0;
2797 vcpu_pause(v);
2798 v->arch.hvm_vcpu.single_step =
2799 (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON);
2800 vcpu_unpause(v); /* guest will latch new state */
2801 break;
2802 default:
2803 rc = -ENOSYS;
2804 break;
2805 }
2807 return rc;
2808 }
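/*
 * Single-stepping for the debugger relies on the hardware Monitor Trap Flag,
 * hence the cpu_has_monitor_trap_flag gate: on processors without MTF (it is
 * a VMX feature with no direct equivalent used here on SVM) the request fails
 * with -ENOSYS.
 */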
2811 /*
2812 * Local variables:
2813 * mode: C
2814 * c-set-style: "BSD"
2815 * c-basic-offset: 4
2816 * tab-width: 4
2817 * indent-tabs-mode: nil
2818 * End:
2819 */