
view xen/arch/x86/hvm/hvm.c @ 18432:1e98ea5c8604

x86: Fix guest_handle_okay/guest_handle_subrange_okay

The guest handle checks should use paging_* predicates, not shadow_*.
Also tidy up a few places where p2m definitions were being imported
via asm/guest_access.h -> asm/shadow.h -> asm/p2m.h

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Sep 03 14:16:35 2008 +0100 (2008-09-03)
parents f454f2cac170
children 1abe2bf8c0fa
line source
1 /*
2 * hvm.c: Common hardware virtual machine abstractions.
3 *
4 * Copyright (c) 2004, Intel Corporation.
5 * Copyright (c) 2005, International Business Machines Corporation.
6 * Copyright (c) 2008, Citrix Systems, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
22 #include <xen/config.h>
23 #include <xen/init.h>
24 #include <xen/lib.h>
25 #include <xen/trace.h>
26 #include <xen/sched.h>
27 #include <xen/irq.h>
28 #include <xen/softirq.h>
29 #include <xen/domain.h>
30 #include <xen/domain_page.h>
31 #include <xen/hypercall.h>
32 #include <xen/guest_access.h>
33 #include <xen/event.h>
34 #include <xen/paging.h>
35 #include <asm/shadow.h>
36 #include <asm/current.h>
37 #include <asm/e820.h>
38 #include <asm/io.h>
39 #include <asm/regs.h>
40 #include <asm/cpufeature.h>
41 #include <asm/processor.h>
42 #include <asm/types.h>
43 #include <asm/msr.h>
44 #include <asm/mc146818rtc.h>
45 #include <asm/spinlock.h>
46 #include <asm/hvm/hvm.h>
47 #include <asm/hvm/vpt.h>
48 #include <asm/hvm/support.h>
49 #include <asm/hvm/cacheattr.h>
50 #include <asm/hvm/trace.h>
51 #include <public/sched.h>
52 #include <public/hvm/ioreq.h>
53 #include <public/version.h>
54 #include <public/memory.h>
56 int hvm_enabled __read_mostly;
58 unsigned int opt_hvm_debug_level __read_mostly;
59 integer_param("hvm_debug", opt_hvm_debug_level);
61 int opt_softtsc;
62 boolean_param("softtsc", opt_softtsc);
64 struct hvm_function_table hvm_funcs __read_mostly;
66 /* I/O permission bitmap is globally shared by all HVM guests. */
67 unsigned long __attribute__ ((__section__ (".bss.page_aligned")))
68 hvm_io_bitmap[3*PAGE_SIZE/BYTES_PER_LONG];
70 void hvm_enable(struct hvm_function_table *fns)
71 {
72 BUG_ON(hvm_enabled);
73 printk("HVM: %s enabled\n", fns->name);
75 /*
76 * Allow direct access to the PC debug port (it is often used for I/O
77 * delays, but the vmexits simply slow things down).
78 */
79 memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
80 __clear_bit(0x80, hvm_io_bitmap);
82 hvm_funcs = *fns;
83 hvm_enabled = 1;
85 if ( hvm_funcs.hap_supported )
86 printk("HVM: Hardware Assisted Paging detected.\n");
87 }
89 /*
90 * Need to re-inject a given event? We avoid re-injecting software exceptions
91 * and interrupts because the faulting/trapping instruction can simply be
92 * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
93 * INT3/INTO/INTn).
94 */
95 int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
96 {
97 switch ( type )
98 {
99 case X86_EVENTTYPE_EXT_INTR:
100 case X86_EVENTTYPE_NMI:
101 return 1;
102 case X86_EVENTTYPE_HW_EXCEPTION:
103 /*
104 * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
105 * check for these vectors, as they are really SW Exceptions. SVM has
106 * not updated RIP to point after the trapping instruction (INT3/INTO).
107 */
108 return (vector != 3) && (vector != 4);
109 default:
110 /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
111 break;
112 }
113 return 0;
114 }
116 /*
117 * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1.
118 * This means we can assume that @vec2 is contributory or a page fault.
119 */
120 uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
121 {
122 /* Exception during double-fault delivery always causes a triple fault. */
123 if ( vec1 == TRAP_double_fault )
124 {
125 hvm_triple_fault();
126 return TRAP_double_fault; /* dummy return */
127 }
129 /* Exception during page-fault delivery always causes a double fault. */
130 if ( vec1 == TRAP_page_fault )
131 return TRAP_double_fault;
133 /* Discard the first exception if it's benign or if we now have a #PF. */
134 if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) )
135 return vec2;
137 /* Cannot combine the exceptions: double fault. */
138 return TRAP_double_fault;
139 }
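/*
 * Guest TSC is modelled as the host TSC plus a per-vcpu offset, cached in
 * arch.hvm_vcpu so it can be re-applied via hvm_funcs.set_tsc_offset().
 * With the "softtsc" option, reads are based on guest time rather than a
 * raw RDTSC.
 */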
141 void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
142 {
143 u64 host_tsc;
145 rdtscll(host_tsc);
147 v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
148 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
149 }
151 u64 hvm_get_guest_tsc(struct vcpu *v)
152 {
153 u64 host_tsc;
155 if ( opt_softtsc )
156 host_tsc = hvm_get_guest_time(v);
157 else
158 rdtscll(host_tsc);
160 return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
161 }
163 void hvm_migrate_timers(struct vcpu *v)
164 {
165 rtc_migrate_timers(v);
166 hpet_migrate_timers(v);
167 pt_migrate(v);
168 }
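/*
 * Restore periodic timers and wait for any in-flight ioreq to the device
 * model to complete before the vcpu re-enters guest context.
 */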
170 void hvm_do_resume(struct vcpu *v)
171 {
172 ioreq_t *p;
174 pt_restore_timer(v);
176 /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
177 p = &get_ioreq(v)->vp_ioreq;
178 while ( p->state != STATE_IOREQ_NONE )
179 {
180 switch ( p->state )
181 {
182 case STATE_IORESP_READY: /* IORESP_READY -> NONE */
183 hvm_io_assist();
184 break;
185 case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
186 case STATE_IOREQ_INPROCESS:
187 wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
188 (p->state != STATE_IOREQ_READY) &&
189 (p->state != STATE_IOREQ_INPROCESS));
190 break;
191 default:
192 gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
193 domain_crash(v->domain);
194 return; /* bail */
195 }
196 }
197 }
199 static void hvm_init_ioreq_page(
200 struct domain *d, struct hvm_ioreq_page *iorp)
201 {
202 memset(iorp, 0, sizeof(*iorp));
203 spin_lock_init(&iorp->lock);
204 domain_pause(d);
205 }
207 static void hvm_destroy_ioreq_page(
208 struct domain *d, struct hvm_ioreq_page *iorp)
209 {
210 spin_lock(&iorp->lock);
212 ASSERT(d->is_dying);
214 if ( iorp->va != NULL )
215 {
216 unmap_domain_page_global(iorp->va);
217 put_page_and_type(iorp->page);
218 iorp->va = NULL;
219 }
221 spin_unlock(&iorp->lock);
222 }
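/*
 * Map a guest-supplied frame as the (buffered) ioreq page: take a writable
 * type reference on the page, map it globally, and publish it under the
 * ioreq lock. The domain was paused in hvm_init_ioreq_page() and is
 * unpaused once the page is in place.
 */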
224 static int hvm_set_ioreq_page(
225 struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
226 {
227 struct page_info *page;
228 p2m_type_t p2mt;
229 unsigned long mfn;
230 void *va;
232 mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
233 if ( !p2m_is_ram(p2mt) )
234 return -EINVAL;
235 ASSERT(mfn_valid(mfn));
237 page = mfn_to_page(mfn);
238 if ( !get_page_and_type(page, d, PGT_writable_page) )
239 return -EINVAL;
241 va = map_domain_page_global(mfn);
242 if ( va == NULL )
243 {
244 put_page_and_type(page);
245 return -ENOMEM;
246 }
248 spin_lock(&iorp->lock);
250 if ( (iorp->va != NULL) || d->is_dying )
251 {
252 spin_unlock(&iorp->lock);
253 unmap_domain_page_global(va);
254 put_page_and_type(mfn_to_page(mfn));
255 return -EINVAL;
256 }
258 iorp->va = va;
259 iorp->page = page;
261 spin_unlock(&iorp->lock);
263 domain_unpause(d);
265 return 0;
266 }
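/*
 * Handler for the 0xe9 debug I/O port (registered below): buffer guest
 * output per domain and flush it to the Xen console as complete lines.
 */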
268 static int hvm_print_line(
269 int dir, uint32_t port, uint32_t bytes, uint32_t *val)
270 {
271 struct vcpu *curr = current;
272 struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
273 char c = *val;
275 BUG_ON(bytes != 1);
277 spin_lock(&hd->pbuf_lock);
278 hd->pbuf[hd->pbuf_idx++] = c;
279 if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
280 {
281 if ( c != '\n' )
282 hd->pbuf[hd->pbuf_idx++] = '\n';
283 hd->pbuf[hd->pbuf_idx] = '\0';
284 printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf);
285 hd->pbuf_idx = 0;
286 }
287 spin_unlock(&hd->pbuf_lock);
289 return X86EMUL_OKAY;
290 }
292 int hvm_domain_initialise(struct domain *d)
293 {
294 int rc;
296 if ( !hvm_enabled )
297 {
298 gdprintk(XENLOG_WARNING, "Attempt to create an HVM guest "
299 "on a non-VT/AMDV platform.\n");
300 return -EINVAL;
301 }
303 spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
304 spin_lock_init(&d->arch.hvm_domain.irq_lock);
305 spin_lock_init(&d->arch.hvm_domain.uc_lock);
307 hvm_init_guest_time(d);
309 d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
311 hvm_init_cacheattr_region_list(d);
313 rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
314 if ( rc != 0 )
315 goto fail1;
317 vpic_init(d);
319 rc = vioapic_init(d);
320 if ( rc != 0 )
321 goto fail1;
323 stdvga_init(d);
325 rtc_init(d);
327 hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
328 hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
330 register_portio_handler(d, 0xe9, 1, hvm_print_line);
332 rc = hvm_funcs.domain_initialise(d);
333 if ( rc != 0 )
334 goto fail2;
336 return 0;
338 fail2:
339 rtc_deinit(d);
340 stdvga_deinit(d);
341 vioapic_deinit(d);
342 fail1:
343 hvm_destroy_cacheattr_region_list(d);
344 return rc;
345 }
347 void hvm_domain_relinquish_resources(struct domain *d)
348 {
349 hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
350 hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
352 /* Stop all asynchronous timer actions. */
353 rtc_deinit(d);
354 if ( d->vcpu[0] != NULL )
355 {
356 pit_deinit(d);
357 pmtimer_deinit(d);
358 hpet_deinit(d);
359 }
360 }
362 void hvm_domain_destroy(struct domain *d)
363 {
364 hvm_funcs.domain_destroy(d);
365 rtc_deinit(d);
366 stdvga_deinit(d);
367 vioapic_deinit(d);
368 hvm_destroy_cacheattr_region_list(d);
369 }
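/*
 * Save/restore of per-vcpu CPU state (segments, GPRs, debug registers and
 * the vendor-specific VMCS/VMCB bits) for HVM save records.
 */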
371 static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
372 {
373 struct vcpu *v;
374 struct hvm_hw_cpu ctxt;
375 struct segment_register seg;
376 struct vcpu_guest_context *vc;
378 for_each_vcpu ( d, v )
379 {
380 /* We don't need to save state for a vcpu that is down; the restore
381 * code will leave it down if there is nothing saved. */
382 if ( test_bit(_VPF_down, &v->pause_flags) )
383 continue;
385 /* Architecture-specific vmcs/vmcb bits */
386 hvm_funcs.save_cpu_ctxt(v, &ctxt);
388 hvm_get_segment_register(v, x86_seg_idtr, &seg);
389 ctxt.idtr_limit = seg.limit;
390 ctxt.idtr_base = seg.base;
392 hvm_get_segment_register(v, x86_seg_gdtr, &seg);
393 ctxt.gdtr_limit = seg.limit;
394 ctxt.gdtr_base = seg.base;
396 hvm_get_segment_register(v, x86_seg_cs, &seg);
397 ctxt.cs_sel = seg.sel;
398 ctxt.cs_limit = seg.limit;
399 ctxt.cs_base = seg.base;
400 ctxt.cs_arbytes = seg.attr.bytes;
402 hvm_get_segment_register(v, x86_seg_ds, &seg);
403 ctxt.ds_sel = seg.sel;
404 ctxt.ds_limit = seg.limit;
405 ctxt.ds_base = seg.base;
406 ctxt.ds_arbytes = seg.attr.bytes;
408 hvm_get_segment_register(v, x86_seg_es, &seg);
409 ctxt.es_sel = seg.sel;
410 ctxt.es_limit = seg.limit;
411 ctxt.es_base = seg.base;
412 ctxt.es_arbytes = seg.attr.bytes;
414 hvm_get_segment_register(v, x86_seg_ss, &seg);
415 ctxt.ss_sel = seg.sel;
416 ctxt.ss_limit = seg.limit;
417 ctxt.ss_base = seg.base;
418 ctxt.ss_arbytes = seg.attr.bytes;
420 hvm_get_segment_register(v, x86_seg_fs, &seg);
421 ctxt.fs_sel = seg.sel;
422 ctxt.fs_limit = seg.limit;
423 ctxt.fs_base = seg.base;
424 ctxt.fs_arbytes = seg.attr.bytes;
426 hvm_get_segment_register(v, x86_seg_gs, &seg);
427 ctxt.gs_sel = seg.sel;
428 ctxt.gs_limit = seg.limit;
429 ctxt.gs_base = seg.base;
430 ctxt.gs_arbytes = seg.attr.bytes;
432 hvm_get_segment_register(v, x86_seg_tr, &seg);
433 ctxt.tr_sel = seg.sel;
434 ctxt.tr_limit = seg.limit;
435 ctxt.tr_base = seg.base;
436 ctxt.tr_arbytes = seg.attr.bytes;
438 hvm_get_segment_register(v, x86_seg_ldtr, &seg);
439 ctxt.ldtr_sel = seg.sel;
440 ctxt.ldtr_limit = seg.limit;
441 ctxt.ldtr_base = seg.base;
442 ctxt.ldtr_arbytes = seg.attr.bytes;
444 vc = &v->arch.guest_context;
446 if ( v->fpu_initialised )
447 memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
448 else
449 memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
451 ctxt.rax = vc->user_regs.eax;
452 ctxt.rbx = vc->user_regs.ebx;
453 ctxt.rcx = vc->user_regs.ecx;
454 ctxt.rdx = vc->user_regs.edx;
455 ctxt.rbp = vc->user_regs.ebp;
456 ctxt.rsi = vc->user_regs.esi;
457 ctxt.rdi = vc->user_regs.edi;
458 ctxt.rsp = vc->user_regs.esp;
459 ctxt.rip = vc->user_regs.eip;
460 ctxt.rflags = vc->user_regs.eflags;
461 #ifdef __x86_64__
462 ctxt.r8 = vc->user_regs.r8;
463 ctxt.r9 = vc->user_regs.r9;
464 ctxt.r10 = vc->user_regs.r10;
465 ctxt.r11 = vc->user_regs.r11;
466 ctxt.r12 = vc->user_regs.r12;
467 ctxt.r13 = vc->user_regs.r13;
468 ctxt.r14 = vc->user_regs.r14;
469 ctxt.r15 = vc->user_regs.r15;
470 #endif
471 ctxt.dr0 = vc->debugreg[0];
472 ctxt.dr1 = vc->debugreg[1];
473 ctxt.dr2 = vc->debugreg[2];
474 ctxt.dr3 = vc->debugreg[3];
475 ctxt.dr6 = vc->debugreg[6];
476 ctxt.dr7 = vc->debugreg[7];
478 if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
479 return 1;
480 }
481 return 0;
482 }
484 static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
485 {
486 int vcpuid, rc;
487 struct vcpu *v;
488 struct hvm_hw_cpu ctxt;
489 struct segment_register seg;
490 struct vcpu_guest_context *vc;
492 /* Which vcpu is this? */
493 vcpuid = hvm_load_instance(h);
494 if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL )
495 {
496 gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
497 return -EINVAL;
498 }
499 vc = &v->arch.guest_context;
501 /* Need to init this vcpu before loading its contents */
502 domain_lock(d);
503 if ( !v->is_initialised )
504 if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
505 return rc;
506 domain_unlock(d);
508 if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
509 return -EINVAL;
511 /* Sanity check some control registers. */
512 if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
513 !(ctxt.cr0 & X86_CR0_ET) ||
514 ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
515 {
516 gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
517 ctxt.cr0);
518 return -EINVAL;
519 }
521 if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
522 {
523 gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
524 ctxt.cr4);
525 return -EINVAL;
526 }
528 if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA |
529 EFER_NX | EFER_SCE)) ||
530 ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
531 (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
532 (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
533 (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) ||
534 ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
535 {
536 gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
537 ctxt.msr_efer);
538 return -EINVAL;
539 }
541 /* Architecture-specific vmcs/vmcb bits */
542 if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
543 return -EINVAL;
545 seg.limit = ctxt.idtr_limit;
546 seg.base = ctxt.idtr_base;
547 hvm_set_segment_register(v, x86_seg_idtr, &seg);
549 seg.limit = ctxt.gdtr_limit;
550 seg.base = ctxt.gdtr_base;
551 hvm_set_segment_register(v, x86_seg_gdtr, &seg);
553 seg.sel = ctxt.cs_sel;
554 seg.limit = ctxt.cs_limit;
555 seg.base = ctxt.cs_base;
556 seg.attr.bytes = ctxt.cs_arbytes;
557 hvm_set_segment_register(v, x86_seg_cs, &seg);
559 seg.sel = ctxt.ds_sel;
560 seg.limit = ctxt.ds_limit;
561 seg.base = ctxt.ds_base;
562 seg.attr.bytes = ctxt.ds_arbytes;
563 hvm_set_segment_register(v, x86_seg_ds, &seg);
565 seg.sel = ctxt.es_sel;
566 seg.limit = ctxt.es_limit;
567 seg.base = ctxt.es_base;
568 seg.attr.bytes = ctxt.es_arbytes;
569 hvm_set_segment_register(v, x86_seg_es, &seg);
571 seg.sel = ctxt.ss_sel;
572 seg.limit = ctxt.ss_limit;
573 seg.base = ctxt.ss_base;
574 seg.attr.bytes = ctxt.ss_arbytes;
575 hvm_set_segment_register(v, x86_seg_ss, &seg);
577 seg.sel = ctxt.fs_sel;
578 seg.limit = ctxt.fs_limit;
579 seg.base = ctxt.fs_base;
580 seg.attr.bytes = ctxt.fs_arbytes;
581 hvm_set_segment_register(v, x86_seg_fs, &seg);
583 seg.sel = ctxt.gs_sel;
584 seg.limit = ctxt.gs_limit;
585 seg.base = ctxt.gs_base;
586 seg.attr.bytes = ctxt.gs_arbytes;
587 hvm_set_segment_register(v, x86_seg_gs, &seg);
589 seg.sel = ctxt.tr_sel;
590 seg.limit = ctxt.tr_limit;
591 seg.base = ctxt.tr_base;
592 seg.attr.bytes = ctxt.tr_arbytes;
593 hvm_set_segment_register(v, x86_seg_tr, &seg);
595 seg.sel = ctxt.ldtr_sel;
596 seg.limit = ctxt.ldtr_limit;
597 seg.base = ctxt.ldtr_base;
598 seg.attr.bytes = ctxt.ldtr_arbytes;
599 hvm_set_segment_register(v, x86_seg_ldtr, &seg);
601 memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
603 vc->user_regs.eax = ctxt.rax;
604 vc->user_regs.ebx = ctxt.rbx;
605 vc->user_regs.ecx = ctxt.rcx;
606 vc->user_regs.edx = ctxt.rdx;
607 vc->user_regs.ebp = ctxt.rbp;
608 vc->user_regs.esi = ctxt.rsi;
609 vc->user_regs.edi = ctxt.rdi;
610 vc->user_regs.esp = ctxt.rsp;
611 vc->user_regs.eip = ctxt.rip;
612 vc->user_regs.eflags = ctxt.rflags | 2;
613 #ifdef __x86_64__
614 vc->user_regs.r8 = ctxt.r8;
615 vc->user_regs.r9 = ctxt.r9;
616 vc->user_regs.r10 = ctxt.r10;
617 vc->user_regs.r11 = ctxt.r11;
618 vc->user_regs.r12 = ctxt.r12;
619 vc->user_regs.r13 = ctxt.r13;
620 vc->user_regs.r14 = ctxt.r14;
621 vc->user_regs.r15 = ctxt.r15;
622 #endif
623 vc->debugreg[0] = ctxt.dr0;
624 vc->debugreg[1] = ctxt.dr1;
625 vc->debugreg[2] = ctxt.dr2;
626 vc->debugreg[3] = ctxt.dr3;
627 vc->debugreg[6] = ctxt.dr6;
628 vc->debugreg[7] = ctxt.dr7;
630 vc->flags = VGCF_online;
631 v->fpu_initialised = 1;
633 /* Auxiliary processors should be woken immediately. */
634 if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
635 vcpu_wake(v);
637 return 0;
638 }
640 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
641 1, HVMSR_PER_VCPU);
643 int hvm_vcpu_initialise(struct vcpu *v)
644 {
645 int rc;
647 if ( (rc = vlapic_init(v)) != 0 )
648 goto fail1;
650 if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
651 goto fail2;
653 /* Create ioreq event channel. */
654 rc = alloc_unbound_xen_event_channel(v, 0);
655 if ( rc < 0 )
656 goto fail3;
658 /* Register ioreq event channel. */
659 v->arch.hvm_vcpu.xen_port = rc;
660 spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
661 if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
662 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
663 spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);
665 spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
666 INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
668 rc = hvm_vcpu_cacheattr_init(v);
669 if ( rc != 0 )
670 goto fail3;
672 v->arch.guest_context.user_regs.eflags = 2;
674 if ( v->vcpu_id == 0 )
675 {
676 /* NB. All these really belong in hvm_domain_initialise(). */
677 pit_init(v, cpu_khz);
678 pmtimer_init(v);
679 hpet_init(v);
681 /* Init guest TSC to start from zero. */
682 hvm_set_guest_tsc(v, 0);
684 /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
685 v->is_initialised = 1;
686 clear_bit(_VPF_down, &v->pause_flags);
687 }
689 return 0;
691 fail3:
692 hvm_funcs.vcpu_destroy(v);
693 fail2:
694 vlapic_destroy(v);
695 fail1:
696 return rc;
697 }
699 void hvm_vcpu_destroy(struct vcpu *v)
700 {
701 hvm_vcpu_cacheattr_destroy(v);
702 vlapic_destroy(v);
703 hvm_funcs.vcpu_destroy(v);
705 /* Event channel is already freed by evtchn_destroy(). */
706 /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
707 }
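/*
 * Mark a vcpu as offline; if it was the last online vcpu, power off the
 * whole domain.
 */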
709 void hvm_vcpu_down(struct vcpu *v)
710 {
711 struct domain *d = v->domain;
712 int online_count = 0;
714 /* Doesn't halt us immediately, but we'll never return to guest context. */
715 set_bit(_VPF_down, &v->pause_flags);
716 vcpu_sleep_nosync(v);
718 /* Any other VCPUs online? ... */
719 domain_lock(d);
720 for_each_vcpu ( d, v )
721 if ( !test_bit(_VPF_down, &v->pause_flags) )
722 online_count++;
723 domain_unlock(d);
725 /* ... Shut down the domain if not. */
726 if ( online_count == 0 )
727 {
728 gdprintk(XENLOG_INFO, "All CPUs offline -- powering off.\n");
729 domain_shutdown(d, SHUTDOWN_poweroff);
730 }
731 }
733 void hvm_send_assist_req(struct vcpu *v)
734 {
735 ioreq_t *p;
737 if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
738 return; /* implicitly bins the i/o operation */
740 p = &get_ioreq(v)->vp_ioreq;
741 if ( unlikely(p->state != STATE_IOREQ_NONE) )
742 {
743 /* This indicates a bug in the device model. Crash the domain. */
744 gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
745 domain_crash(v->domain);
746 return;
747 }
749 prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
751 /*
752 * Following happens /after/ blocking and setting up ioreq contents.
753 * prepare_wait_on_xen_event_channel() is an implicit barrier.
754 */
755 p->state = STATE_IOREQ_READY;
756 notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
757 }
759 void hvm_hlt(unsigned long rflags)
760 {
761 struct vcpu *curr = current;
763 if ( hvm_event_pending(curr) )
764 return;
766 /*
767 * If we halt with interrupts disabled, that's a pretty sure sign that we
768 * want to shut down. In a real processor, NMIs are the only way to break
769 * out of this.
770 */
771 if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
772 return hvm_vcpu_down(curr);
774 do_sched_op_compat(SCHEDOP_block, 0);
776 HVMTRACE_1D(HLT, curr, /* pending = */ vcpu_runnable(curr));
777 }
779 void hvm_triple_fault(void)
780 {
781 struct vcpu *v = current;
782 gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
783 "invoking HVM system reset.\n", v->vcpu_id);
784 domain_shutdown(v->domain, SHUTDOWN_reboot);
785 }
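/*
 * Emulate a guest write to MSR_EFER: reject reserved/unsupported bits and
 * LME changes while paging is enabled, preserving the current LMA setting.
 */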
787 int hvm_set_efer(uint64_t value)
788 {
789 struct vcpu *v = current;
791 value &= ~EFER_LMA;
793 if ( (value & ~(EFER_FFXSE | EFER_LME | EFER_NX | EFER_SCE)) ||
794 ((sizeof(long) != 8) && (value & EFER_LME)) ||
795 (!cpu_has_nx && (value & EFER_NX)) ||
796 (!cpu_has_syscall && (value & EFER_SCE)) ||
797 (!cpu_has_ffxsr && (value & EFER_FFXSE)) )
798 {
799 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
800 "EFER: %"PRIx64"\n", value);
801 hvm_inject_exception(TRAP_gp_fault, 0, 0);
802 return X86EMUL_EXCEPTION;
803 }
805 if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
806 hvm_paging_enabled(v) )
807 {
808 gdprintk(XENLOG_WARNING,
809 "Trying to change EFER.LME with paging enabled\n");
810 hvm_inject_exception(TRAP_gp_fault, 0, 0);
811 return X86EMUL_EXCEPTION;
812 }
814 value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
815 v->arch.hvm_vcpu.guest_efer = value;
816 hvm_update_guest_efer(v);
818 return X86EMUL_OKAY;
819 }
821 extern void shadow_blow_tables_per_domain(struct domain *d);
822 extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
824 /* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
825 static bool_t domain_exit_uc_mode(struct vcpu *v)
826 {
827 struct domain *d = v->domain;
828 struct vcpu *vs;
830 for_each_vcpu ( d, vs )
831 {
832 if ( (vs == v) || !vs->is_initialised )
833 continue;
834 if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
835 mtrr_pat_not_equal(vs, v) )
836 return 0;
837 }
839 return 1;
840 }
842 static void local_flush_cache(void *info)
843 {
844 wbinvd();
845 }
847 static void hvm_set_uc_mode(struct vcpu *v, bool_t is_in_uc_mode)
848 {
849 v->domain->arch.hvm_domain.is_in_uc_mode = is_in_uc_mode;
850 shadow_blow_tables_per_domain(v->domain);
851 if ( hvm_funcs.set_uc_mode )
852 return hvm_funcs.set_uc_mode(v);
853 }
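/*
 * Emulate a guest write to CR0: validate reserved bits, handle enabling or
 * disabling paging (including EFER.LMA updates and, without HAP, taking or
 * dropping a reference on the top-level guest page table), and track
 * cache-disable transitions for domains with passed-through devices.
 */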
855 int hvm_set_cr0(unsigned long value)
856 {
857 struct vcpu *v = current;
858 p2m_type_t p2mt;
859 unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
861 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
863 if ( (u32)value != value )
864 {
865 HVM_DBG_LOG(DBG_LEVEL_1,
866 "Guest attempts to set upper 32 bits in CR0: %lx",
867 value);
868 goto gpf;
869 }
871 value &= ~HVM_CR0_GUEST_RESERVED_BITS;
873 /* ET is reserved and should always be 1. */
874 value |= X86_CR0_ET;
876 if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
877 goto gpf;
879 if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
880 {
881 if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
882 {
883 if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
884 {
885 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
886 goto gpf;
887 }
888 HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
889 v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
890 hvm_update_guest_efer(v);
891 }
893 if ( !paging_mode_hap(v->domain) )
894 {
895 /* The guest CR3 must be pointing to the guest physical. */
896 gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
897 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
898 if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
899 !get_page(mfn_to_page(mfn), v->domain))
900 {
901 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
902 v->arch.hvm_vcpu.guest_cr[3], mfn);
903 domain_crash(v->domain);
904 return X86EMUL_UNHANDLEABLE;
905 }
907 /* Now arch.guest_table points to machine physical. */
908 v->arch.guest_table = pagetable_from_pfn(mfn);
910 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
911 v->arch.hvm_vcpu.guest_cr[3], mfn);
912 }
913 }
914 else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
915 {
916 /* When CR0.PG is cleared, LMA is cleared immediately. */
917 if ( hvm_long_mode_enabled(v) )
918 {
919 v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA;
920 hvm_update_guest_efer(v);
921 }
923 if ( !paging_mode_hap(v->domain) )
924 {
925 put_page(pagetable_get_page(v->arch.guest_table));
926 v->arch.guest_table = pagetable_null();
927 }
928 }
930 if ( has_arch_pdevs(v->domain) )
931 {
932 if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
933 {
934 /* Entering no fill cache mode. */
935 spin_lock(&v->domain->arch.hvm_domain.uc_lock);
936 v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
938 if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
939 {
940 /* Flush physical caches. */
941 on_each_cpu(local_flush_cache, NULL, 1, 1);
942 hvm_set_uc_mode(v, 1);
943 }
944 spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
945 }
946 else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
947 (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
948 {
949 /* Exit from no fill cache mode. */
950 spin_lock(&v->domain->arch.hvm_domain.uc_lock);
951 v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
953 if ( domain_exit_uc_mode(v) )
954 hvm_set_uc_mode(v, 0);
956 spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
957 }
958 }
960 v->arch.hvm_vcpu.guest_cr[0] = value;
961 hvm_update_guest_cr(v, 0);
963 if ( (value ^ old_value) & X86_CR0_PG )
964 paging_update_paging_modes(v);
966 return X86EMUL_OKAY;
968 gpf:
969 hvm_inject_exception(TRAP_gp_fault, 0, 0);
970 return X86EMUL_EXCEPTION;
971 }
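/*
 * Emulate a guest write to CR3. In shadow mode the new top-level page table
 * is validated and reference-counted before paging state is updated.
 */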
973 int hvm_set_cr3(unsigned long value)
974 {
975 unsigned long mfn;
976 p2m_type_t p2mt;
977 struct vcpu *v = current;
979 if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
980 (value != v->arch.hvm_vcpu.guest_cr[3]) )
981 {
982 /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
983 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
984 mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
985 if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
986 !get_page(mfn_to_page(mfn), v->domain) )
987 goto bad_cr3;
989 put_page(pagetable_get_page(v->arch.guest_table));
990 v->arch.guest_table = pagetable_from_pfn(mfn);
992 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
993 }
995 v->arch.hvm_vcpu.guest_cr[3] = value;
996 paging_update_cr3(v);
997 return X86EMUL_OKAY;
999 bad_cr3:
1000 gdprintk(XENLOG_ERR, "Invalid CR3\n");
1001 domain_crash(v->domain);
1002 return X86EMUL_UNHANDLEABLE;
1003 }
1005 int hvm_set_cr4(unsigned long value)
1006 {
1007 struct vcpu *v = current;
1008 unsigned long old_cr;
1010 if ( value & HVM_CR4_GUEST_RESERVED_BITS )
1011 {
1012 HVM_DBG_LOG(DBG_LEVEL_1,
1013 "Guest attempts to set reserved bit in CR4: %lx",
1014 value);
1015 goto gpf;
1016 }
1018 if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
1019 {
1020 HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
1021 "EFER.LMA is set");
1022 goto gpf;
1023 }
1025 old_cr = v->arch.hvm_vcpu.guest_cr[4];
1026 v->arch.hvm_vcpu.guest_cr[4] = value;
1027 hvm_update_guest_cr(v, 4);
1029 /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
1030 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
1031 paging_update_paging_modes(v);
1033 return X86EMUL_OKAY;
1035 gpf:
1036 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1037 return X86EMUL_EXCEPTION;
1038 }
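/*
 * Translate a segment:offset pair to a linear address, applying the limit,
 * type and canonical-address checks appropriate to the current execution
 * mode. Returns 1 on success, 0 if the access would fault.
 */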
1040 int hvm_virtual_to_linear_addr(
1041 enum x86_segment seg,
1042 struct segment_register *reg,
1043 unsigned long offset,
1044 unsigned int bytes,
1045 enum hvm_access_type access_type,
1046 unsigned int addr_size,
1047 unsigned long *linear_addr)
1048 {
1049 unsigned long addr = offset;
1050 uint32_t last_byte;
1052 if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
1053 {
1054 /*
1055 * REAL MODE: Don't bother with segment access checks.
1056 * Certain of them are not done in native real mode anyway.
1057 */
1058 addr = (uint32_t)(addr + reg->base);
1059 }
1060 else if ( addr_size != 64 )
1061 {
1062 /*
1063 * COMPATIBILITY MODE: Apply segment checks and add base.
1064 */
1066 switch ( access_type )
1067 {
1068 case hvm_access_read:
1069 if ( (reg->attr.fields.type & 0xa) == 0x8 )
1070 goto gpf; /* execute-only code segment */
1071 break;
1072 case hvm_access_write:
1073 if ( (reg->attr.fields.type & 0xa) != 0x2 )
1074 goto gpf; /* not a writable data segment */
1075 break;
1076 default:
1077 break;
1078 }
1080 last_byte = offset + bytes - 1;
1082 /* Is this a grows-down data segment? Special limit check if so. */
1083 if ( (reg->attr.fields.type & 0xc) == 0x4 )
1084 {
1085 /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
1086 if ( !reg->attr.fields.db )
1087 last_byte = (uint16_t)last_byte;
1089 /* Check first byte and last byte against respective bounds. */
1090 if ( (offset <= reg->limit) || (last_byte < offset) )
1091 goto gpf;
1092 }
1093 else if ( (last_byte > reg->limit) || (last_byte < offset) )
1094 goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
1096 /*
1097 * Hardware truncates to 32 bits in compatibility mode.
1098 * It does not truncate to 16 bits in 16-bit address-size mode.
1099 */
1100 addr = (uint32_t)(addr + reg->base);
1101 }
1102 else
1103 {
1104 /*
1105 * LONG MODE: FS and GS add segment base. Addresses must be canonical.
1106 */
1108 if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
1109 addr += reg->base;
1111 if ( !is_canonical_address(addr) )
1112 goto gpf;
1113 }
1115 *linear_addr = addr;
1116 return 1;
1118 gpf:
1119 return 0;
1120 }
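/*
 * Map a descriptor-table entry so it can be read (and its Accessed bit
 * updated) by the segment-load logic; the mapping is undone with
 * hvm_unmap_entry().
 */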
1122 static void *hvm_map_entry(unsigned long va)
1123 {
1124 unsigned long gfn, mfn;
1125 p2m_type_t p2mt;
1126 uint32_t pfec;
1128 if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
1129 {
1130 gdprintk(XENLOG_ERR, "Descriptor table entry "
1131 "straddles page boundary\n");
1132 domain_crash(current->domain);
1133 return NULL;
1134 }
1136 /* We're mapping on behalf of the segment-load logic, which might
1137 * write the accessed flags in the descriptors (in 32-bit mode), but
1138 * we still treat it as a kernel-mode read (i.e. no access checks). */
1139 pfec = PFEC_page_present;
1140 gfn = paging_gva_to_gfn(current, va, &pfec);
1141 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
1142 if ( !p2m_is_ram(p2mt) )
1143 {
1144 gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
1145 domain_crash(current->domain);
1146 return NULL;
1147 }
1149 ASSERT(mfn_valid(mfn));
1151 paging_mark_dirty(current->domain, mfn);
1153 return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
1154 }
1156 static void hvm_unmap_entry(void *p)
1157 {
1158 if ( p )
1159 unmap_domain_page(p);
1160 }
1162 static int hvm_load_segment_selector(
1163 struct vcpu *v, enum x86_segment seg, uint16_t sel)
1165 struct segment_register desctab, cs, segr;
1166 struct desc_struct *pdesc, desc;
1167 u8 dpl, rpl, cpl;
1168 int fault_type = TRAP_invalid_tss;
1170 /* NULL selector? */
1171 if ( (sel & 0xfffc) == 0 )
1173 if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
1174 goto fail;
1175 memset(&segr, 0, sizeof(segr));
1176 hvm_set_segment_register(v, seg, &segr);
1177 return 0;
1180 /* LDT descriptor must be in the GDT. */
1181 if ( (seg == x86_seg_ldtr) && (sel & 4) )
1182 goto fail;
1184 hvm_get_segment_register(v, x86_seg_cs, &cs);
1185 hvm_get_segment_register(
1186 v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);
1188 /* Check against descriptor table limit. */
1189 if ( ((sel & 0xfff8) + 7) > desctab.limit )
1190 goto fail;
1192 pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
1193 if ( pdesc == NULL )
1194 goto hvm_map_fail;
1196 do {
1197 desc = *pdesc;
1199 /* Segment present in memory? */
1200 if ( !(desc.b & (1u<<15)) )
1202 fault_type = TRAP_no_segment;
1203 goto unmap_and_fail;
1206 /* LDT descriptor is a system segment. All others are code/data. */
1207 if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
1208 goto unmap_and_fail;
1210 dpl = (desc.b >> 13) & 3;
1211 rpl = sel & 3;
1212 cpl = cs.sel & 3;
1214 switch ( seg )
1216 case x86_seg_cs:
1217 /* Code segment? */
1218 if ( !(desc.b & (1u<<11)) )
1219 goto unmap_and_fail;
1220 /* Non-conforming segment: check DPL against RPL. */
1221 if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
1222 goto unmap_and_fail;
1223 break;
1224 case x86_seg_ss:
1225 /* Writable data segment? */
1226 if ( (desc.b & (5u<<9)) != (1u<<9) )
1227 goto unmap_and_fail;
1228 if ( (dpl != cpl) || (dpl != rpl) )
1229 goto unmap_and_fail;
1230 break;
1231 case x86_seg_ldtr:
1232 /* LDT system segment? */
1233 if ( (desc.b & (15u<<8)) != (2u<<8) )
1234 goto unmap_and_fail;
1235 goto skip_accessed_flag;
1236 default:
1237 /* Readable code or data segment? */
1238 if ( (desc.b & (5u<<9)) == (4u<<9) )
1239 goto unmap_and_fail;
1240 /* Non-conforming segment: check DPL against RPL and CPL. */
1241 if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) )
1242 goto unmap_and_fail;
1243 break;
1245 } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
1246 (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );
1248 /* Force the Accessed flag in our local copy. */
1249 desc.b |= 0x100;
1251 skip_accessed_flag:
1252 hvm_unmap_entry(pdesc);
1254 segr.base = (((desc.b << 0) & 0xff000000u) |
1255 ((desc.b << 16) & 0x00ff0000u) |
1256 ((desc.a >> 16) & 0x0000ffffu));
1257 segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) |
1258 ((desc.b >> 12) & 0x0f00u));
1259 segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
1260 if ( segr.attr.fields.g )
1261 segr.limit = (segr.limit << 12) | 0xfffu;
1262 segr.sel = sel;
1263 hvm_set_segment_register(v, seg, &segr);
1265 return 0;
1267 unmap_and_fail:
1268 hvm_unmap_entry(pdesc);
1269 fail:
1270 hvm_inject_exception(fault_type, sel & 0xfffc, 0);
1271 hvm_map_fail:
1272 return 1;
1275 void hvm_task_switch(
1276 uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
1277 int32_t errcode)
1279 struct vcpu *v = current;
1280 struct cpu_user_regs *regs = guest_cpu_user_regs();
1281 struct segment_register gdt, tr, prev_tr, segr;
1282 struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
1283 unsigned long eflags;
1284 int exn_raised, rc;
1285 struct {
1286 u16 back_link,__blh;
1287 u32 esp0;
1288 u16 ss0, _0;
1289 u32 esp1;
1290 u16 ss1, _1;
1291 u32 esp2;
1292 u16 ss2, _2;
1293 u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
1294 u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
1295 u16 trace, iomap;
1296 } tss = { 0 };
1298 hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
1299 hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
1301 if ( ((tss_sel & 0xfff8) + 7) > gdt.limit )
1303 hvm_inject_exception((taskswitch_reason == TSW_iret) ?
1304 TRAP_invalid_tss : TRAP_gp_fault,
1305 tss_sel & 0xfff8, 0);
1306 goto out;
1309 optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
1310 if ( optss_desc == NULL )
1311 goto out;
1313 nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
1314 if ( nptss_desc == NULL )
1315 goto out;
1317 tss_desc = *nptss_desc;
1318 tr.sel = tss_sel;
1319 tr.base = (((tss_desc.b << 0) & 0xff000000u) |
1320 ((tss_desc.b << 16) & 0x00ff0000u) |
1321 ((tss_desc.a >> 16) & 0x0000ffffu));
1322 tr.attr.bytes = (((tss_desc.b >> 8) & 0x00ffu) |
1323 ((tss_desc.b >> 12) & 0x0f00u));
1324 tr.limit = (tss_desc.b & 0x000f0000u) | (tss_desc.a & 0x0000ffffu);
1325 if ( tr.attr.fields.g )
1326 tr.limit = (tr.limit << 12) | 0xfffu;
1328 if ( !tr.attr.fields.p )
1330 hvm_inject_exception(TRAP_no_segment, tss_sel & 0xfff8, 0);
1331 goto out;
1334 if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
1336 hvm_inject_exception(
1337 (taskswitch_reason == TSW_iret) ? TRAP_invalid_tss : TRAP_gp_fault,
1338 tss_sel & 0xfff8, 0);
1339 goto out;
1342 if ( tr.limit < (sizeof(tss)-1) )
1344 hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
1345 goto out;
1348 rc = hvm_copy_from_guest_virt(
1349 &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
1350 if ( rc == HVMCOPY_bad_gva_to_gfn )
1351 goto out;
1353 eflags = regs->eflags;
1354 if ( taskswitch_reason == TSW_iret )
1355 eflags &= ~X86_EFLAGS_NT;
1357 tss.cr3 = v->arch.hvm_vcpu.guest_cr[3];
1358 tss.eip = regs->eip;
1359 tss.eflags = eflags;
1360 tss.eax = regs->eax;
1361 tss.ecx = regs->ecx;
1362 tss.edx = regs->edx;
1363 tss.ebx = regs->ebx;
1364 tss.esp = regs->esp;
1365 tss.ebp = regs->ebp;
1366 tss.esi = regs->esi;
1367 tss.edi = regs->edi;
1369 hvm_get_segment_register(v, x86_seg_es, &segr);
1370 tss.es = segr.sel;
1371 hvm_get_segment_register(v, x86_seg_cs, &segr);
1372 tss.cs = segr.sel;
1373 hvm_get_segment_register(v, x86_seg_ss, &segr);
1374 tss.ss = segr.sel;
1375 hvm_get_segment_register(v, x86_seg_ds, &segr);
1376 tss.ds = segr.sel;
1377 hvm_get_segment_register(v, x86_seg_fs, &segr);
1378 tss.fs = segr.sel;
1379 hvm_get_segment_register(v, x86_seg_gs, &segr);
1380 tss.gs = segr.sel;
1381 hvm_get_segment_register(v, x86_seg_ldtr, &segr);
1382 tss.ldt = segr.sel;
1384 rc = hvm_copy_to_guest_virt(
1385 prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
1386 if ( rc == HVMCOPY_bad_gva_to_gfn )
1387 goto out;
1389 rc = hvm_copy_from_guest_virt(
1390 &tss, tr.base, sizeof(tss), PFEC_page_present);
1391 if ( rc == HVMCOPY_bad_gva_to_gfn )
1392 goto out;
1394 if ( hvm_set_cr3(tss.cr3) )
1395 goto out;
1397 regs->eip = tss.eip;
1398 regs->eflags = tss.eflags | 2;
1399 regs->eax = tss.eax;
1400 regs->ecx = tss.ecx;
1401 regs->edx = tss.edx;
1402 regs->ebx = tss.ebx;
1403 regs->esp = tss.esp;
1404 regs->ebp = tss.ebp;
1405 regs->esi = tss.esi;
1406 regs->edi = tss.edi;
1408 if ( (taskswitch_reason == TSW_call_or_int) )
1410 regs->eflags |= X86_EFLAGS_NT;
1411 tss.back_link = prev_tr.sel;
1414 exn_raised = 0;
1415 if ( hvm_load_segment_selector(v, x86_seg_es, tss.es) ||
1416 hvm_load_segment_selector(v, x86_seg_cs, tss.cs) ||
1417 hvm_load_segment_selector(v, x86_seg_ss, tss.ss) ||
1418 hvm_load_segment_selector(v, x86_seg_ds, tss.ds) ||
1419 hvm_load_segment_selector(v, x86_seg_fs, tss.fs) ||
1420 hvm_load_segment_selector(v, x86_seg_gs, tss.gs) ||
1421 hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) )
1422 exn_raised = 1;
1424 rc = hvm_copy_to_guest_virt(
1425 tr.base, &tss, sizeof(tss), PFEC_page_present);
1426 if ( rc == HVMCOPY_bad_gva_to_gfn )
1427 exn_raised = 1;
1429 if ( (tss.trace & 1) && !exn_raised )
1430 hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
1432 tr.attr.fields.type = 0xb; /* busy 32-bit tss */
1433 hvm_set_segment_register(v, x86_seg_tr, &tr);
1435 v->arch.hvm_vcpu.guest_cr[0] |= X86_CR0_TS;
1436 hvm_update_guest_cr(v, 0);
1438 if ( (taskswitch_reason == TSW_iret) ||
1439 (taskswitch_reason == TSW_jmp) )
1440 clear_bit(41, optss_desc); /* clear B flag of old task */
1442 if ( taskswitch_reason != TSW_iret )
1443 set_bit(41, nptss_desc); /* set B flag of new task */
1445 if ( errcode >= 0 )
1447 struct segment_register reg;
1448 unsigned long linear_addr;
1449 regs->esp -= 4;
1450 hvm_get_segment_register(current, x86_seg_ss, &reg);
1451 /* Todo: do not ignore access faults here. */
1452 if ( hvm_virtual_to_linear_addr(x86_seg_ss, &reg, regs->esp,
1453 4, hvm_access_write, 32,
1454 &linear_addr) )
1455 hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4, 0);
1458 out:
1459 hvm_unmap_entry(optss_desc);
1460 hvm_unmap_entry(nptss_desc);
1463 #define HVMCOPY_from_guest (0u<<0)
1464 #define HVMCOPY_to_guest (1u<<0)
1465 #define HVMCOPY_no_fault (0u<<1)
1466 #define HVMCOPY_fault (1u<<1)
1467 #define HVMCOPY_phys (0u<<2)
1468 #define HVMCOPY_virt (1u<<2)
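/*
 * Common worker for the hvm_copy_to/from_guest and hvm_fetch_from_guest
 * helpers below: the guest buffer is processed page by page, translating
 * virtual addresses through the paging code when HVMCOPY_virt is set and
 * optionally injecting #PF on translation failure when HVMCOPY_fault is set.
 */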
1469 static enum hvm_copy_result __hvm_copy(
1470 void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec)
1472 struct vcpu *curr = current;
1473 unsigned long gfn, mfn;
1474 p2m_type_t p2mt;
1475 char *p;
1476 int count, todo = size;
1478 while ( todo > 0 )
1480 count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
1482 if ( flags & HVMCOPY_virt )
1484 gfn = paging_gva_to_gfn(curr, addr, &pfec);
1485 if ( gfn == INVALID_GFN )
1487 if ( flags & HVMCOPY_fault )
1488 hvm_inject_exception(TRAP_page_fault, pfec, addr);
1489 return HVMCOPY_bad_gva_to_gfn;
1492 else
1494 gfn = addr >> PAGE_SHIFT;
1497 mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
1499 if ( !p2m_is_ram(p2mt) )
1500 return HVMCOPY_bad_gfn_to_mfn;
1501 ASSERT(mfn_valid(mfn));
1503 p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
1505 if ( flags & HVMCOPY_to_guest )
1507 if ( p2mt == p2m_ram_ro )
1509 static unsigned long lastpage;
1510 if ( xchg(&lastpage, gfn) != gfn )
1511 gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
1512 " memory page. gfn=%#lx, mfn=%#lx\n",
1513 gfn, mfn);
1515 else
1517 memcpy(p, buf, count);
1518 paging_mark_dirty(curr->domain, mfn);
1521 else
1523 memcpy(buf, p, count);
1526 unmap_domain_page(p);
1528 addr += count;
1529 buf += count;
1530 todo -= count;
1533 return HVMCOPY_okay;
1536 enum hvm_copy_result hvm_copy_to_guest_phys(
1537 paddr_t paddr, void *buf, int size)
1539 return __hvm_copy(buf, paddr, size,
1540 HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_phys,
1541 0);
1544 enum hvm_copy_result hvm_copy_from_guest_phys(
1545 void *buf, paddr_t paddr, int size)
1547 return __hvm_copy(buf, paddr, size,
1548 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_phys,
1549 0);
1552 enum hvm_copy_result hvm_copy_to_guest_virt(
1553 unsigned long vaddr, void *buf, int size, uint32_t pfec)
1555 return __hvm_copy(buf, vaddr, size,
1556 HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_virt,
1557 PFEC_page_present | PFEC_write_access | pfec);
1560 enum hvm_copy_result hvm_copy_from_guest_virt(
1561 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1563 return __hvm_copy(buf, vaddr, size,
1564 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
1565 PFEC_page_present | pfec);
1568 enum hvm_copy_result hvm_fetch_from_guest_virt(
1569 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1571 if ( hvm_nx_enabled(current) )
1572 pfec |= PFEC_insn_fetch;
1573 return __hvm_copy(buf, vaddr, size,
1574 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
1575 PFEC_page_present | pfec);
1578 enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
1579 unsigned long vaddr, void *buf, int size, uint32_t pfec)
1581 return __hvm_copy(buf, vaddr, size,
1582 HVMCOPY_to_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1583 PFEC_page_present | PFEC_write_access | pfec);
1586 enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
1587 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1589 return __hvm_copy(buf, vaddr, size,
1590 HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1591 PFEC_page_present | pfec);
1594 enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
1595 void *buf, unsigned long vaddr, int size, uint32_t pfec)
1597 if ( hvm_nx_enabled(current) )
1598 pfec |= PFEC_insn_fetch;
1599 return __hvm_copy(buf, vaddr, size,
1600 HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
1601 PFEC_page_present | pfec);
1604 #ifdef __x86_64__
1605 DEFINE_PER_CPU(bool_t, hvm_64bit_hcall);
1606 #endif
1608 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
1610 int rc;
1612 #ifdef __x86_64__
1613 if ( !this_cpu(hvm_64bit_hcall) && is_compat_arg_xlat_range(to, len) )
1615 memcpy(to, from, len);
1616 return 0;
1618 #endif
1620 rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from,
1621 len, 0);
1622 return rc ? len : 0; /* fake a copy_to_user() return code */
1625 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
1627 int rc;
1629 #ifdef __x86_64__
1630 if ( !this_cpu(hvm_64bit_hcall) && is_compat_arg_xlat_range(from, len) )
1632 memcpy(to, from, len);
1633 return 0;
1635 #endif
1637 rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0);
1638 return rc ? len : 0; /* fake a copy_from_user() return code */
1641 #define bitmaskof(idx) (1U << ((idx) & 31))
1642 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
1643 unsigned int *ecx, unsigned int *edx)
1645 struct vcpu *v = current;
1647 if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
1648 return;
1650 domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
1652 if ( input == 0x00000001 )
1654 /* Fix up VLAPIC details. */
1655 *ebx &= 0x00FFFFFFu;
1656 *ebx |= (v->vcpu_id * 2) << 24;
1657 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1658 __clear_bit(X86_FEATURE_APIC & 31, edx);
1662 void hvm_rdtsc_intercept(struct cpu_user_regs *regs)
1664 uint64_t tsc;
1665 struct vcpu *v = current;
1667 tsc = hvm_get_guest_tsc(v);
1668 regs->eax = (uint32_t)tsc;
1669 regs->edx = (uint32_t)(tsc >> 32);
1672 int hvm_msr_read_intercept(struct cpu_user_regs *regs)
1674 uint32_t ecx = regs->ecx;
1675 uint64_t msr_content = 0;
1676 struct vcpu *v = current;
1677 uint64_t *var_range_base, *fixed_range_base;
1678 int index, mtrr;
1679 uint32_t cpuid[4];
1681 var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
1682 fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
1684 hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
1685 mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
1687 switch ( ecx )
1689 case MSR_IA32_TSC:
1690 msr_content = hvm_get_guest_tsc(v);
1691 break;
1693 case MSR_IA32_APICBASE:
1694 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1695 break;
1697 case MSR_IA32_MCG_CAP:
1698 case MSR_IA32_MCG_STATUS:
1699 case MSR_IA32_MC0_STATUS:
1700 case MSR_IA32_MC1_STATUS:
1701 case MSR_IA32_MC2_STATUS:
1702 case MSR_IA32_MC3_STATUS:
1703 case MSR_IA32_MC4_STATUS:
1704 case MSR_IA32_MC5_STATUS:
1705 /* No point in letting the guest see real MCEs */
1706 msr_content = 0;
1707 break;
1709 case MSR_IA32_CR_PAT:
1710 msr_content = v->arch.hvm_vcpu.pat_cr;
1711 break;
1713 case MSR_MTRRcap:
1714 if ( !mtrr )
1715 goto gp_fault;
1716 msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
1717 break;
1718 case MSR_MTRRdefType:
1719 if ( !mtrr )
1720 goto gp_fault;
1721 msr_content = v->arch.hvm_vcpu.mtrr.def_type
1722 | (v->arch.hvm_vcpu.mtrr.enabled << 10);
1723 break;
1724 case MSR_MTRRfix64K_00000:
1725 if ( !mtrr )
1726 goto gp_fault;
1727 msr_content = fixed_range_base[0];
1728 break;
1729 case MSR_MTRRfix16K_80000:
1730 case MSR_MTRRfix16K_A0000:
1731 if ( !mtrr )
1732 goto gp_fault;
1733 index = regs->ecx - MSR_MTRRfix16K_80000;
1734 msr_content = fixed_range_base[index + 1];
1735 break;
1736 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1737 if ( !mtrr )
1738 goto gp_fault;
1739 index = regs->ecx - MSR_MTRRfix4K_C0000;
1740 msr_content = fixed_range_base[index + 3];
1741 break;
1742 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1743 if ( !mtrr )
1744 goto gp_fault;
1745 index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
1746 msr_content = var_range_base[index];
1747 break;
1749 default:
1750 return hvm_funcs.msr_read_intercept(regs);
1753 regs->eax = (uint32_t)msr_content;
1754 regs->edx = (uint32_t)(msr_content >> 32);
1755 return X86EMUL_OKAY;
1757 gp_fault:
1758 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1759 return X86EMUL_EXCEPTION;
1762 int hvm_msr_write_intercept(struct cpu_user_regs *regs)
1764 extern bool_t mtrr_var_range_msr_set(
1765 struct mtrr_state *v, u32 msr, u64 msr_content);
1766 extern bool_t mtrr_fix_range_msr_set(
1767 struct mtrr_state *v, int row, u64 msr_content);
1768 extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
1769 extern bool_t pat_msr_set(u64 *pat, u64 msr);
1771 uint32_t ecx = regs->ecx;
1772 uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
1773 struct vcpu *v = current;
1774 int index, mtrr;
1775 uint32_t cpuid[4];
1777 hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
1778 mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
1780 switch ( ecx )
1782 case MSR_IA32_TSC:
1783 hvm_set_guest_tsc(v, msr_content);
1784 pt_reset(v);
1785 break;
1787 case MSR_IA32_APICBASE:
1788 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1789 break;
1791 case MSR_IA32_CR_PAT:
1792 if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
1793 goto gp_fault;
1794 break;
1796 case MSR_MTRRcap:
1797 if ( !mtrr )
1798 goto gp_fault;
1799 goto gp_fault;
1800 case MSR_MTRRdefType:
1801 if ( !mtrr )
1802 goto gp_fault;
1803 if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
1804 goto gp_fault;
1805 break;
1806 case MSR_MTRRfix64K_00000:
1807 if ( !mtrr )
1808 goto gp_fault;
1809 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
1810 goto gp_fault;
1811 break;
1812 case MSR_MTRRfix16K_80000:
1813 case MSR_MTRRfix16K_A0000:
1814 if ( !mtrr )
1815 goto gp_fault;
1816 index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
1817 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1818 index, msr_content) )
1819 goto gp_fault;
1820 break;
1821 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1822 if ( !mtrr )
1823 goto gp_fault;
1824 index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
1825 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1826 index, msr_content) )
1827 goto gp_fault;
1828 break;
1829 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1830 if ( !mtrr )
1831 goto gp_fault;
1832 if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
1833 regs->ecx, msr_content) )
1834 goto gp_fault;
1835 break;
1837 default:
1838 return hvm_funcs.msr_write_intercept(regs);
1841 return X86EMUL_OKAY;
1843 gp_fault:
1844 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1845 return X86EMUL_EXCEPTION;
1848 enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
1850 unsigned long intr_shadow;
1852 ASSERT(v == current);
1854 if ( (intack.source != hvm_intsrc_nmi) &&
1855 !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
1856 return hvm_intblk_rflags_ie;
1858 intr_shadow = hvm_funcs.get_interrupt_shadow(v);
1860 if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) )
1861 return hvm_intblk_shadow;
1863 if ( intack.source == hvm_intsrc_nmi )
1864 return ((intr_shadow & HVM_INTR_SHADOW_NMI) ?
1865 hvm_intblk_nmi_iret : hvm_intblk_none);
1867 if ( intack.source == hvm_intsrc_lapic )
1869 uint32_t tpr = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xF0;
1870 if ( (tpr >> 4) >= (intack.vector >> 4) )
1871 return hvm_intblk_tpr;
1874 return hvm_intblk_none;
1877 static long hvm_grant_table_op(
1878 unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
1880 if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) )
1881 return -ENOSYS; /* all other commands need auditing */
1882 return do_grant_table_op(cmd, uop, count);
1885 static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE(void) arg)
1887 long rc = do_memory_op(cmd, arg);
1888 if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
1889 current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
1890 return rc;
1893 typedef unsigned long hvm_hypercall_t(
1894 unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
1896 #define HYPERCALL(x) \
1897 [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x
1899 #if defined(__i386__)
1901 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
1902 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
1903 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
1904 HYPERCALL(xen_version),
1905 HYPERCALL(event_channel_op),
1906 HYPERCALL(sched_op),
1907 HYPERCALL(hvm_op)
1908 };
1910 #else /* defined(__x86_64__) */
1912 static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
1914 long rc = compat_memory_op(cmd, arg);
1915 if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
1916 current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
1917 return rc;
1920 static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
1921 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
1922 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
1923 HYPERCALL(xen_version),
1924 HYPERCALL(event_channel_op),
1925 HYPERCALL(sched_op),
1926 HYPERCALL(hvm_op)
1927 };
1929 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
1930 [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
1931 [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
1932 HYPERCALL(xen_version),
1933 HYPERCALL(event_channel_op),
1934 HYPERCALL(sched_op),
1935 HYPERCALL(hvm_op)
1936 };
1938 #endif /* defined(__x86_64__) */
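/*
 * Dispatch a hypercall made from HVM guest context, using the 64-bit or
 * 32-bit (compat) argument registers according to the guest's current
 * execution mode. Hypercalls from guest user mode (SS.DPL == 3) are
 * rejected with -EPERM.
 */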
1940 int hvm_do_hypercall(struct cpu_user_regs *regs)
1942 struct vcpu *curr = current;
1943 struct segment_register sreg;
1944 int mode = hvm_guest_x86_mode(curr);
1945 uint32_t eax = regs->eax;
1947 switch ( mode )
1949 #ifdef __x86_64__
1950 case 8:
1951 #endif
1952 case 4:
1953 case 2:
1954 hvm_get_segment_register(curr, x86_seg_ss, &sreg);
1955 if ( unlikely(sreg.attr.fields.dpl == 3) )
1957 default:
1958 regs->eax = -EPERM;
1959 return HVM_HCALL_completed;
1961 case 0:
1962 break;
1965 if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
1967 regs->eax = -ENOSYS;
1968 return HVM_HCALL_completed;
1971 this_cpu(hc_preempted) = 0;
1973 #ifdef __x86_64__
1974 if ( mode == 8 )
1976 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%lx, %lx, %lx, %lx, %lx)", eax,
1977 regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);
1979 this_cpu(hvm_64bit_hcall) = 1;
1980 regs->rax = hvm_hypercall64_table[eax](regs->rdi,
1981 regs->rsi,
1982 regs->rdx,
1983 regs->r10,
1984 regs->r8);
1985 this_cpu(hvm_64bit_hcall) = 0;
1987 else
1988 #endif
1990 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%x, %x, %x, %x, %x)", eax,
1991 (uint32_t)regs->ebx, (uint32_t)regs->ecx,
1992 (uint32_t)regs->edx, (uint32_t)regs->esi,
1993 (uint32_t)regs->edi);
1995 regs->eax = hvm_hypercall32_table[eax]((uint32_t)regs->ebx,
1996 (uint32_t)regs->ecx,
1997 (uint32_t)regs->edx,
1998 (uint32_t)regs->esi,
1999 (uint32_t)regs->edi);
2002 HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u -> %lx",
2003 eax, (unsigned long)regs->eax);
2005 if ( this_cpu(hc_preempted) )
2006 return HVM_HCALL_preempted;
2008 if ( unlikely(curr->domain->arch.hvm_domain.qemu_mapcache_invalidate) &&
2009 test_and_clear_bool(curr->domain->arch.hvm_domain.
2010 qemu_mapcache_invalidate) )
2011 return HVM_HCALL_invalidate;
2013 return HVM_HCALL_completed;
2016 static void hvm_latch_shinfo_size(struct domain *d)
2018 /*
2019 * Called from operations which are among the very first executed by
2020 * PV drivers on initialisation or after save/restore. These are sensible
2021 * points at which to sample the execution mode of the guest and latch
2022 * 32- or 64-bit format for shared state.
2023 */
2024 if ( current->domain == d )
2025 d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
2028 /* Initialise a hypercall transfer page for an HVM domain using
2029 paravirtualised drivers. */
2030 void hvm_hypercall_page_initialise(struct domain *d,
2031 void *hypercall_page)
2033 hvm_latch_shinfo_size(d);
2034 hvm_funcs.init_hypercall_page(d, hypercall_page);
2037 static int hvmop_set_pci_intx_level(
2038 XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
2040 struct xen_hvm_set_pci_intx_level op;
2041 struct domain *d;
2042 int rc;
2044 if ( copy_from_guest(&op, uop, 1) )
2045 return -EFAULT;
2047 if ( (op.domain > 0) || (op.bus > 0) || (op.device > 31) || (op.intx > 3) )
2048 return -EINVAL;
2050 d = rcu_lock_domain_by_id(op.domid);
2051 if ( d == NULL )
2052 return -ESRCH;
2054 rc = -EPERM;
2055 if ( !IS_PRIV_FOR(current->domain, d) )
2056 goto out;
2058 rc = -EINVAL;
2059 if ( !is_hvm_domain(d) )
2060 goto out;
2062 rc = xsm_hvm_set_pci_intx_level(d);
2063 if ( rc )
2064 goto out;
2066 rc = 0;
2067 switch ( op.level )
2069 case 0:
2070 hvm_pci_intx_deassert(d, op.device, op.intx);
2071 break;
2072 case 1:
2073 hvm_pci_intx_assert(d, op.device, op.intx);
2074 break;
2075 default:
2076 rc = -EINVAL;
2077 break;
2080 out:
2081 rcu_unlock_domain(d);
2082 return rc;
2085 void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
2087 struct domain *d = current->domain;
2088 struct vcpu_guest_context *ctxt;
2089 struct segment_register reg;
2091 BUG_ON(vcpu_runnable(v));
2093 domain_lock(d);
2095 if ( v->is_initialised )
2096 goto out;
2098 if ( !paging_mode_hap(d) )
2099 {
2100 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
2101 put_page(pagetable_get_page(v->arch.guest_table));
2102 v->arch.guest_table = pagetable_null();
2103 }
2105 ctxt = &v->arch.guest_context;
2106 memset(ctxt, 0, sizeof(*ctxt));
2107 ctxt->flags = VGCF_online;
2108 ctxt->user_regs.eflags = 2;
2109 ctxt->user_regs.edx = 0x00000f00;
2110 ctxt->user_regs.eip = ip;
2112 v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
2113 hvm_update_guest_cr(v, 0);
2115 v->arch.hvm_vcpu.guest_cr[2] = 0;
2116 hvm_update_guest_cr(v, 2);
2118 v->arch.hvm_vcpu.guest_cr[3] = 0;
2119 hvm_update_guest_cr(v, 3);
2121 v->arch.hvm_vcpu.guest_cr[4] = 0;
2122 hvm_update_guest_cr(v, 4);
2124 v->arch.hvm_vcpu.guest_efer = 0;
2125 hvm_update_guest_efer(v);
2127 reg.sel = cs;
2128 reg.base = (uint32_t)reg.sel << 4;
2129 reg.limit = 0xffff;
2130 reg.attr.bytes = 0x09b;
2131 hvm_set_segment_register(v, x86_seg_cs, &reg);
2133 reg.sel = reg.base = 0;
2134 reg.limit = 0xffff;
2135 reg.attr.bytes = 0x093;
2136 hvm_set_segment_register(v, x86_seg_ds, &reg);
2137 hvm_set_segment_register(v, x86_seg_es, &reg);
2138 hvm_set_segment_register(v, x86_seg_fs, &reg);
2139 hvm_set_segment_register(v, x86_seg_gs, &reg);
2140 hvm_set_segment_register(v, x86_seg_ss, &reg);
2142 reg.attr.bytes = 0x82; /* LDT */
2143 hvm_set_segment_register(v, x86_seg_ldtr, &reg);
2145 reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
2146 hvm_set_segment_register(v, x86_seg_tr, &reg);
2148 reg.attr.bytes = 0;
2149 hvm_set_segment_register(v, x86_seg_gdtr, &reg);
2150 hvm_set_segment_register(v, x86_seg_idtr, &reg);
2152 /* Sync AP's TSC with BSP's. */
2153 v->arch.hvm_vcpu.cache_tsc_offset =
2154 v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
2155 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
2157 paging_update_paging_modes(v);
2159 v->arch.flags |= TF_kernel_mode;
2160 v->is_initialised = 1;
2161 clear_bit(_VPF_down, &v->pause_flags);
2163 out:
2164 domain_unlock(d);
2165 }
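/*
 * Worked example: the S3 path below calls this with cs = 0xf000,
 * ip = 0xfff0, so CS.sel = 0xf000, CS.base = 0xf000 << 4 = 0xf0000 and the
 * vCPU resumes fetching at 0xf0000 + 0xfff0 = 0xffff0, the conventional
 * real-mode reset entry point just below 1MB.
 */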
2167 static void hvm_s3_suspend(struct domain *d)
2168 {
2169 struct vcpu *v;
2171 domain_pause(d);
2172 domain_lock(d);
2174 if ( d->is_dying || (d->vcpu[0] == NULL) ||
2175 test_and_set_bool(d->arch.hvm_domain.is_s3_suspended) )
2176 {
2177 domain_unlock(d);
2178 domain_unpause(d);
2179 return;
2180 }
2182 for_each_vcpu ( d, v )
2183 {
2184 vlapic_reset(vcpu_vlapic(v));
2185 vcpu_reset(v);
2186 }
2188 vpic_reset(d);
2189 vioapic_reset(d);
2190 pit_reset(d);
2191 rtc_reset(d);
2192 pmtimer_reset(d);
2193 hpet_reset(d);
2195 hvm_vcpu_reset_state(d->vcpu[0], 0xf000, 0xfff0);
2197 domain_unlock(d);
2198 }
2200 static void hvm_s3_resume(struct domain *d)
2201 {
2202 if ( test_and_clear_bool(d->arch.hvm_domain.is_s3_suspended) )
2203 domain_unpause(d);
2204 }
2206 static int hvmop_set_isa_irq_level(
2207 XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t) uop)
2208 {
2209 struct xen_hvm_set_isa_irq_level op;
2210 struct domain *d;
2211 int rc;
2213 if ( copy_from_guest(&op, uop, 1) )
2214 return -EFAULT;
2216 if ( op.isa_irq > 15 )
2217 return -EINVAL;
2219 d = rcu_lock_domain_by_id(op.domid);
2220 if ( d == NULL )
2221 return -ESRCH;
2223 rc = -EPERM;
2224 if ( !IS_PRIV_FOR(current->domain, d) )
2225 goto out;
2227 rc = -EINVAL;
2228 if ( !is_hvm_domain(d) )
2229 goto out;
2231 rc = xsm_hvm_set_isa_irq_level(d);
2232 if ( rc )
2233 goto out;
2235 rc = 0;
2236 switch ( op.level )
2237 {
2238 case 0:
2239 hvm_isa_irq_deassert(d, op.isa_irq);
2240 break;
2241 case 1:
2242 hvm_isa_irq_assert(d, op.isa_irq);
2243 break;
2244 default:
2245 rc = -EINVAL;
2246 break;
2247 }
2249 out:
2250 rcu_unlock_domain(d);
2251 return rc;
2252 }
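/*
 * Caller-side sketch (do_hvm_hypercall() and guest_domid are placeholders):
 * a device model usually injects one edge on an ISA IRQ by asserting and
 * then deasserting the line.
 *
 *     struct xen_hvm_set_isa_irq_level op = {
 *         .domid = guest_domid, .isa_irq = 5, .level = 1,
 *     };
 *     do_hvm_hypercall(HVMOP_set_isa_irq_level, &op);      [assert]
 *     op.level = 0;
 *     do_hvm_hypercall(HVMOP_set_isa_irq_level, &op);      [deassert]
 */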
2254 static int hvmop_set_pci_link_route(
2255 XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t) uop)
2256 {
2257 struct xen_hvm_set_pci_link_route op;
2258 struct domain *d;
2259 int rc;
2261 if ( copy_from_guest(&op, uop, 1) )
2262 return -EFAULT;
2264 if ( (op.link > 3) || (op.isa_irq > 15) )
2265 return -EINVAL;
2267 d = rcu_lock_domain_by_id(op.domid);
2268 if ( d == NULL )
2269 return -ESRCH;
2271 rc = -EPERM;
2272 if ( !IS_PRIV_FOR(current->domain, d) )
2273 goto out;
2275 rc = -EINVAL;
2276 if ( !is_hvm_domain(d) )
2277 goto out;
2279 rc = xsm_hvm_set_pci_link_route(d);
2280 if ( rc )
2281 goto out;
2283 rc = 0;
2284 hvm_set_pci_link_route(d, op.link, op.isa_irq);
2286 out:
2287 rcu_unlock_domain(d);
2288 return rc;
2289 }
2291 static int hvmop_flush_tlb_all(void)
2292 {
2293 struct domain *d = current->domain;
2294 struct vcpu *v;
2296 /* Avoid deadlock if more than one vcpu tries this at the same time. */
2297 if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
2298 return -EAGAIN;
2300 /* Pause all other vcpus. */
2301 for_each_vcpu ( d, v )
2302 if ( v != current )
2303 vcpu_pause_nosync(v);
2305 /* Now that all VCPUs are signalled to deschedule, we wait... */
2306 for_each_vcpu ( d, v )
2307 if ( v != current )
2308 while ( !vcpu_runnable(v) && v->is_running )
2309 cpu_relax();
2311 /* All other vcpus are paused, safe to unlock now. */
2312 spin_unlock(&d->hypercall_deadlock_mutex);
2314 /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
2315 for_each_vcpu ( d, v )
2316 paging_update_cr3(v);
2318 /* Flush all dirty TLBs. */
2319 flush_tlb_mask(d->domain_dirty_cpumask);
2321 /* Done. */
2322 for_each_vcpu ( d, v )
2323 if ( v != current )
2324 vcpu_unpause(v);
2326 return 0;
2327 }
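/*
 * Guest-side sketch: do_hvm_op() below only accepts this operation with a
 * NULL argument handle, so a guest kernel with PV drivers would issue,
 * for instance,
 *
 *     HYPERVISOR_hvm_op(HVMOP_flush_tlbs, NULL);
 *
 * where HYPERVISOR_hvm_op is the guest's hypercall-page wrapper.
 */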
2329 long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
2330 {
2332 long rc = 0;
2334 switch ( op )
2335 {
2336 case HVMOP_set_param:
2337 case HVMOP_get_param:
2338 {
2339 struct xen_hvm_param a;
2340 struct hvm_ioreq_page *iorp;
2341 struct domain *d;
2342 struct vcpu *v;
2344 if ( copy_from_guest(&a, arg, 1) )
2345 return -EFAULT;
2347 if ( a.index >= HVM_NR_PARAMS )
2348 return -EINVAL;
2350 if ( a.domid == DOMID_SELF )
2351 {
2352 d = rcu_lock_current_domain();
2353 }
2354 else
2355 {
2356 if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
2357 return -ESRCH;
2358 if ( !IS_PRIV_FOR(current->domain, d) )
2359 {
2360 rc = -EPERM;
2361 goto param_fail;
2362 }
2363 }
2366 rc = -EINVAL;
2367 if ( !is_hvm_domain(d) )
2368 goto param_fail;
2370 rc = xsm_hvm_param(d, op);
2371 if ( rc )
2372 goto param_fail;
2374 if ( op == HVMOP_set_param )
2375 {
2376 rc = 0;
2378 switch ( a.index )
2379 {
2380 case HVM_PARAM_IOREQ_PFN:
2381 iorp = &d->arch.hvm_domain.ioreq;
2382 if ( (rc = hvm_set_ioreq_page(d, iorp, a.value)) != 0 )
2383 break;
2384 spin_lock(&iorp->lock);
2385 if ( iorp->va != NULL )
2386 /* Initialise evtchn port info if VCPUs already created. */
2387 for_each_vcpu ( d, v )
2388 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
2389 spin_unlock(&iorp->lock);
2390 break;
2391 case HVM_PARAM_BUFIOREQ_PFN:
2392 iorp = &d->arch.hvm_domain.buf_ioreq;
2393 rc = hvm_set_ioreq_page(d, iorp, a.value);
2394 break;
2395 case HVM_PARAM_CALLBACK_IRQ:
2396 hvm_set_callback_via(d, a.value);
2397 hvm_latch_shinfo_size(d);
2398 break;
2399 case HVM_PARAM_TIMER_MODE:
2400 if ( a.value > HVMPTM_one_missed_tick_pending )
2401 rc = -EINVAL;
2402 break;
2403 case HVM_PARAM_IDENT_PT:
2404 rc = -EPERM;
2405 if ( !IS_PRIV(current->domain) )
2406 break;
2408 rc = -EINVAL;
2409 if ( d->arch.hvm_domain.params[a.index] != 0 )
2410 break;
2412 rc = 0;
2413 if ( !paging_mode_hap(d) )
2414 break;
2416 domain_pause(d);
2418 /*
2419 * Update GUEST_CR3 in each VMCS to point at identity map.
2420 * All foreign updates to guest state must synchronise on
2421 * the domctl_lock.
2422 */
2423 spin_lock(&domctl_lock);
2424 d->arch.hvm_domain.params[a.index] = a.value;
2425 for_each_vcpu ( d, v )
2426 paging_update_cr3(v);
2427 spin_unlock(&domctl_lock);
2429 domain_unpause(d);
2430 break;
2431 case HVM_PARAM_DM_DOMAIN:
2432 /* Privileged domains only, as we must domain_pause(d). */
2433 rc = -EPERM;
2434 if ( !IS_PRIV_FOR(current->domain, d) )
2435 break;
2437 if ( a.value == DOMID_SELF )
2438 a.value = current->domain->domain_id;
2440 rc = 0;
2441 domain_pause(d); /* safe to change per-vcpu xen_port */
2442 iorp = &d->arch.hvm_domain.ioreq;
2443 for_each_vcpu ( d, v )
2444 {
2445 int old_port, new_port;
2446 new_port = alloc_unbound_xen_event_channel(v, a.value);
2447 if ( new_port < 0 )
2448 {
2449 rc = new_port;
2450 break;
2451 }
2452 /* xchg() ensures that only we free_xen_event_channel() */
2453 old_port = xchg(&v->arch.hvm_vcpu.xen_port, new_port);
2454 free_xen_event_channel(v, old_port);
2455 spin_lock(&iorp->lock);
2456 if ( iorp->va != NULL )
2457 get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
2458 spin_unlock(&iorp->lock);
2459 }
2460 domain_unpause(d);
2461 break;
2462 case HVM_PARAM_ACPI_S_STATE:
2463 /* Privileged domains only, as we must domain_pause(d). */
2464 rc = -EPERM;
2465 if ( !IS_PRIV_FOR(current->domain, d) )
2466 break;
2468 rc = 0;
2469 if ( a.value == 3 )
2470 hvm_s3_suspend(d);
2471 else if ( a.value == 0 )
2472 hvm_s3_resume(d);
2473 else
2474 rc = -EINVAL;
2476 break;
2477 }
2479 if ( rc == 0 )
2480 d->arch.hvm_domain.params[a.index] = a.value;
2481 }
2482 else
2483 {
2484 switch ( a.index )
2485 {
2486 case HVM_PARAM_ACPI_S_STATE:
2487 a.value = d->arch.hvm_domain.is_s3_suspended ? 3 : 0;
2488 break;
2489 default:
2490 a.value = d->arch.hvm_domain.params[a.index];
2491 break;
2492 }
2493 rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
2494 }
2496 HVM_DBG_LOG(DBG_LEVEL_HCALL, "%s param %u = %"PRIx64,
2497 op == HVMOP_set_param ? "set" : "get",
2498 a.index, a.value);
2500 param_fail:
2501 rcu_unlock_domain(d);
2502 break;
2503 }
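/*
 * Caller-side sketch: a guest with PV drivers typically programs the
 * event-channel callback through this case, e.g.
 *
 *     struct xen_hvm_param p = {
 *         .domid = DOMID_SELF,
 *         .index = HVM_PARAM_CALLBACK_IRQ,
 *         .value = 9,            [interpreted by hvm_set_callback_via()]
 *     };
 *     HYPERVISOR_hvm_op(HVMOP_set_param, &p);
 *
 * which, as above, also latches the shared-info layout via
 * hvm_latch_shinfo_size().
 */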
2505 case HVMOP_set_pci_intx_level:
2506 rc = hvmop_set_pci_intx_level(
2507 guest_handle_cast(arg, xen_hvm_set_pci_intx_level_t));
2508 break;
2510 case HVMOP_set_isa_irq_level:
2511 rc = hvmop_set_isa_irq_level(
2512 guest_handle_cast(arg, xen_hvm_set_isa_irq_level_t));
2513 break;
2515 case HVMOP_set_pci_link_route:
2516 rc = hvmop_set_pci_link_route(
2517 guest_handle_cast(arg, xen_hvm_set_pci_link_route_t));
2518 break;
2520 case HVMOP_flush_tlbs:
2521 rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS;
2522 break;
2524 case HVMOP_track_dirty_vram:
2525 {
2526 struct xen_hvm_track_dirty_vram a;
2527 struct domain *d;
2529 if ( copy_from_guest(&a, arg, 1) )
2530 return -EFAULT;
2532 if ( a.domid == DOMID_SELF )
2533 {
2534 d = rcu_lock_current_domain();
2535 }
2536 else
2537 {
2538 if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
2539 return -ESRCH;
2540 if ( !IS_PRIV_FOR(current->domain, d) )
2541 {
2542 rc = -EPERM;
2543 goto param_fail2;
2544 }
2545 }
2547 rc = -EINVAL;
2548 if ( !is_hvm_domain(d) )
2549 goto param_fail2;
2551 rc = xsm_hvm_param(d, op);
2552 if ( rc )
2553 goto param_fail2;
2555 rc = -ESRCH;
2556 if ( d->is_dying )
2557 goto param_fail2;
2559 rc = -EINVAL;
2560 if ( !shadow_mode_enabled(d) )
2561 goto param_fail2;
2562 if ( d->vcpu[0] == NULL )
2563 goto param_fail2;
2565 rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
2567 param_fail2:
2568 rcu_unlock_domain(d);
2569 break;
2570 }
2572 case HVMOP_modified_memory:
2573 {
2574 struct xen_hvm_modified_memory a;
2575 struct domain *d;
2576 unsigned long pfn;
2578 if ( copy_from_guest(&a, arg, 1) )
2579 return -EFAULT;
2581 if ( a.domid == DOMID_SELF )
2582 {
2583 d = rcu_lock_current_domain();
2584 }
2585 else
2586 {
2587 if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
2588 return -ESRCH;
2589 if ( !IS_PRIV_FOR(current->domain, d) )
2590 {
2591 rc = -EPERM;
2592 goto param_fail3;
2593 }
2594 }
2596 rc = -EINVAL;
2597 if ( !is_hvm_domain(d) )
2598 goto param_fail3;
2600 rc = xsm_hvm_param(d, op);
2601 if ( rc )
2602 goto param_fail3;
2604 rc = -EINVAL;
2605 if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
2606 ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
2607 ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
2608 goto param_fail3;
2610 rc = 0;
2611 if ( !paging_mode_log_dirty(d) )
2612 goto param_fail3;
2614 for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
2615 {
2616 p2m_type_t t;
2617 mfn_t mfn = gfn_to_mfn(d, pfn, &t);
2618 if ( mfn_x(mfn) != INVALID_MFN )
2619 {
2620 paging_mark_dirty(d, mfn_x(mfn));
2621 /* These are most probably not page tables any more, */
2622 /* so don't take a long time and don't die either. */
2623 sh_remove_shadows(d->vcpu[0], mfn, 1, 0);
2624 }
2625 }
2627 param_fail3:
2628 rcu_unlock_domain(d);
2629 break;
2630 }
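/*
 * Caller-side sketch (do_hvm_hypercall() and guest_domid are placeholders):
 * a toolstack that writes into a live guest while log-dirty mode is active
 * (e.g. during save/restore) reports the touched range so it is resent:
 *
 *     struct xen_hvm_modified_memory m = {
 *         .domid = guest_domid, .first_pfn = gpfn, .nr = count,
 *     };
 *     do_hvm_hypercall(HVMOP_modified_memory, &m);
 */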
2632 case HVMOP_set_mem_type:
2633 {
2634 struct xen_hvm_set_mem_type a;
2635 struct domain *d;
2636 unsigned long pfn;
2638 /* Interface types to internal p2m types */
2639 p2m_type_t memtype[] = {
2640 p2m_ram_rw, /* HVMMEM_ram_rw */
2641 p2m_ram_ro, /* HVMMEM_ram_ro */
2642 p2m_mmio_dm /* HVMMEM_mmio_dm */
2643 };
2645 if ( copy_from_guest(&a, arg, 1) )
2646 return -EFAULT;
2648 if ( a.domid == DOMID_SELF )
2649 {
2650 d = rcu_lock_current_domain();
2651 }
2652 else
2653 {
2654 if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
2655 return -ESRCH;
2656 if ( !IS_PRIV_FOR(current->domain, d) )
2657 {
2658 rc = -EPERM;
2659 goto param_fail4;
2660 }
2661 }
2663 rc = -EINVAL;
2664 if ( !is_hvm_domain(d) )
2665 goto param_fail4;
2667 rc = -EINVAL;
2668 if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
2669 ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
2670 ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
2671 goto param_fail4;
2673 if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
2674 goto param_fail4;
2676 rc = 0;
2678 for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
2679 {
2680 p2m_type_t t;
2681 mfn_t mfn;
2682 mfn = gfn_to_mfn(d, pfn, &t);
2683 p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
2684 }
2686 param_fail4:
2687 rcu_unlock_domain(d);
2688 break;
2689 }
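/*
 * Caller-side sketch (do_hvm_hypercall() and guest_domid are placeholders):
 * marking a range of guest RAM read-only so that writes trap for emulation:
 *
 *     struct xen_hvm_set_mem_type t = {
 *         .domid       = guest_domid,
 *         .hvmmem_type = HVMMEM_ram_ro,
 *         .first_pfn   = gpfn,
 *         .nr          = count,
 *     };
 *     do_hvm_hypercall(HVMOP_set_mem_type, &t);
 */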
2691 default:
2692 {
2693 gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
2694 rc = -ENOSYS;
2695 break;
2696 }
2698 }
2699 if ( rc == -EAGAIN )
2700 rc = hypercall_create_continuation(
2701 __HYPERVISOR_hvm_op, "lh", op, arg);
2703 return rc;
2704 }
2706 /*
2707 * Local variables:
2708 * mode: C
2709 * c-set-style: "BSD"
2710 * c-basic-offset: 4
2711 * tab-width: 4
2712 * indent-tabs-mode: nil
2713 * End:
2714 */