ia64/xen-unstable

annotate xen/arch/x86/traps.c @ 10892:0d2ba35c0cf2

[XEN] Add hypercall support for HVM guests. This is
fairly useless at the moment, since all of the hypercalls
fail: copy_from_user doesn't work correctly in HVM
domains.

Signed-off-by: Steven Smith <ssmith@xensource.com>

Add a CPUID hypervisor platform interface at leaf
0x40000000. Allow the hypercall transfer page to be
filled in via MSR 0x40000000.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Tue Aug 01 17:18:05 2006 +0100 (2006-08-01)
parents 2d2ed4d9b1c1
children 16aa4b417c6b
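
A minimal guest-side sketch of the interface added here (illustrative only, not
part of traps.c; the helpers cpuid_raw, wrmsr64 and init_xen_hypercall_page are
names invented for the example): a guest detects Xen via CPUID leaf 0x40000000,
reads the hypercall-page count and MSR base from leaf 0x40000002, and installs
the hypercall transfer page by writing its guest-physical address, with the
page index in the low 12 bits, to MSR 0x40000000. This mirrors
cpuid_hypervisor_leaves() and wrmsr_hypervisor_regs() in the listing below.

    #include <stdint.h>

    static inline void cpuid_raw(uint32_t leaf, uint32_t *a, uint32_t *b,
                                 uint32_t *c, uint32_t *d)
    {
        __asm__ __volatile__ ( "cpuid"
                               : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                               : "0" (leaf) );
    }

    static inline void wrmsr64(uint32_t msr, uint64_t val)
    {
        __asm__ __volatile__ ( "wrmsr" : : "c" (msr),
                               "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32)) );
    }

    /* 'page_gpa' is the guest-physical address of a zeroed, page-aligned
     * region reserved by the guest for the hypercall trampolines. */
    int init_xen_hypercall_page(uint64_t page_gpa)
    {
        uint32_t eax, ebx, ecx, edx, nr_pages, msr_base, i;

        /* Leaf 0x40000000: largest leaf in EAX, "XenVMMXenVMM" signature. */
        cpuid_raw(0x40000000, &eax, &ebx, &ecx, &edx);
        if ( (ebx != 0x566e6558) || (ecx != 0x65584d4d) || (edx != 0x4d4d566e) )
            return -1;           /* Xen signature not present */
        if ( eax < 0x40000002 )
            return -1;           /* hypercall-page leaf not implemented */

        /* Leaf 0x40000002: EAX = number of hypercall pages, EBX = MSR base. */
        cpuid_raw(0x40000002, &nr_pages, &msr_base, &ecx, &edx);

        /* MSR value = guest-physical page address | page index (low 12 bits). */
        for ( i = 0; i < nr_pages; i++ )
            wrmsr64(msr_base, (page_gpa + ((uint64_t)i << 12)) | i);

        return 0;
    }
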
rev   line source
kaf24@1452 1 /******************************************************************************
kaf24@3597 2 * arch/x86/traps.c
kaf24@1452 3 *
kaf24@2076 4 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
kaf24@1452 5 *
kaf24@1452 6 * This program is free software; you can redistribute it and/or modify
kaf24@1452 7 * it under the terms of the GNU General Public License as published by
kaf24@1452 8 * the Free Software Foundation; either version 2 of the License, or
kaf24@1452 9 * (at your option) any later version.
kaf24@1452 10 *
kaf24@1452 11 * This program is distributed in the hope that it will be useful,
kaf24@1452 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
kaf24@1452 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kaf24@1452 14 * GNU General Public License for more details.
kaf24@1452 15 *
kaf24@1452 16 * You should have received a copy of the GNU General Public License
kaf24@1452 17 * along with this program; if not, write to the Free Software
kaf24@1452 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
kaf24@1452 19 */
kaf24@1452 20
kaf24@1452 21 /*
kaf24@1452 22 * Copyright (C) 1991, 1992 Linus Torvalds
kaf24@1452 23 *
kaf24@1452 24 * Pentium III FXSR, SSE support
kaf24@1452 25 * Gareth Hughes <gareth@valinux.com>, May 2000
kaf24@1452 26 */
kaf24@1452 27
kaf24@1452 28 #include <xen/config.h>
kaf24@1452 29 #include <xen/init.h>
kaf24@1452 30 #include <xen/sched.h>
kaf24@1452 31 #include <xen/lib.h>
kaf24@1452 32 #include <xen/errno.h>
kaf24@1452 33 #include <xen/mm.h>
kaf24@2041 34 #include <xen/console.h>
kaf24@9617 35 #include <xen/reboot.h>
ach61@2805 36 #include <asm/regs.h>
kaf24@1452 37 #include <xen/delay.h>
kaf24@2047 38 #include <xen/event.h>
kaf24@1452 39 #include <xen/spinlock.h>
kaf24@1452 40 #include <xen/irq.h>
kaf24@1452 41 #include <xen/perfc.h>
kaf24@2047 42 #include <xen/softirq.h>
kaf24@5356 43 #include <xen/domain_page.h>
kaf24@5840 44 #include <xen/symbols.h>
kaf24@8467 45 #include <xen/iocap.h>
Ian@9813 46 #include <xen/nmi.h>
kfraser@10892 47 #include <xen/version.h>
kaf24@1749 48 #include <asm/shadow.h>
kaf24@1452 49 #include <asm/system.h>
kaf24@1452 50 #include <asm/io.h>
kaf24@1452 51 #include <asm/atomic.h>
kaf24@1452 52 #include <asm/desc.h>
kaf24@1452 53 #include <asm/debugreg.h>
kaf24@1452 54 #include <asm/smp.h>
kaf24@1452 55 #include <asm/flushtlb.h>
kaf24@1452 56 #include <asm/uaccess.h>
kaf24@1452 57 #include <asm/i387.h>
kaf24@2971 58 #include <asm/debugger.h>
kaf24@3337 59 #include <asm/msr.h>
kaf24@4047 60 #include <asm/x86_emulate.h>
kaf24@1452 61
kaf24@3334 62 /*
kaf24@3334 63 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
kaf24@3334 64 * fatal: Xen prints a diagnostic message and then hangs.
kaf24@3334 65 * dom0: The NMI is virtualised to DOM0.
kaf24@3334 66 * ignore: The NMI error is cleared and ignored.
kaf24@3334 67 */
kaf24@3334 68 #ifdef NDEBUG
kaf24@3334 69 char opt_nmi[10] = "dom0";
kaf24@3334 70 #else
kaf24@3334 71 char opt_nmi[10] = "fatal";
kaf24@3334 72 #endif
kaf24@3334 73 string_param("nmi", opt_nmi);
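/*
 * Illustrative usage (not part of traps.c): the policy above is selected on
 * the Xen boot command line via this string parameter, e.g. "nmi=dom0" to
 * forward NMIs to domain 0, or "nmi=ignore" to clear and discard them.
 */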
kaf24@3334 74
kaf24@3774 75 /* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32. */
kaf24@3847 76 idt_entry_t idt_table[IDT_ENTRIES];
kaf24@1452 77
kaf24@4972 78 #define DECLARE_TRAP_HANDLER(_name) \
kaf24@4972 79 asmlinkage void _name(void); \
kaf24@4972 80 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
kaf24@1452 81
kaf24@1452 82 asmlinkage void nmi(void);
kaf24@4972 83 DECLARE_TRAP_HANDLER(divide_error);
kaf24@4972 84 DECLARE_TRAP_HANDLER(debug);
kaf24@4972 85 DECLARE_TRAP_HANDLER(int3);
kaf24@4972 86 DECLARE_TRAP_HANDLER(overflow);
kaf24@4972 87 DECLARE_TRAP_HANDLER(bounds);
kaf24@4972 88 DECLARE_TRAP_HANDLER(invalid_op);
kaf24@4972 89 DECLARE_TRAP_HANDLER(device_not_available);
kaf24@4972 90 DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
kaf24@4972 91 DECLARE_TRAP_HANDLER(invalid_TSS);
kaf24@4972 92 DECLARE_TRAP_HANDLER(segment_not_present);
kaf24@4972 93 DECLARE_TRAP_HANDLER(stack_segment);
kaf24@4972 94 DECLARE_TRAP_HANDLER(general_protection);
kaf24@4972 95 DECLARE_TRAP_HANDLER(page_fault);
kaf24@4972 96 DECLARE_TRAP_HANDLER(coprocessor_error);
kaf24@4972 97 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
kaf24@4972 98 DECLARE_TRAP_HANDLER(alignment_check);
kaf24@4972 99 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
kaf24@4972 100 DECLARE_TRAP_HANDLER(machine_check);
kaf24@1452 101
kaf24@5696 102 long do_set_debugreg(int reg, unsigned long value);
kaf24@5696 103 unsigned long do_get_debugreg(int reg);
kaf24@5696 104
kaf24@4923 105 static int debug_stack_lines = 20;
kaf24@4923 106 integer_param("debug_stack_lines", debug_stack_lines);
kaf24@6591 107
kaf24@6591 108 #ifdef CONFIG_X86_32
kaf24@6591 109 #define stack_words_per_line 8
kaf24@6591 110 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
kaf24@6591 111 #else
kaf24@6591 112 #define stack_words_per_line 4
kaf24@6776 113 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
kaf24@6591 114 #endif
kaf24@1452 115
kaf24@5840 116 int is_kernel_text(unsigned long addr)
kaf24@1452 117 {
kaf24@5146 118 extern char _stext, _etext;
kaf24@1452 119 if (addr >= (unsigned long) &_stext &&
kaf24@1452 120 addr <= (unsigned long) &_etext)
kaf24@1452 121 return 1;
kaf24@1452 122 return 0;
kaf24@1452 123
kaf24@1452 124 }
kaf24@1452 125
kaf24@5840 126 unsigned long kernel_text_end(void)
kaf24@5840 127 {
kaf24@5840 128 extern char _etext;
kaf24@5840 129 return (unsigned long) &_etext;
kaf24@5840 130 }
kaf24@5840 131
kaf24@6591 132 static void show_guest_stack(struct cpu_user_regs *regs)
iap10@2441 133 {
iap10@2441 134 int i;
Ian@8597 135 unsigned long *stack, addr;
iap10@2441 136
kaf24@9192 137 if ( hvm_guest(current) )
kaf24@8759 138 return;
kaf24@8759 139
kaf24@9192 140 if ( vm86_mode(regs) )
Ian@8597 141 {
Ian@8597 142 stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
kaf24@9584 143 printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ",
Ian@8597 144 regs->ss, (uint16_t)(regs->esp & 0xffff));
Ian@8597 145 }
Ian@8597 146 else
Ian@8597 147 {
Ian@8597 148 stack = (unsigned long *)regs->esp;
kaf24@9584 149 printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
Ian@8597 150 }
kaf24@4923 151
kaf24@6495 152 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
iap10@2441 153 {
kaf24@6591 154 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
iap10@2441 155 break;
kaf24@4923 156 if ( get_user(addr, stack) )
kaf24@4923 157 {
kaf24@4923 158 if ( i != 0 )
kaf24@4923 159 printk("\n ");
kaf24@4923 160 printk("Fault while accessing guest memory.");
kaf24@4923 161 i = 1;
kaf24@4923 162 break;
kaf24@4923 163 }
kaf24@6495 164 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
kaf24@9584 165 printk("\n ");
kaf24@9584 166 printk(" %p", _p(addr));
kaf24@4923 167 stack++;
iap10@2441 168 }
kaf24@4923 169 if ( i == 0 )
kaf24@4923 170 printk("Stack empty.");
iap10@2441 171 printk("\n");
iap10@2441 172 }
iap10@2441 173
kaf24@6591 174 #ifdef NDEBUG
kaf24@6591 175
kaf24@6591 176 static void show_trace(struct cpu_user_regs *regs)
iap10@2471 177 {
kaf24@6591 178 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
iap10@2471 179
kaf24@6591 180 printk("Xen call trace:\n ");
kaf24@4923 181
kaf24@6591 182 printk("[<%p>]", _p(regs->eip));
kaf24@6591 183 print_symbol(" %s\n ", regs->eip);
kaf24@6591 184
kaf24@6591 185 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
kaf24@4923 186 {
iap10@2471 187 addr = *stack++;
kaf24@5840 188 if ( is_kernel_text(addr) )
kaf24@4923 189 {
kaf24@5840 190 printk("[<%p>]", _p(addr));
kaf24@6247 191 print_symbol(" %s\n ", addr);
iap10@2471 192 }
iap10@2471 193 }
kaf24@6591 194
iap10@2471 195 printk("\n");
iap10@2471 196 }
iap10@2471 197
kaf24@6591 198 #else
kaf24@6591 199
kaf24@6591 200 static void show_trace(struct cpu_user_regs *regs)
kaf24@1452 201 {
kaf24@6591 202 unsigned long *frame, next, addr, low, high;
kaf24@6591 203
kaf24@6591 204 printk("Xen call trace:\n ");
kaf24@6591 205
kaf24@6591 206 printk("[<%p>]", _p(regs->eip));
kaf24@6591 207 print_symbol(" %s\n ", regs->eip);
kaf24@6591 208
kaf24@6591 209 /* Bounds for range of valid frame pointer. */
kaf24@6591 210 low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
kaf24@8457 211 high = (low & ~(STACK_SIZE - 1)) +
kaf24@8457 212 (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
kaf24@6591 213
kaf24@6591 214 /* The initial frame pointer. */
kaf24@6591 215 next = regs->ebp;
kaf24@6591 216
kaf24@6591 217 for ( ; ; )
kaf24@6591 218 {
kaf24@6591 219 /* Valid frame pointer? */
kaf24@8457 220 if ( (next < low) || (next >= high) )
kaf24@6591 221 {
kaf24@6591 222 /*
kaf24@6591 223 * Exception stack frames have a different layout, denoted by an
kaf24@6591 224 * inverted frame pointer.
kaf24@6591 225 */
kaf24@6591 226 next = ~next;
kaf24@8457 227 if ( (next < low) || (next >= high) )
kaf24@6591 228 break;
kaf24@6591 229 frame = (unsigned long *)next;
kaf24@6591 230 next = frame[0];
kaf24@6591 231 addr = frame[(offsetof(struct cpu_user_regs, eip) -
kaf24@6591 232 offsetof(struct cpu_user_regs, ebp))
kaf24@6591 233 / BYTES_PER_LONG];
kaf24@6591 234 }
kaf24@6591 235 else
kaf24@6591 236 {
kaf24@6591 237 /* Ordinary stack frame. */
kaf24@6591 238 frame = (unsigned long *)next;
kaf24@6591 239 next = frame[0];
kaf24@6591 240 addr = frame[1];
kaf24@6591 241 }
kaf24@6591 242
kaf24@6591 243 printk("[<%p>]", _p(addr));
kaf24@6591 244 print_symbol(" %s\n ", addr);
kaf24@6591 245
kaf24@6591 246 low = (unsigned long)&frame[2];
kaf24@6591 247 }
kaf24@6591 248
kaf24@6591 249 printk("\n");
kaf24@6591 250 }
kaf24@6591 251
kaf24@6591 252 #endif
kaf24@6591 253
kaf24@6591 254 void show_stack(struct cpu_user_regs *regs)
kaf24@6591 255 {
kaf24@6591 256 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
kaf24@1452 257 int i;
kaf24@1452 258
kaf24@9192 259 if ( guest_mode(regs) )
kaf24@6591 260 return show_guest_stack(regs);
kaf24@6591 261
kaf24@9584 262 printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
kaf24@1452 263
kaf24@6495 264 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
kaf24@1452 265 {
kaf24@6591 266 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
kaf24@1452 267 break;
kaf24@6495 268 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
kaf24@9584 269 printk("\n ");
kaf24@4923 270 addr = *stack++;
kaf24@9584 271 printk(" %p", _p(addr));
kaf24@1452 272 }
kaf24@4923 273 if ( i == 0 )
kaf24@4923 274 printk("Stack empty.");
kaf24@1452 275 printk("\n");
kaf24@1452 276
kaf24@6591 277 show_trace(regs);
kaf24@1452 278 }
kaf24@1452 279
kfraser@10477 280 void show_stack_overflow(unsigned long esp)
kfraser@10477 281 {
kfraser@10477 282 #ifdef MEMORY_GUARD
kfraser@10795 283 unsigned long esp_top;
kfraser@10477 284 unsigned long *stack, addr;
kfraser@10477 285
kfraser@10795 286 esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
kfraser@10795 287
kfraser@10795 288 /* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
kfraser@10795 289 if ( ((unsigned long)(esp - esp_top) > 512) &&
kfraser@10795 290 ((unsigned long)(esp_top - esp) > 512) )
kfraser@10477 291 return;
kfraser@10477 292
kfraser@10477 293 if ( esp < esp_top )
kfraser@10477 294 esp = esp_top;
kfraser@10477 295
kfraser@10477 296 printk("Xen stack overflow:\n ");
kfraser@10477 297
kfraser@10477 298 stack = (unsigned long *)esp;
kfraser@10477 299 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
kfraser@10477 300 {
kfraser@10477 301 addr = *stack++;
kfraser@10477 302 if ( is_kernel_text(addr) )
kfraser@10477 303 {
kfraser@10477 304 printk("%p: [<%p>]", stack, _p(addr));
kfraser@10477 305 print_symbol(" %s\n ", addr);
kfraser@10477 306 }
kfraser@10477 307 }
kfraser@10477 308
kfraser@10477 309 printk("\n");
kfraser@10477 310 #endif
kfraser@10477 311 }
kfraser@10477 312
kfraser@10478 313 void show_execution_state(struct cpu_user_regs *regs)
kfraser@10478 314 {
kfraser@10478 315 show_registers(regs);
kfraser@10478 316 show_stack(regs);
kfraser@10478 317 }
kfraser@10478 318
kaf24@3041 319 /*
kaf24@3041 320 * This is called for faults at very unexpected times (e.g., when interrupts
kaf24@3041 321 * are disabled). In such situations we can't do much that is safe. We try to
kaf24@3041 322 * print out some tracing and then we just spin.
kaf24@3041 323 */
kaf24@4683 324 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
kaf24@1452 325 {
kaf24@3041 326 int cpu = smp_processor_id();
kaf24@3089 327 unsigned long cr2;
kaf24@3041 328 static char *trapstr[] = {
kaf24@3041 329 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
kaf24@9412 330 "invalid opcode", "device not available", "double fault",
kaf24@3041 331 "coprocessor segment", "invalid tss", "segment not found",
kaf24@3041 332 "stack error", "general protection fault", "page fault",
kaf24@3041 333 "spurious interrupt", "coprocessor error", "alignment check",
kaf24@3041 334 "machine check", "simd error"
kaf24@3041 335 };
kaf24@3041 336
kaf24@4926 337 watchdog_disable();
kaf24@5321 338 console_start_sync();
kaf24@3849 339
kfraser@10478 340 show_execution_state(regs);
kaf24@3089 341
kaf24@3089 342 if ( trapnr == TRAP_page_fault )
kaf24@3089 343 {
kaf24@3602 344 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
kaf24@4817 345 printk("Faulting linear address: %p\n", _p(cr2));
kaf24@4817 346 show_page_walk(cr2);
kaf24@3089 347 }
kaf24@3089 348
kaf24@3041 349 printk("************************************\n");
kaf24@3089 350 printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
kaf24@3089 351 cpu, trapnr, trapstr[trapnr], regs->error_code,
kaf24@3041 352 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
kaf24@3041 353 printk("System shutting down -- need manual reset.\n");
kaf24@3041 354 printk("************************************\n");
kaf24@3041 355
sos22@3792 356 (void)debugger_trap_fatal(trapnr, regs);
sos22@3771 357
kaf24@3041 358 /* Lock up the console to prevent spurious output from other CPUs. */
kaf24@3041 359 console_force_lock();
kaf24@3041 360
kaf24@3041 361 /* Wait for manual reset. */
kaf24@9617 362 machine_halt();
kaf24@1452 363 }
kaf24@1452 364
kaf24@3052 365 static inline int do_trap(int trapnr, char *str,
kaf24@4683 366 struct cpu_user_regs *regs,
kaf24@3089 367 int use_error_code)
kaf24@1452 368 {
kaf24@5289 369 struct vcpu *v = current;
kaf24@5289 370 struct trap_bounce *tb = &v->arch.trap_bounce;
kaf24@8679 371 struct trap_info *ti;
kaf24@1452 372 unsigned long fixup;
kaf24@1452 373
kaf24@3089 374 DEBUGGER_trap_entry(trapnr, regs);
kaf24@2971 375
kaf24@9192 376 if ( !guest_mode(regs) )
kaf24@1590 377 goto xen_fault;
kaf24@1452 378
kaf24@4689 379 ti = &current->arch.guest_context.trap_ctxt[trapnr];
kaf24@3089 380 tb->flags = TBF_EXCEPTION;
kaf24@3089 381 tb->cs = ti->cs;
kaf24@3089 382 tb->eip = ti->address;
kaf24@3089 383 if ( use_error_code )
kaf24@3089 384 {
kaf24@3089 385 tb->flags |= TBF_EXCEPTION_ERRCODE;
kaf24@3089 386 tb->error_code = regs->error_code;
kaf24@3089 387 }
kaf24@1452 388 if ( TI_GET_IF(ti) )
kaf24@4949 389 tb->flags |= TBF_INTERRUPT;
kaf24@3052 390 return 0;
kaf24@1452 391
kaf24@1590 392 xen_fault:
kaf24@1452 393
kaf24@1452 394 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
kaf24@1452 395 {
kaf24@4654 396 DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
kaf24@1452 397 regs->eip = fixup;
kaf24@3052 398 return 0;
kaf24@1452 399 }
kaf24@1452 400
kaf24@3089 401 DEBUGGER_trap_fatal(trapnr, regs);
cl349@2957 402
kfraser@10478 403 show_execution_state(regs);
kaf24@1452 404 panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
kaf24@3089 405 "[error_code=%04x]\n",
kaf24@3089 406 smp_processor_id(), trapnr, str, regs->error_code);
kaf24@3052 407 return 0;
kaf24@1452 408 }
kaf24@1452 409
kfraser@10735 410 #define DO_ERROR_NOCODE(trapnr, str, name) \
kfraser@10735 411 asmlinkage int do_##name(struct cpu_user_regs *regs) \
kfraser@10735 412 { \
kfraser@10735 413 return do_trap(trapnr, str, regs, 0); \
kaf24@1452 414 }
kaf24@1452 415
kfraser@10735 416 #define DO_ERROR(trapnr, str, name) \
kfraser@10735 417 asmlinkage int do_##name(struct cpu_user_regs *regs) \
kfraser@10735 418 { \
kfraser@10735 419 return do_trap(trapnr, str, regs, 1); \
kaf24@1452 420 }
kaf24@1452 421
kaf24@1452 422 DO_ERROR_NOCODE( 0, "divide error", divide_error)
kaf24@2076 423 DO_ERROR_NOCODE( 4, "overflow", overflow)
kaf24@2076 424 DO_ERROR_NOCODE( 5, "bounds", bounds)
kaf24@2076 425 DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
kaf24@2076 426 DO_ERROR(10, "invalid TSS", invalid_TSS)
kaf24@2076 427 DO_ERROR(11, "segment not present", segment_not_present)
kaf24@2076 428 DO_ERROR(12, "stack segment", stack_segment)
kaf24@2076 429 DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
kaf24@2076 430 DO_ERROR(17, "alignment check", alignment_check)
kaf24@2076 431 DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
kaf24@1452 432
kfraser@10892 433 int rdmsr_hypervisor_regs(
kfraser@10892 434 uint32_t idx, uint32_t *eax, uint32_t *edx)
kfraser@10892 435 {
kfraser@10892 436 idx -= 0x40000000;
kfraser@10892 437 if ( idx > 0 )
kfraser@10892 438 return 0;
kfraser@10892 439
kfraser@10892 440 *eax = *edx = 0;
kfraser@10892 441 return 1;
kfraser@10892 442 }
kfraser@10892 443
kfraser@10892 444 int wrmsr_hypervisor_regs(
kfraser@10892 445 uint32_t idx, uint32_t eax, uint32_t edx)
kfraser@10892 446 {
kfraser@10892 447 struct domain *d = current->domain;
kfraser@10892 448
kfraser@10892 449 idx -= 0x40000000;
kfraser@10892 450 if ( idx > 0 )
kfraser@10892 451 return 0;
kfraser@10892 452
kfraser@10892 453 switch ( idx )
kfraser@10892 454 {
kfraser@10892 455 case 0:
kfraser@10892 456 {
kfraser@10892 457 void *hypercall_page;
kfraser@10892 458 unsigned long mfn;
kfraser@10892 459 unsigned long gmfn = ((unsigned long)edx << 20) | (eax >> 12);
kfraser@10892 460 unsigned int idx = eax & 0xfff;
kfraser@10892 461
kfraser@10892 462 if ( idx > 0 )
kfraser@10892 463 {
kfraser@10892 464 DPRINTK("Dom%d: Out of range index %u to MSR %08x\n",
kfraser@10892 465 d->domain_id, idx, 0x40000000);
kfraser@10892 466 return 0;
kfraser@10892 467 }
kfraser@10892 468
kfraser@10892 469 mfn = gmfn_to_mfn(d, gmfn);
kfraser@10892 470
kfraser@10892 471 if ( !mfn_valid(mfn) ||
kfraser@10892 472 !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) )
kfraser@10892 473 {
kfraser@10892 474 DPRINTK("Dom%d: Bad GMFN %lx (MFN %lx) to MSR %08x\n",
kfraser@10892 475 d->domain_id, gmfn, mfn, 0x40000000);
kfraser@10892 476 return 0;
kfraser@10892 477 }
kfraser@10892 478
kfraser@10892 479 hypercall_page = map_domain_page(mfn);
kfraser@10892 480 hypercall_page_initialise(d, hypercall_page);
kfraser@10892 481 unmap_domain_page(hypercall_page);
kfraser@10892 482
kfraser@10892 483 put_page_and_type(mfn_to_page(mfn));
kfraser@10892 484 break;
kfraser@10892 485 }
kfraser@10892 486
kfraser@10892 487 default:
kfraser@10892 488 BUG();
kfraser@10892 489 }
kfraser@10892 490
kfraser@10892 491 return 1;
kfraser@10892 492 }
kfraser@10892 493
kfraser@10661 494 int cpuid_hypervisor_leaves(
kfraser@10661 495 uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
kfraser@10661 496 {
kfraser@10892 497 idx -= 0x40000000;
kfraser@10892 498 if ( idx > 2 )
kfraser@10661 499 return 0;
kfraser@10661 500
kfraser@10892 501 switch ( idx )
kfraser@10661 502 {
kfraser@10661 503 case 0:
kfraser@10892 504 *eax = 0x40000002; /* Largest leaf */
kfraser@10892 505 *ebx = 0x566e6558; /* Signature 1: "XenV" */
kfraser@10892 506 *ecx = 0x65584d4d; /* Signature 2: "MMXe" */
kfraser@10892 507 *edx = 0x4d4d566e; /* Signature 3: "nVMM" */
kfraser@10892 508 break;
kfraser@10892 509
kfraser@10892 510 case 1:
kfraser@10892 511 *eax = (xen_major_version() << 16) | xen_minor_version();
kfraser@10892 512 *ebx = 0; /* Reserved */
kfraser@10892 513 *ecx = 0; /* Reserved */
kfraser@10892 514 *edx = 0; /* Reserved */
kfraser@10892 515 break;
kfraser@10892 516
kfraser@10892 517 case 2:
kfraser@10892 518 *eax = 1; /* Number of hypercall-transfer pages */
kfraser@10892 519 *ebx = 0x40000000; /* MSR base address */
kfraser@10892 520 *ecx = 0; /* Features 1 */
kfraser@10892 521 *edx = 0; /* Features 2 */
kfraser@10661 522 break;
kfraser@10661 523
kfraser@10661 524 default:
kfraser@10661 525 BUG();
kfraser@10661 526 }
kfraser@10661 527
kfraser@10661 528 return 1;
kfraser@10661 529 }
kfraser@10661 530
kaf24@9412 531 static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
kaf24@9412 532 {
kfraser@10735 533 char sig[5], instr[2];
kfraser@10661 534 uint32_t a, b, c, d;
kfraser@10735 535 unsigned long eip, rc;
kaf24@9412 536
kaf24@9412 537 a = regs->eax;
kaf24@9412 538 b = regs->ebx;
kaf24@9412 539 c = regs->ecx;
kaf24@9412 540 d = regs->edx;
kaf24@9412 541 eip = regs->eip;
kaf24@9412 542
kaf24@9412 543 /* Check for forced emulation signature: ud2 ; .ascii "xen". */
kfraser@10735 544 if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
kfraser@10735 545 {
kfraser@10735 546 propagate_page_fault(eip + sizeof(sig) - rc, 0);
kfraser@10735 547 return EXCRET_fault_fixed;
kfraser@10735 548 }
kfraser@10735 549 if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
kaf24@9412 550 return 0;
kfraser@10735 551 eip += sizeof(sig);
kaf24@9412 552
kaf24@9412 553 /* We only emulate CPUID. */
kfraser@10735 554 if ( (rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
kfraser@10735 555 {
kfraser@10735 556 propagate_page_fault(eip + sizeof(instr) - rc, 0);
kfraser@10735 557 return EXCRET_fault_fixed;
kfraser@10735 558 }
kfraser@10735 559 if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
kaf24@9412 560 return 0;
kaf24@9412 561 eip += sizeof(instr);
kaf24@9412 562
kaf24@9412 563 __asm__ (
kaf24@9412 564 "cpuid"
kaf24@9412 565 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
kaf24@9412 566 : "0" (a), "1" (b), "2" (c), "3" (d) );
kaf24@9412 567
kaf24@9412 568 if ( regs->eax == 1 )
kaf24@9412 569 {
kaf24@9412 570 /* Modify Feature Information. */
kaf24@9412 571 clear_bit(X86_FEATURE_VME, &d);
kaf24@9412 572 clear_bit(X86_FEATURE_DE, &d);
kaf24@9412 573 clear_bit(X86_FEATURE_PSE, &d);
kaf24@9412 574 clear_bit(X86_FEATURE_PGE, &d);
Ian@9816 575 if ( !supervisor_mode_kernel )
Ian@9816 576 clear_bit(X86_FEATURE_SEP, &d);
kaf24@9412 577 if ( !IS_PRIV(current->domain) )
kaf24@9412 578 clear_bit(X86_FEATURE_MTRR, &d);
kaf24@9412 579 }
kfraser@10661 580 else
kfraser@10661 581 {
kfraser@10661 582 (void)cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d);
kfraser@10661 583 }
kaf24@9412 584
kaf24@9412 585 regs->eax = a;
kaf24@9412 586 regs->ebx = b;
kaf24@9412 587 regs->ecx = c;
kaf24@9412 588 regs->edx = d;
kaf24@9412 589 regs->eip = eip;
kaf24@9412 590
kaf24@9412 591 return EXCRET_fault_fixed;
kaf24@9412 592 }
kaf24@9412 593
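/*
 * Illustrative guest-side sketch (not part of traps.c): emitting the forced-
 * emulation sequence that emulate_forced_invalid_op() above recognises. The
 * ud2 + "xen" prefix raises #UD; Xen spots the signature, executes CPUID on
 * the guest's behalf (masking features such as PSE, PGE and MTRR, and
 * answering the 0x4000000x leaves), then resumes the guest after the real
 * cpuid instruction. The helper name xen_cpuid() is an assumption for this
 * example.
 */
static inline void xen_cpuid(uint32_t *eax, uint32_t *ebx,
                             uint32_t *ecx, uint32_t *edx)
{
    __asm__ __volatile__ ( "ud2a ; .ascii \"xen\" ; cpuid"
                           : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                           : "0" (*eax), "1" (*ebx), "2" (*ecx), "3" (*edx) );
}
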
kaf24@9412 594 asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
kaf24@9412 595 {
kaf24@9412 596 struct vcpu *v = current;
kaf24@9412 597 struct trap_bounce *tb = &v->arch.trap_bounce;
kaf24@9412 598 struct trap_info *ti;
kaf24@9412 599 int rc;
kaf24@9412 600
kaf24@9412 601 DEBUGGER_trap_entry(TRAP_invalid_op, regs);
kaf24@9412 602
kaf24@9412 603 if ( unlikely(!guest_mode(regs)) )
kaf24@9412 604 {
kfraser@10479 605 char sig[5];
kfraser@10479 606 /* Signature (ud2; .ascii "dbg") indicates dump state and continue. */
kfraser@10479 607 if ( (__copy_from_user(sig, (char *)regs->eip, sizeof(sig)) == 0) &&
kfraser@10479 608 (memcmp(sig, "\xf\xb""dbg", sizeof(sig)) == 0) )
kfraser@10479 609 {
kfraser@10479 610 show_execution_state(regs);
kfraser@10479 611 regs->eip += sizeof(sig);
kfraser@10479 612 return EXCRET_fault_fixed;
kfraser@10479 613 }
kfraser@10479 614 printk("%02x %02x %02x %02x %02x\n",
kfraser@10479 615 (unsigned char)sig[0],
kfraser@10479 616 (unsigned char)sig[1],
kfraser@10479 617 (unsigned char)sig[2],
kfraser@10479 618 (unsigned char)sig[3],
kfraser@10479 619 (unsigned char)sig[4]);
kaf24@9415 620 DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
kfraser@10478 621 show_execution_state(regs);
kaf24@9412 622 panic("CPU%d FATAL TRAP: vector = %d (invalid opcode)\n",
kaf24@9412 623 smp_processor_id(), TRAP_invalid_op);
kaf24@9412 624 }
kaf24@9412 625
kaf24@9412 626 if ( (rc = emulate_forced_invalid_op(regs)) != 0 )
kaf24@9412 627 return rc;
kaf24@9412 628
kaf24@9412 629 ti = &current->arch.guest_context.trap_ctxt[TRAP_invalid_op];
kaf24@9412 630 tb->flags = TBF_EXCEPTION;
kaf24@9412 631 tb->cs = ti->cs;
kaf24@9412 632 tb->eip = ti->address;
kaf24@9412 633 if ( TI_GET_IF(ti) )
kaf24@9412 634 tb->flags |= TBF_INTERRUPT;
kaf24@9412 635
kaf24@9412 636 return 0;
kaf24@9412 637 }
kaf24@9412 638
kaf24@4683 639 asmlinkage int do_int3(struct cpu_user_regs *regs)
kaf24@1452 640 {
kaf24@5289 641 struct vcpu *v = current;
kaf24@5289 642 struct trap_bounce *tb = &v->arch.trap_bounce;
kaf24@8679 643 struct trap_info *ti;
kaf24@1452 644
kaf24@3089 645 DEBUGGER_trap_entry(TRAP_int3, regs);
kaf24@1452 646
kaf24@9192 647 if ( !guest_mode(regs) )
kaf24@1452 648 {
kaf24@3089 649 DEBUGGER_trap_fatal(TRAP_int3, regs);
kfraser@10478 650 show_execution_state(regs);
kaf24@3089 651 panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
kmacy@4118 652 }
kaf24@1452 653
kaf24@4689 654 ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
kaf24@3089 655 tb->flags = TBF_EXCEPTION;
kaf24@3089 656 tb->cs = ti->cs;
kaf24@3089 657 tb->eip = ti->address;
kaf24@1452 658 if ( TI_GET_IF(ti) )
kaf24@4949 659 tb->flags |= TBF_INTERRUPT;
kaf24@3052 660
kaf24@3052 661 return 0;
kaf24@1452 662 }
kaf24@1452 663
kaf24@4972 664 asmlinkage int do_machine_check(struct cpu_user_regs *regs)
kaf24@3041 665 {
kaf24@3089 666 fatal_trap(TRAP_machine_check, regs);
kaf24@4972 667 return 0;
kaf24@1452 668 }
kaf24@1452 669
kaf24@3424 670 void propagate_page_fault(unsigned long addr, u16 error_code)
kaf24@3424 671 {
kaf24@8679 672 struct trap_info *ti;
kaf24@5289 673 struct vcpu *v = current;
kaf24@5289 674 struct trap_bounce *tb = &v->arch.trap_bounce;
kaf24@3424 675
kaf24@7744 676 v->arch.guest_context.ctrlreg[2] = addr;
kaf24@7744 677 v->vcpu_info->arch.cr2 = addr;
kaf24@7744 678
kaf24@9185 679 /* Re-set error_code.user flag appropriately for the guest. */
kfraser@10456 680 error_code &= ~PGERR_user_mode;
kaf24@9192 681 if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
kfraser@10456 682 error_code |= PGERR_user_mode;
kaf24@9185 683
kaf24@5289 684 ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
kaf24@7744 685 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
kaf24@3424 686 tb->error_code = error_code;
kaf24@3424 687 tb->cs = ti->cs;
kaf24@3424 688 tb->eip = ti->address;
kaf24@3424 689 if ( TI_GET_IF(ti) )
kaf24@4949 690 tb->flags |= TBF_INTERRUPT;
kaf24@3424 691 }
kaf24@3424 692
kaf24@8533 693 static int handle_gdt_ldt_mapping_fault(
kaf24@4972 694 unsigned long offset, struct cpu_user_regs *regs)
kaf24@1452 695 {
kaf24@1452 696 extern int map_ldt_shadow_page(unsigned int);
kaf24@1452 697
kaf24@5289 698 struct vcpu *v = current;
kaf24@5289 699 struct domain *d = v->domain;
kaf24@4972 700 int ret;
kaf24@4972 701
kaf24@4972 702 /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
kaf24@8533 703 unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
kaf24@8533 704 unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
kaf24@4972 705
kaf24@4972 706 /* Should never fault in another vcpu's area. */
kaf24@4972 707 BUG_ON(vcpu_area != current->vcpu_id);
kaf24@4972 708
kaf24@4972 709 /* Byte offset within the gdt/ldt sub-area. */
kaf24@8533 710 offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
kaf24@4972 711
kaf24@4972 712 if ( likely(is_ldt_area) )
kaf24@4972 713 {
kaf24@4972 714 /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
kaf24@4972 715 LOCK_BIGLOCK(d);
kaf24@10472 716 cleanup_writable_pagetable(d);
kaf24@4972 717 ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
kaf24@4972 718 UNLOCK_BIGLOCK(d);
kaf24@4972 719
kaf24@4972 720 if ( unlikely(ret == 0) )
kaf24@4972 721 {
kaf24@4972 722 /* In hypervisor mode? Leave it to the #PF handler to fix up. */
kaf24@9192 723 if ( !guest_mode(regs) )
kaf24@4972 724 return 0;
kaf24@4972 725 /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
kaf24@4972 726 propagate_page_fault(
kaf24@5289 727 v->arch.guest_context.ldt_base + offset, regs->error_code);
kaf24@4972 728 }
kaf24@4972 729 }
kaf24@4972 730 else
kaf24@4972 731 {
kaf24@4972 732 /* GDT fault: handle the fault as #GP(selector). */
kaf24@4972 733 regs->error_code = (u16)offset & ~7;
kaf24@4972 734 (void)do_general_protection(regs);
kaf24@4972 735 }
kaf24@4972 736
kaf24@4972 737 return EXCRET_fault_fixed;
kaf24@4972 738 }
kaf24@4972 739
kaf24@6720 740 #ifdef HYPERVISOR_VIRT_END
kaf24@6720 741 #define IN_HYPERVISOR_RANGE(va) \
kaf24@6720 742 (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
kaf24@6720 743 #else
kaf24@6720 744 #define IN_HYPERVISOR_RANGE(va) \
kaf24@6720 745 (((va) >= HYPERVISOR_VIRT_START))
kaf24@6720 746 #endif
kaf24@6720 747
kfraser@10456 748 static int __spurious_page_fault(
kfraser@10456 749 unsigned long addr, struct cpu_user_regs *regs)
kfraser@10456 750 {
kfraser@10480 751 unsigned long mfn, cr3 = read_cr3();
kfraser@10456 752 #if CONFIG_PAGING_LEVELS >= 4
kfraser@10456 753 l4_pgentry_t l4e, *l4t;
kfraser@10456 754 #endif
kfraser@10456 755 #if CONFIG_PAGING_LEVELS >= 3
kfraser@10456 756 l3_pgentry_t l3e, *l3t;
kfraser@10456 757 #endif
kfraser@10456 758 l2_pgentry_t l2e, *l2t;
kfraser@10456 759 l1_pgentry_t l1e, *l1t;
kfraser@10456 760 unsigned int required_flags, disallowed_flags;
kfraser@10456 761
kaf24@10471 762 /* Reserved bit violations are never spurious faults. */
kaf24@10471 763 if ( regs->error_code & PGERR_reserved_bit )
kaf24@10471 764 return 0;
kaf24@10471 765
kfraser@10456 766 required_flags = _PAGE_PRESENT;
kfraser@10456 767 if ( regs->error_code & PGERR_write_access )
kfraser@10456 768 required_flags |= _PAGE_RW;
kfraser@10456 769 if ( regs->error_code & PGERR_user_mode )
kfraser@10456 770 required_flags |= _PAGE_USER;
kfraser@10456 771
kfraser@10456 772 disallowed_flags = 0;
kfraser@10456 773 if ( regs->error_code & PGERR_instr_fetch )
kfraser@10456 774 disallowed_flags |= _PAGE_NX;
kfraser@10456 775
kfraser@10480 776 mfn = cr3 >> PAGE_SHIFT;
kfraser@10480 777
kfraser@10456 778 #if CONFIG_PAGING_LEVELS >= 4
kfraser@10456 779 l4t = map_domain_page(mfn);
kfraser@10456 780 l4e = l4t[l4_table_offset(addr)];
kfraser@10456 781 mfn = l4e_get_pfn(l4e);
kfraser@10456 782 unmap_domain_page(l4t);
kfraser@10456 783 if ( !(l4e_get_flags(l4e) & required_flags) ||
kfraser@10456 784 (l4e_get_flags(l4e) & disallowed_flags) )
kfraser@10456 785 return 0;
kfraser@10456 786 #endif
kfraser@10456 787
kfraser@10456 788 #if CONFIG_PAGING_LEVELS >= 3
kfraser@10480 789 l3t = map_domain_page(mfn);
kfraser@10480 790 #ifdef CONFIG_X86_PAE
kfraser@10480 791 l3t += (cr3 & 0xFE0UL) >> 3;
kfraser@10480 792 #endif
kfraser@10456 793 l3e = l3t[l3_table_offset(addr)];
kfraser@10456 794 mfn = l3e_get_pfn(l3e);
kfraser@10456 795 unmap_domain_page(l3t);
kfraser@10456 796 #ifdef CONFIG_X86_PAE
kfraser@10456 797 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
kfraser@10456 798 return 0;
kfraser@10456 799 #else
kfraser@10456 800 if ( !(l3e_get_flags(l3e) & required_flags) ||
kfraser@10456 801 (l3e_get_flags(l3e) & disallowed_flags) )
kfraser@10456 802 return 0;
kfraser@10456 803 #endif
kfraser@10456 804 #endif
kfraser@10456 805
kfraser@10456 806 l2t = map_domain_page(mfn);
kfraser@10456 807 l2e = l2t[l2_table_offset(addr)];
kfraser@10456 808 mfn = l2e_get_pfn(l2e);
kfraser@10456 809 unmap_domain_page(l2t);
kfraser@10456 810 if ( !(l2e_get_flags(l2e) & required_flags) ||
kfraser@10456 811 (l2e_get_flags(l2e) & disallowed_flags) )
kfraser@10456 812 return 0;
kfraser@10456 813 if ( l2e_get_flags(l2e) & _PAGE_PSE )
kfraser@10879 814 {
kfraser@10879 815 l1e = l1e_empty(); /* define before use in debug tracing */
kfraser@10879 816 goto spurious;
kfraser@10879 817 }
kfraser@10456 818
kfraser@10456 819 l1t = map_domain_page(mfn);
kfraser@10456 820 l1e = l1t[l1_table_offset(addr)];
kfraser@10456 821 mfn = l1e_get_pfn(l1e);
kfraser@10456 822 unmap_domain_page(l1t);
kfraser@10456 823 if ( !(l1e_get_flags(l1e) & required_flags) ||
kfraser@10456 824 (l1e_get_flags(l1e) & disallowed_flags) )
kfraser@10456 825 return 0;
kfraser@10879 826
kfraser@10879 827 spurious:
kfraser@10879 828 DPRINTK("Spurious fault in domain %u:%u at addr %lx, e/c %04x\n",
kfraser@10879 829 current->domain->domain_id, current->vcpu_id,
kfraser@10879 830 addr, regs->error_code);
kfraser@10879 831 #if CONFIG_PAGING_LEVELS >= 4
kfraser@10879 832 DPRINTK(" l4e = %"PRIpte"\n", l4e_get_intpte(l4e));
kfraser@10879 833 #endif
kfraser@10879 834 #if CONFIG_PAGING_LEVELS >= 3
kfraser@10879 835 DPRINTK(" l3e = %"PRIpte"\n", l3e_get_intpte(l3e));
kfraser@10879 836 #endif
kfraser@10879 837 DPRINTK(" l2e = %"PRIpte"\n", l2e_get_intpte(l2e));
kfraser@10879 838 DPRINTK(" l1e = %"PRIpte"\n", l1e_get_intpte(l1e));
kfraser@10879 839 #ifndef NDEBUG
kfraser@10879 840 show_registers(regs);
kfraser@10879 841 #endif
kfraser@10456 842 return 1;
kfraser@10456 843 }
kfraser@10456 844
kfraser@10456 845 static int spurious_page_fault(
kfraser@10456 846 unsigned long addr, struct cpu_user_regs *regs)
kfraser@10456 847 {
kaf24@10472 848 struct domain *d = current->domain;
kfraser@10456 849 int is_spurious;
kfraser@10456 850
kfraser@10456 851 LOCK_BIGLOCK(d);
kaf24@10472 852 cleanup_writable_pagetable(d);
kfraser@10456 853 is_spurious = __spurious_page_fault(addr, regs);
kaf24@10472 854 UNLOCK_BIGLOCK(d);
kfraser@10456 855
kfraser@10456 856 return is_spurious;
kfraser@10456 857 }
kfraser@10456 858
kaf24@6720 859 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
kaf24@1452 860 {
kaf24@6720 861 struct vcpu *v = current;
kaf24@5289 862 struct domain *d = v->domain;
kaf24@1452 863
kaf24@6720 864 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
kaf24@6720 865 {
kaf24@9192 866 if ( shadow_mode_external(d) && guest_mode(regs) )
kaf24@6720 867 return shadow_fault(addr, regs);
kaf24@8533 868 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
kaf24@8533 869 return handle_gdt_ldt_mapping_fault(
kaf24@8533 870 addr - GDT_LDT_VIRT_START, regs);
kfraser@10456 871 /*
kfraser@10456 872 * Do not propagate spurious faults in the hypervisor area to the
kfraser@10456 873 * guest. It cannot fix them up.
kfraser@10456 874 */
kaf24@10472 875 return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
kaf24@6720 876 }
kfraser@10456 877
kfraser@10456 878 if ( unlikely(shadow_mode_enabled(d)) )
kaf24@6720 879 return shadow_fault(addr, regs);
kfraser@10456 880
kfraser@10456 881 if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
kaf24@2073 882 {
cl349@3148 883 LOCK_BIGLOCK(d);
kaf24@4455 884 if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
kaf24@6054 885 unlikely(l2_linear_offset(addr) ==
kaf24@4455 886 d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
kaf24@2337 887 {
kaf24@4455 888 ptwr_flush(d, PTWR_PT_ACTIVE);
cl349@2998 889 UNLOCK_BIGLOCK(d);
kaf24@3052 890 return EXCRET_fault_fixed;
kaf24@2337 891 }
kaf24@2337 892
kaf24@10472 893 /*
kaf24@10472 894 * Note it is *not* safe to check PGERR_page_present here. It can be
kaf24@10472 895 * clear, due to unhooked page table, when we would otherwise expect
kaf24@10472 896 * it to be set. We have an aversion to trusting that flag in Xen, and
kaf24@10472 897 * guests ought to be leery too.
kaf24@10472 898 */
kaf24@9192 899 if ( guest_kernel_mode(v, regs) &&
kaf24@10472 900 (regs->error_code & PGERR_write_access) &&
iap10@6150 901 ptwr_do_page_fault(d, addr, regs) )
kaf24@3052 902 {
cl349@3148 903 UNLOCK_BIGLOCK(d);
kaf24@3052 904 return EXCRET_fault_fixed;
kaf24@3052 905 }
cl349@3148 906 UNLOCK_BIGLOCK(d);
cl349@1822 907 }
cl349@1822 908
kaf24@6720 909 return 0;
kaf24@6720 910 }
kaf24@1452 911
kaf24@6720 912 /*
kaf24@6720 913 * #PF error code:
kaf24@6720 914 * Bit 0: Protection violation (=1) ; Page not present (=0)
kaf24@6720 915 * Bit 1: Write access
kaf24@9185 916 * Bit 2: User mode (=1) ; Supervisor mode (=0)
kaf24@6720 917 * Bit 3: Reserved bit violation
kaf24@6720 918 * Bit 4: Instruction fetch
kaf24@6720 919 */
kaf24@6720 920 asmlinkage int do_page_fault(struct cpu_user_regs *regs)
kaf24@6720 921 {
kaf24@6720 922 unsigned long addr, fixup;
kaf24@6720 923 int rc;
iap10@2573 924
kaf24@9954 925 ASSERT(!in_irq());
kaf24@9954 926
kaf24@6720 927 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
kaf24@6720 928
kaf24@6720 929 DEBUGGER_trap_entry(TRAP_page_fault, regs);
kaf24@6720 930
kaf24@6720 931 perfc_incrc(page_faults);
kaf24@6720 932
kaf24@6720 933 if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
kaf24@6720 934 return rc;
kaf24@6720 935
kaf24@9192 936 if ( unlikely(!guest_mode(regs)) )
kaf24@6720 937 {
kaf24@9448 938 if ( spurious_page_fault(addr, regs) )
kaf24@9448 939 return EXCRET_not_a_fault;
kaf24@9448 940
kaf24@6720 941 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
kaf24@6720 942 {
kaf24@6720 943 perfc_incrc(copy_user_faults);
kaf24@6720 944 regs->eip = fixup;
kaf24@6720 945 return 0;
kaf24@6720 946 }
kaf24@6720 947
kaf24@6720 948 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
kaf24@6720 949
kfraser@10478 950 show_execution_state(regs);
kaf24@6720 951 show_page_walk(addr);
kaf24@6720 952 panic("CPU%d FATAL PAGE FAULT\n"
kaf24@6720 953 "[error_code=%04x]\n"
kaf24@6720 954 "Faulting linear address: %p\n",
kaf24@9454 955 smp_processor_id(), regs->error_code, _p(addr));
kaf24@6720 956 }
kaf24@1452 957
kaf24@3424 958 propagate_page_fault(addr, regs->error_code);
kaf24@6054 959 return 0;
kaf24@1452 960 }
kaf24@1452 961
kaf24@3917 962 long do_fpu_taskswitch(int set)
kaf24@1452 963 {
kaf24@5289 964 struct vcpu *v = current;
kaf24@3917 965
kaf24@3917 966 if ( set )
kaf24@3917 967 {
kaf24@5576 968 v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
kaf24@3917 969 stts();
kaf24@3917 970 }
kaf24@3917 971 else
kaf24@3917 972 {
kaf24@5576 973 v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
kaf24@5289 974 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
kaf24@3917 975 clts();
kaf24@3917 976 }
kaf24@3917 977
kaf24@3917 978 return 0;
kaf24@3917 979 }
kaf24@3917 980
kaf24@4275 981 /* Has the guest requested sufficient permission for this I/O access? */
kaf24@4275 982 static inline int guest_io_okay(
kaf24@4103 983 unsigned int port, unsigned int bytes,
kaf24@5289 984 struct vcpu *v, struct cpu_user_regs *regs)
kaf24@4103 985 {
kaf24@4275 986 u16 x;
kaf24@4276 987 #if defined(__x86_64__)
kaf24@4276 988 /* If in user mode, switch to kernel mode just to read I/O bitmap. */
kaf24@5289 989 int user_mode = !(v->arch.flags & TF_kernel_mode);
kaf24@5289 990 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
kaf24@4276 991 #elif defined(__i386__)
kaf24@4276 992 #define TOGGLE_MODE() ((void)0)
kaf24@4276 993 #endif
kaf24@4276 994
kaf24@10347 995 if ( !vm86_mode(regs) &&
kaf24@10347 996 (v->arch.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
kaf24@4275 997 return 1;
kaf24@4276 998
kaf24@5289 999 if ( v->arch.iobmp_limit > (port + bytes) )
kaf24@4276 1000 {
kaf24@4276 1001 TOGGLE_MODE();
kaf24@5289 1002 __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
kaf24@4276 1003 TOGGLE_MODE();
kaf24@4276 1004 if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
kaf24@4276 1005 return 1;
kaf24@4276 1006 }
kaf24@4276 1007
kaf24@4275 1008 return 0;
kaf24@4103 1009 }
kaf24@4103 1010
kaf24@4275 1011 /* Has the administrator granted sufficient permission for this I/O access? */
kaf24@4275 1012 static inline int admin_io_okay(
kaf24@4275 1013 unsigned int port, unsigned int bytes,
kaf24@5289 1014 struct vcpu *v, struct cpu_user_regs *regs)
kaf24@4275 1015 {
kaf24@8468 1016 return ioports_access_permitted(v->domain, port, port + bytes - 1);
kaf24@4275 1017 }
kaf24@4275 1018
kaf24@4275 1019 /* Check admin limits. Silently fail the access if it is disallowed. */
kaf24@4275 1020 #define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
kaf24@4275 1021 #define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
kaf24@4275 1022 #define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
kaf24@4275 1023 #define outb_user(_v, _p, _d, _r) \
kaf24@4275 1024 (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
kaf24@4275 1025 #define outw_user(_v, _p, _d, _r) \
kaf24@4275 1026 (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
kaf24@4275 1027 #define outl_user(_v, _p, _d, _r) \
kaf24@4275 1028 (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
kaf24@4275 1029
kfraser@10735 1030 /* Instruction fetch with error handling. */
kfraser@10735 1031 #define insn_fetch(_type, _size, _ptr) \
kfraser@10735 1032 ({ unsigned long _rc, _x; \
kfraser@10735 1033 if ( (_rc = copy_from_user(&_x, (_type *)eip, sizeof(_type))) != 0 ) \
kfraser@10735 1034 { \
kfraser@10735 1035 propagate_page_fault(eip + sizeof(_type) - _rc, 0); \
kfraser@10735 1036 return EXCRET_fault_fixed; \
kfraser@10735 1037 } \
kaf24@4103 1038 eip += _size; (_type)_x; })
kaf24@4103 1039
kaf24@4683 1040 static int emulate_privileged_op(struct cpu_user_regs *regs)
kaf24@3311 1041 {
kaf24@5289 1042 struct vcpu *v = current;
kaf24@5696 1043 unsigned long *reg, eip = regs->eip, res;
kaf24@5696 1044 u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
kfraser@10735 1045 unsigned int port, i, op_bytes = 4, data, rc;
kaf24@8783 1046 u32 l, h;
kaf24@3311 1047
kaf24@4103 1048 /* Legacy prefixes. */
kaf24@4103 1049 for ( i = 0; i < 8; i++ )
kaf24@4103 1050 {
kaf24@4103 1051 switch ( opcode = insn_fetch(u8, 1, eip) )
kaf24@4103 1052 {
kaf24@4103 1053 case 0x66: /* operand-size override */
kaf24@4103 1054 op_bytes ^= 6; /* switch between 2/4 bytes */
kaf24@4103 1055 break;
kaf24@4103 1056 case 0x67: /* address-size override */
kaf24@4103 1057 case 0x2e: /* CS override */
kaf24@4103 1058 case 0x3e: /* DS override */
kaf24@4103 1059 case 0x26: /* ES override */
kaf24@4103 1060 case 0x64: /* FS override */
kaf24@4103 1061 case 0x65: /* GS override */
kaf24@4103 1062 case 0x36: /* SS override */
kaf24@4103 1063 case 0xf0: /* LOCK */
kaf24@4103 1064 case 0xf2: /* REPNE/REPNZ */
kaf24@4103 1065 break;
kaf24@4103 1066 case 0xf3: /* REP/REPE/REPZ */
kaf24@4103 1067 rep_prefix = 1;
kaf24@4103 1068 break;
kaf24@4103 1069 default:
kaf24@4103 1070 goto done_prefixes;
kaf24@4103 1071 }
kaf24@4103 1072 }
kaf24@4103 1073 done_prefixes:
kaf24@4103 1074
kaf24@4103 1075 #ifdef __x86_64__
kaf24@4103 1076 /* REX prefix. */
kaf24@4103 1077 if ( (opcode & 0xf0) == 0x40 )
kaf24@4103 1078 {
kaf24@4103 1079 modrm_reg = (opcode & 4) << 1; /* REX.R */
kaf24@5696 1080 modrm_rm = (opcode & 1) << 3; /* REX.B */
kaf24@5696 1081
kaf24@5696 1082 /* REX.W and REX.X do not need to be decoded. */
kaf24@4103 1083 opcode = insn_fetch(u8, 1, eip);
kaf24@4103 1084 }
kaf24@4103 1085 #endif
kaf24@4103 1086
kaf24@4103 1087 /* Input/Output String instructions. */
kaf24@4103 1088 if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
kaf24@4103 1089 {
kaf24@4103 1090 if ( rep_prefix && (regs->ecx == 0) )
kaf24@4103 1091 goto done;
kaf24@4103 1092
kaf24@4103 1093 continue_io_string:
kaf24@4103 1094 switch ( opcode )
kaf24@4103 1095 {
kaf24@4103 1096 case 0x6c: /* INSB */
kaf24@4103 1097 op_bytes = 1;
kaf24@4103 1098 case 0x6d: /* INSW/INSL */
kaf24@5289 1099 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
kaf24@4103 1100 goto fail;
kaf24@4103 1101 switch ( op_bytes )
kaf24@4103 1102 {
kaf24@4103 1103 case 1:
kaf24@5289 1104 data = (u8)inb_user((u16)regs->edx, v, regs);
kaf24@4103 1105 break;
kaf24@4103 1106 case 2:
kaf24@5289 1107 data = (u16)inw_user((u16)regs->edx, v, regs);
kaf24@4103 1108 break;
kaf24@4103 1109 case 4:
kaf24@5289 1110 data = (u32)inl_user((u16)regs->edx, v, regs);
kaf24@4103 1111 break;
kaf24@4103 1112 }
kfraser@10735 1113 if ( (rc = copy_to_user((void *)regs->edi, &data, op_bytes)) != 0 )
kfraser@10735 1114 {
kfraser@10735 1115 propagate_page_fault(regs->edi + op_bytes - rc,
kfraser@10735 1116 PGERR_write_access);
kfraser@10735 1117 return EXCRET_fault_fixed;
kfraser@10735 1118 }
kfraser@10279 1119 regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
kaf24@4103 1120 break;
kaf24@4103 1121
kaf24@4103 1122 case 0x6e: /* OUTSB */
kaf24@4103 1123 op_bytes = 1;
kaf24@4103 1124 case 0x6f: /* OUTSW/OUTSL */
kaf24@5289 1125 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
kaf24@4103 1126 goto fail;
kfraser@10735 1127 rc = copy_from_user(&data, (void *)regs->esi, op_bytes);
kfraser@10735 1128 if ( rc != 0 )
kfraser@10735 1129 {
kfraser@10735 1130 propagate_page_fault(regs->esi + op_bytes - rc, 0);
kfraser@10735 1131 return EXCRET_fault_fixed;
kfraser@10735 1132 }
kaf24@4103 1133 switch ( op_bytes )
kaf24@4103 1134 {
kaf24@4103 1135 case 1:
kaf24@5289 1136 outb_user((u8)data, (u16)regs->edx, v, regs);
kaf24@4103 1137 break;
kaf24@4103 1138 case 2:
kaf24@5289 1139 outw_user((u16)data, (u16)regs->edx, v, regs);
kaf24@4103 1140 break;
kaf24@4103 1141 case 4:
kaf24@5289 1142 outl_user((u32)data, (u16)regs->edx, v, regs);
kaf24@4103 1143 break;
kaf24@4103 1144 }
kfraser@10279 1145 regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
kaf24@4103 1146 break;
kaf24@4103 1147 }
kaf24@4103 1148
kaf24@4103 1149 if ( rep_prefix && (--regs->ecx != 0) )
kaf24@4103 1150 {
kaf24@4103 1151 if ( !hypercall_preempt_check() )
kaf24@4103 1152 goto continue_io_string;
kaf24@4103 1153 eip = regs->eip;
kaf24@4103 1154 }
kaf24@4103 1155
kaf24@4103 1156 goto done;
kaf24@4103 1157 }
kaf24@4103 1158
kaf24@4103 1159 /* I/O Port and Interrupt Flag instructions. */
kaf24@4103 1160 switch ( opcode )
kaf24@4103 1161 {
kaf24@4103 1162 case 0xe4: /* IN imm8,%al */
kaf24@4103 1163 op_bytes = 1;
kaf24@4103 1164 case 0xe5: /* IN imm8,%eax */
kaf24@4103 1165 port = insn_fetch(u8, 1, eip);
kaf24@4103 1166 exec_in:
kaf24@5289 1167 if ( !guest_io_okay(port, op_bytes, v, regs) )
kaf24@4103 1168 goto fail;
kaf24@4103 1169 switch ( op_bytes )
kaf24@4103 1170 {
kaf24@4103 1171 case 1:
kaf24@4103 1172 regs->eax &= ~0xffUL;
kaf24@5289 1173 regs->eax |= (u8)inb_user(port, v, regs);
kaf24@4103 1174 break;
kaf24@4103 1175 case 2:
kaf24@4103 1176 regs->eax &= ~0xffffUL;
kaf24@5289 1177 regs->eax |= (u16)inw_user(port, v, regs);
kaf24@4103 1178 break;
kaf24@4103 1179 case 4:
kaf24@5289 1180 regs->eax = (u32)inl_user(port, v, regs);
kaf24@4103 1181 break;
kaf24@4103 1182 }
kaf24@4103 1183 goto done;
kaf24@4103 1184
kaf24@4103 1185 case 0xec: /* IN %dx,%al */
kaf24@4103 1186 op_bytes = 1;
kaf24@4103 1187 case 0xed: /* IN %dx,%eax */
kaf24@4103 1188 port = (u16)regs->edx;
kaf24@4103 1189 goto exec_in;
kaf24@4103 1190
kaf24@4103 1191 case 0xe6: /* OUT %al,imm8 */
kaf24@4103 1192 op_bytes = 1;
kaf24@4103 1193 case 0xe7: /* OUT %eax,imm8 */
kaf24@4103 1194 port = insn_fetch(u8, 1, eip);
kaf24@4103 1195 exec_out:
kaf24@5289 1196 if ( !guest_io_okay(port, op_bytes, v, regs) )
kaf24@4103 1197 goto fail;
kaf24@4103 1198 switch ( op_bytes )
kaf24@4103 1199 {
kaf24@4103 1200 case 1:
kaf24@5289 1201 outb_user((u8)regs->eax, port, v, regs);
kaf24@4103 1202 break;
kaf24@4103 1203 case 2:
kaf24@5289 1204 outw_user((u16)regs->eax, port, v, regs);
kaf24@4103 1205 break;
kaf24@4103 1206 case 4:
kaf24@5289 1207 outl_user((u32)regs->eax, port, v, regs);
kaf24@4103 1208 break;
kaf24@4103 1209 }
kaf24@4103 1210 goto done;
kaf24@4103 1211
kaf24@4103 1212 case 0xee: /* OUT %al,%dx */
kaf24@4103 1213 op_bytes = 1;
kaf24@4103 1214 case 0xef: /* OUT %eax,%dx */
kaf24@4103 1215 port = (u16)regs->edx;
kaf24@4103 1216 goto exec_out;
kaf24@4103 1217
kaf24@4103 1218 case 0xfa: /* CLI */
kaf24@4103 1219 case 0xfb: /* STI */
kaf24@9192 1220 if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
kaf24@4103 1221 goto fail;
kaf24@4103 1222 /*
kaf24@4103 1223 * This is just too dangerous to allow, in my opinion. Consider if the
kaf24@4103 1224 * caller then tries to reenable interrupts using POPF: we can't trap
kaf24@4103 1225 * that and we'll end up with hard-to-debug lockups. Fast & loose will
kaf24@4103 1226 * do for us. :-)
kaf24@4103 1227 */
kaf24@5289 1228 /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
kaf24@4103 1229 goto done;
kaf24@4103 1230
kaf24@4103 1231 case 0x0f: /* Two-byte opcode */
kaf24@4103 1232 break;
kaf24@4103 1233
kaf24@4103 1234 default:
kaf24@4103 1235 goto fail;
kaf24@4103 1236 }
kaf24@4103 1237
kaf24@4103 1238 /* Remaining instructions only emulated from guest kernel. */
kaf24@9192 1239 if ( !guest_kernel_mode(v, regs) )
kaf24@3405 1240 goto fail;
kaf24@3405 1241
kaf24@4103 1242 /* Privileged (ring 0) instructions. */
kaf24@4103 1243 opcode = insn_fetch(u8, 1, eip);
kaf24@3405 1244 switch ( opcode )
kaf24@3311 1245 {
kaf24@3405 1246 case 0x06: /* CLTS */
kaf24@3917 1247 (void)do_fpu_taskswitch(0);
kaf24@3405 1248 break;
kaf24@3405 1249
kaf24@3311 1250 case 0x09: /* WBINVD */
kaf24@3861 1251 /* Ignore the instruction if unprivileged. */
kaf24@8468 1252 if ( !cache_flush_permitted(v->domain) )
smh22@8805 1253 /* Non-physdev domain attempted WBINVD; ignore for now since
smh22@8805 1254 newer linux uses this in some start-of-day timing loops */
smh22@8805 1255 ;
kaf24@3861 1256 else
kaf24@3861 1257 wbinvd();
kaf24@3405 1258 break;
kaf24@3405 1259
kaf24@3405 1260 case 0x20: /* MOV CR?,<reg> */
kaf24@4103 1261 opcode = insn_fetch(u8, 1, eip);
kaf24@5696 1262 modrm_reg |= (opcode >> 3) & 7;
kaf24@5696 1263 modrm_rm |= (opcode >> 0) & 7;
kaf24@5696 1264 reg = decode_register(modrm_rm, regs, 0);
kaf24@5696 1265 switch ( modrm_reg )
kaf24@3405 1266 {
kaf24@3405 1267 case 0: /* Read CR0 */
kaf24@8446 1268 *reg = (read_cr0() & ~X86_CR0_TS) |
kaf24@8446 1269 v->arch.guest_context.ctrlreg[0];
kaf24@3405 1270 break;
kaf24@3405 1271
kaf24@3405 1272 case 2: /* Read CR2 */
kaf24@5576 1273 *reg = v->arch.guest_context.ctrlreg[2];
kaf24@3405 1274 break;
kaf24@3405 1275
kaf24@3405 1276 case 3: /* Read CR3 */
kaf24@10295 1277 *reg = xen_pfn_to_cr3(mfn_to_gmfn(
kaf24@10295 1278 v->domain, pagetable_get_pfn(v->arch.guest_table)));
kaf24@3405 1279 break;
kaf24@3405 1280
kaf24@7228 1281 case 4: /* Read CR4 */
kaf24@7228 1282 /*
kaf24@7228 1283 * Guests can read CR4 to see what features Xen has enabled. We
kaf24@7228 1284 * therefore lie about PGE & PSE as they are unavailable to guests.
kaf24@7228 1285 */
kaf24@7228 1286 *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
kaf24@7228 1287 break;
kaf24@7228 1288
kaf24@3405 1289 default:
kaf24@3405 1290 goto fail;
kaf24@3405 1291 }
kaf24@3405 1292 break;
kaf24@3405 1293
kaf24@5696 1294 case 0x21: /* MOV DR?,<reg> */
kaf24@5696 1295 opcode = insn_fetch(u8, 1, eip);
kaf24@5696 1296 modrm_reg |= (opcode >> 3) & 7;
kaf24@5696 1297 modrm_rm |= (opcode >> 0) & 7;
kaf24@5696 1298 reg = decode_register(modrm_rm, regs, 0);
kaf24@5696 1299 if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
kaf24@5696 1300 goto fail;
kaf24@5696 1301 *reg = res;
kaf24@5696 1302 break;
kaf24@5696 1303
kaf24@3405 1304 case 0x22: /* MOV <reg>,CR? */
kaf24@4103 1305 opcode = insn_fetch(u8, 1, eip);
kaf24@5696 1306 modrm_reg |= (opcode >> 3) & 7;
kaf24@5696 1307 modrm_rm |= (opcode >> 0) & 7;
kaf24@5696 1308 reg = decode_register(modrm_rm, regs, 0);
kaf24@5696 1309 switch ( modrm_reg )
kaf24@3405 1310 {
kaf24@3405 1311 case 0: /* Write CR0 */
kaf24@8446 1312 if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
kaf24@8446 1313 {
kaf24@8446 1314 DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
kaf24@8446 1315 goto fail;
kaf24@8446 1316 }
kaf24@3917 1317 (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
kaf24@3405 1318 break;
kaf24@3405 1319
kaf24@3405 1320 case 2: /* Write CR2 */
kaf24@5576 1321 v->arch.guest_context.ctrlreg[2] = *reg;
kaf24@7744 1322 v->vcpu_info->arch.cr2 = *reg;
kaf24@3405 1323 break;
kaf24@10472 1324
kaf24@3405 1325 case 3: /* Write CR3 */
kaf24@5289 1326 LOCK_BIGLOCK(v->domain);
kaf24@9074 1327 cleanup_writable_pagetable(v->domain);
kaf24@10295 1328 (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
kaf24@5289 1329 UNLOCK_BIGLOCK(v->domain);
kaf24@3405 1330 break;
kaf24@3405 1331
kaf24@8446 1332 case 4:
kaf24@8446 1333 if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
kaf24@8446 1334 {
kaf24@8446 1335 DPRINTK("Attempt to change CR4 flags.\n");
kaf24@8446 1336 goto fail;
kaf24@8446 1337 }
kaf24@8446 1338 break;
kaf24@8446 1339
kaf24@5723 1340 default:
kaf24@5723 1341 goto fail;
kaf24@5723 1342 }
kaf24@5723 1343 break;
kaf24@5723 1344
kaf24@5696 1345 case 0x23: /* MOV <reg>,DR? */
kaf24@5696 1346 opcode = insn_fetch(u8, 1, eip);
kaf24@5696 1347 modrm_reg |= (opcode >> 3) & 7;
kaf24@5696 1348 modrm_rm |= (opcode >> 0) & 7;
kaf24@5696 1349 reg = decode_register(modrm_rm, regs, 0);
kaf24@5696 1350 if ( do_set_debugreg(modrm_reg, *reg) != 0 )
kaf24@5696 1351 goto fail;
kaf24@5696 1352 break;
kaf24@5696 1353
kaf24@3311 1354 case 0x30: /* WRMSR */
kaf24@8783 1355 switch ( regs->ecx )
kaf24@8582 1356 {
kaf24@8783 1357 #ifdef CONFIG_X86_64
kaf24@8783 1358 case MSR_FS_BASE:
kaf24@8846 1359 if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
kaf24@8783 1360 goto fail;
kaf24@8783 1361 v->arch.guest_context.fs_base =
kaf24@8783 1362 ((u64)regs->edx << 32) | regs->eax;
kaf24@8783 1363 break;
kaf24@8783 1364 case MSR_GS_BASE:
kaf24@8846 1365 if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
kaf24@8783 1366 goto fail;
kaf24@8783 1367 v->arch.guest_context.gs_base_kernel =
kaf24@8783 1368 ((u64)regs->edx << 32) | regs->eax;
kaf24@8783 1369 break;
kaf24@8783 1370 case MSR_SHADOW_GS_BASE:
kaf24@8846 1371 if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
kaf24@8783 1372 goto fail;
kaf24@8783 1373 v->arch.guest_context.gs_base_user =
kaf24@8783 1374 ((u64)regs->edx << 32) | regs->eax;
kaf24@8783 1375 break;
kaf24@8783 1376 #endif
kaf24@8783 1377 default:
kfraser@10892 1378 if ( wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx) )
kfraser@10892 1379 break;
kfraser@10892 1380
kaf24@8846 1381 if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
kaf24@8582 1382 (regs->eax != l) || (regs->edx != h) )
kaf24@8783 1383 DPRINTK("Domain attempted WRMSR %p from "
kaf24@8582 1384 "%08x:%08x to %08lx:%08lx.\n",
kaf24@8582 1385 _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
kaf24@8783 1386 break;
kaf24@8582 1387 }
kaf24@3405 1388 break;
kaf24@3311 1389
kaf24@3311 1390 case 0x32: /* RDMSR */
kaf24@8783 1391 switch ( regs->ecx )
kaf24@8582 1392 {
kaf24@8783 1393 #ifdef CONFIG_X86_64
kaf24@8783 1394 case MSR_FS_BASE:
kaf24@8783 1395 regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL;
kaf24@8783 1396 regs->edx = v->arch.guest_context.fs_base >> 32;
kaf24@8783 1397 break;
kaf24@8783 1398 case MSR_GS_BASE:
kaf24@8783 1399 regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL;
kaf24@8783 1400 regs->edx = v->arch.guest_context.gs_base_kernel >> 32;
kaf24@8783 1401 break;
kaf24@8783 1402 case MSR_SHADOW_GS_BASE:
kaf24@8783 1403 regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
kaf24@8783 1404 regs->edx = v->arch.guest_context.gs_base_user >> 32;
kaf24@8783 1405 break;
kaf24@8783 1406 #endif
kaf24@8783 1407 case MSR_EFER:
kaf24@8846 1408 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
kaf24@8783 1409 goto fail;
kaf24@8783 1410 break;
kaf24@8783 1411 default:
kfraser@10892 1412 if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
kfraser@10892 1413 {
kfraser@10892 1414 regs->eax = l;
kfraser@10892 1415 regs->edx = h;
kfraser@10892 1416 break;
kfraser@10892 1417 }
kaf24@8783 1418 /* Everyone can read the MSR space. */
kaf24@9018 1419 /*DPRINTK("Domain attempted RDMSR %p.\n", _p(regs->ecx));*/
kaf24@8846 1420 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
kaf24@8783 1421 goto fail;
kaf24@8783 1422 break;
kaf24@8582 1423 }
kaf24@3405 1424 break;
kaf24@3405 1425
kaf24@3405 1426 default:
kaf24@3405 1427 goto fail;
kaf24@3311 1428 }
kaf24@3311 1429
kaf24@4103 1430 done:
kaf24@3405 1431 regs->eip = eip;
kaf24@3405 1432 return EXCRET_fault_fixed;
kaf24@3405 1433
kaf24@3405 1434 fail:
kaf24@3311 1435 return 0;
kaf24@3311 1436 }
kaf24@3311 1437
kaf24@4683 1438 asmlinkage int do_general_protection(struct cpu_user_regs *regs)
kaf24@1452 1439 {
kaf24@5289 1440 struct vcpu *v = current;
kaf24@5289 1441 struct trap_bounce *tb = &v->arch.trap_bounce;
kaf24@8679 1442 struct trap_info *ti;
kaf24@1452 1443 unsigned long fixup;
kaf24@1452 1444
kaf24@3089 1445 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
kaf24@3650 1446
kaf24@3169 1447 if ( regs->error_code & 1 )
kaf24@3169 1448 goto hardware_gp;
kaf24@3169 1449
kaf24@9192 1450 if ( !guest_mode(regs) )
kaf24@1452 1451 goto gp_in_kernel;
kaf24@1452 1452
kaf24@1452 1453 /*
kaf24@1452 1454 * Cunning trick to allow arbitrary "INT n" handling.
kaf24@1452 1455 *
kaf24@1452 1456 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
kaf24@1452 1457 * instruction from trapping to the appropriate vector, since that might not
kaf24@1452 1458 * be expected by Xen or the guest OS. For example, that entry might be for
kaf24@1452 1459 * a fault handler (unlike traps, faults don't increment EIP), or might
kaf24@1452 1460 * expect an error code on the stack (which a software trap never
kaf24@1452 1461 * provides), or might be a hardware interrupt handler that doesn't like
kaf24@1452 1462 * being called spuriously.
kaf24@1452 1463 *
kaf24@1452 1464 * Instead, a GPF occurs with the faulting IDT vector in the error code.
kaf24@1452 1465 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
kaf24@1452 1466 * clear to indicate that it's a software fault, not hardware.
kaf24@1452 1467 *
kaf24@1452 1468 * NOTE: Vectors 3 and 4 are dealt with by their own handlers. This is
kaf24@1452 1469 * okay because they can only be triggered by an explicit DPL-checked
kaf24@1452 1470 * instruction. The DPL specified by the guest OS for these vectors is NOT
kaf24@1452 1471 * CHECKED!!
kaf24@1452 1472 */
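/*
 * Worked example: a DPL-protected 'int $0x81' arrives here with
 * error_code = (0x81 << 3) | 2 = 0x40a; the vector is recovered below as
 * error_code >> 3.
 */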
kaf24@3089 1473 if ( (regs->error_code & 3) == 2 )
kaf24@1452 1474 {
kaf24@1452 1475 /* This fault must be due to <INT n> instruction. */
kaf24@4689 1476 ti = &v->arch.guest_context.trap_ctxt[regs->error_code >> 3];
kaf24@9192 1477 if ( permit_softint(TI_GET_DPL(ti), v, regs) )
kaf24@1452 1478 {
kaf24@3089 1479 tb->flags = TBF_EXCEPTION;
kaf24@1452 1480 regs->eip += 2;
kaf24@1452 1481 goto finish_propagation;
kaf24@1452 1482 }
kaf24@1452 1483 }
kaf24@1816 1484
kaf24@4103 1485 /* Emulate some simple privileged and I/O instructions. */
kaf24@3311 1486 if ( (regs->error_code == 0) &&
kaf24@3311 1487 emulate_privileged_op(regs) )
kaf24@3311 1488 return 0;
kaf24@3311 1489
kaf24@1816 1490 #if defined(__i386__)
kaf24@5289 1491 if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
kaf24@3089 1492 (regs->error_code == 0) &&
kaf24@2073 1493 gpf_emulate_4gb(regs) )
kaf24@3052 1494 return 0;
kaf24@1816 1495 #endif
kaf24@2971 1496
kaf24@1452 1497 /* Pass on GPF as is. */
kaf24@4689 1498 ti = &v->arch.guest_context.trap_ctxt[TRAP_gp_fault];
kaf24@3089 1499 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
kaf24@3089 1500 tb->error_code = regs->error_code;
kaf24@1452 1501 finish_propagation:
kaf24@3043 1502 tb->cs = ti->cs;
kaf24@3043 1503 tb->eip = ti->address;
kaf24@1452 1504 if ( TI_GET_IF(ti) )
kaf24@4949 1505 tb->flags |= TBF_INTERRUPT;
kaf24@3052 1506 return 0;
kaf24@1452 1507
kaf24@1452 1508 gp_in_kernel:
kaf24@1452 1509
kaf24@1452 1510 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
kaf24@1452 1511 {
kaf24@3621 1512 DPRINTK("GPF (%04x): %p -> %p\n",
kaf24@4654 1513 regs->error_code, _p(regs->eip), _p(fixup));
kaf24@1452 1514 regs->eip = fixup;
kaf24@3052 1515 return 0;
kaf24@1452 1516 }
kaf24@1452 1517
kaf24@3089 1518 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
cl349@2957 1519
kaf24@3169 1520 hardware_gp:
kfraser@10478 1521 show_execution_state(regs);
kaf24@3089 1522 panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
kaf24@3089 1523 smp_processor_id(), regs->error_code);
kaf24@3052 1524 return 0;
kaf24@1452 1525 }
kaf24@1452 1526
Ian@8559 1527 static void nmi_softirq(void)
Ian@8559 1528 {
Ian@8559 1529 /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
kfraser@10405 1530 vcpu_kick(dom0->vcpu[0]);
Ian@8559 1531 }
kaf24@8418 1532
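/*
 * Record an NMI reason bit in dom0's shared_info and flag VCPU0 as having
 * an NMI pending.  The wakeup itself is deferred to nmi_softirq(), as it
 * is not safe to wake a VCPU directly from NMI context.
 */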
Ian@8559 1533 static void nmi_dom0_report(unsigned int reason_idx)
Ian@8559 1534 {
Ian@8559 1535 struct domain *d;
kfraser@10655 1536 struct vcpu *v;
kaf24@8418 1537
kfraser@10655 1538 if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) )
kaf24@3695 1539 return;
kaf24@3695 1540
Ian@8559 1541 set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
kaf24@3695 1542
kfraser@10655 1543 if ( !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
Ian@8559 1544 raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
kaf24@1452 1545 }
kaf24@1452 1546
kaf24@4683 1547 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
kaf24@1452 1548 {
kaf24@3695 1549 switch ( opt_nmi[0] )
kaf24@3695 1550 {
kaf24@3695 1551 case 'd': /* 'dom0' */
Ian@8559 1552 nmi_dom0_report(_XEN_NMIREASON_parity_error);
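/* FALLTHROUGH: reporting to dom0 does not suppress local handling. */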
kaf24@3695 1553 case 'i': /* 'ignore' */
kaf24@3695 1554 break;
kaf24@3695 1555 default: /* 'fatal' */
kaf24@3695 1556 console_force_unlock();
kaf24@3695 1557 printk("\n\nNMI - MEMORY ERROR\n");
kaf24@3695 1558 fatal_trap(TRAP_nmi, regs);
kaf24@3695 1559 }
kaf24@8418 1560
kaf24@8418 1561 outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable parity check */
kaf24@8418 1562 mdelay(1);
kaf24@8418 1563 outb((inb(0x61) & 0x0b) | 0x00, 0x61); /* enable parity check */
kaf24@1452 1564 }
kaf24@1452 1565
kaf24@4683 1566 asmlinkage void io_check_error(struct cpu_user_regs *regs)
kaf24@1452 1567 {
kaf24@3695 1568 switch ( opt_nmi[0] )
kaf24@3695 1569 {
kaf24@3695 1570 case 'd': /* 'dom0' */
Ian@8559 1571 nmi_dom0_report(_XEN_NMIREASON_io_error);
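/* FALLTHROUGH */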
kaf24@3695 1572 case 'i': /* 'ignore' */
kaf24@3695 1573 break;
kaf24@3695 1574 default: /* 'fatal' */
kaf24@3695 1575 console_force_unlock();
kaf24@3695 1576 printk("\n\nNMI - I/O ERROR\n");
kaf24@3695 1577 fatal_trap(TRAP_nmi, regs);
kaf24@3695 1578 }
kaf24@8418 1579
kaf24@8418 1580 outb((inb(0x61) & 0x0f) | 0x08, 0x61); /* clear-and-disable IOCK */
kaf24@8418 1581 mdelay(1);
kaf24@8418 1582 outb((inb(0x61) & 0x07) | 0x00, 0x61); /* enable IOCK */
kaf24@1452 1583 }
kaf24@1452 1584
kaf24@3669 1585 static void unknown_nmi_error(unsigned char reason)
kaf24@1452 1586 {
kaf24@8418 1587 switch ( opt_nmi[0] )
kaf24@8418 1588 {
kaf24@8418 1589 case 'd': /* 'dom0' */
Ian@8559 1590 nmi_dom0_report(_XEN_NMIREASON_unknown);
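/* FALLTHROUGH */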
kaf24@8418 1591 case 'i': /* 'ignore' */
kaf24@8418 1592 break;
kaf24@8418 1593 default: /* 'fatal' */
kaf24@8418 1594 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
kaf24@8418 1595 printk("Dazed and confused, but trying to continue\n");
kaf24@8418 1596 printk("Do you have a strange power saving mode enabled?\n");
kaf24@8418 1597 }
kaf24@1452 1598 }
kaf24@1452 1599
kaf24@8255 1600 static int dummy_nmi_callback(struct cpu_user_regs *regs, int cpu)
kaf24@8255 1601 {
kaf24@8418 1602 return 0;
kaf24@8255 1603 }
kaf24@8255 1604
kaf24@8255 1605 static nmi_callback_t nmi_callback = dummy_nmi_callback;
kaf24@8255 1606
kaf24@8418 1607 asmlinkage void do_nmi(struct cpu_user_regs *regs)
kaf24@8255 1608 {
kaf24@8255 1609 unsigned int cpu = smp_processor_id();
kaf24@8418 1610 unsigned char reason;
kaf24@8255 1611
kaf24@8255 1612 ++nmi_count(cpu);
kaf24@8255 1613
kaf24@8418 1614 if ( nmi_callback(regs, cpu) )
kaf24@8418 1615 return;
kaf24@8418 1616
kaf24@8418 1617 if ( nmi_watchdog )
kaf24@8418 1618 nmi_watchdog_tick(regs);
kaf24@8418 1619
kaf24@8418 1620 /* Only the BSP gets external NMIs from the system. */
kaf24@8418 1621 if ( cpu == 0 )
kaf24@8418 1622 {
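/* System control port B: bit 7 = memory parity error, bit 6 = IOCHK. */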
kaf24@8418 1623 reason = inb(0x61);
kaf24@8418 1624 if ( reason & 0x80 )
kaf24@8418 1625 mem_parity_error(regs);
kaf24@8418 1626 else if ( reason & 0x40 )
kaf24@8418 1627 io_check_error(regs);
kaf24@8418 1628 else if ( !nmi_watchdog )
kaf24@8418 1629 unknown_nmi_error((unsigned char)(reason&0xff));
kaf24@8418 1630 }
kaf24@8255 1631 }
kaf24@8255 1632
kaf24@8255 1633 void set_nmi_callback(nmi_callback_t callback)
kaf24@8255 1634 {
kaf24@8255 1635 nmi_callback = callback;
kaf24@8255 1636 }
kaf24@8255 1637
kaf24@8255 1638 void unset_nmi_callback(void)
kaf24@8255 1639 {
kaf24@8418 1640 nmi_callback = dummy_nmi_callback;
kaf24@8255 1641 }
kaf24@8255 1642
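/*
 * #NM (device-not-available) handler: restore the FPU state for the
 * current VCPU.  If the guest's virtual CR0.TS is set, the fault is also
 * bounced to the guest so it can do its own lazy-FPU housekeeping, and the
 * virtual TS bit is cleared.
 */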
kaf24@4683 1643 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
kaf24@1452 1644 {
kaf24@7553 1645 struct trap_bounce *tb;
kaf24@8679 1646 struct trap_info *ti;
kaf24@7553 1647
cl349@4856 1648 setup_fpu(current);
kaf24@1452 1649
kaf24@5576 1650 if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
kaf24@1452 1651 {
kaf24@7553 1652 tb = &current->arch.trap_bounce;
kaf24@7553 1653 ti = &current->arch.guest_context.trap_ctxt[TRAP_no_device];
kaf24@7553 1654
kaf24@4689 1655 tb->flags = TBF_EXCEPTION;
kaf24@7553 1656 tb->cs = ti->cs;
kaf24@7553 1657 tb->eip = ti->address;
kaf24@7553 1658 if ( TI_GET_IF(ti) )
kaf24@7553 1659 tb->flags |= TBF_INTERRUPT;
kaf24@7553 1660
kaf24@5576 1661 current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
kaf24@1452 1662 }
kaf24@3052 1663
kaf24@3052 1664 return EXCRET_fault_fixed;
kaf24@1452 1665 }
kaf24@1452 1666
kaf24@4683 1667 asmlinkage int do_debug(struct cpu_user_regs *regs)
kaf24@1452 1668 {
kaf24@3602 1669 unsigned long condition;
kaf24@5289 1670 struct vcpu *v = current;
kaf24@5289 1671 struct trap_bounce *tb = &v->arch.trap_bounce;
kaf24@8679 1672 struct trap_info *ti;
kaf24@1452 1673
kaf24@3602 1674 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
kaf24@1452 1675
kaf24@1452 1676 /* Mask out spurious debug traps due to lazy DR7 setting */
kaf24@1452 1677 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
kaf24@5289 1678 (v->arch.guest_context.debugreg[7] == 0) )
kaf24@1452 1679 {
kaf24@3602 1680 __asm__("mov %0,%%db7" : : "r" (0UL));
kaf24@3052 1681 goto out;
kaf24@1452 1682 }
kaf24@1452 1683
cl349@4339 1684 DEBUGGER_trap_entry(TRAP_debug, regs);
cl349@4339 1685
kaf24@9192 1686 if ( !guest_mode(regs) )
kaf24@1452 1687 {
kaf24@1452 1688 /* Clear TF just for absolute sanity. */
kaf24@1452 1689 regs->eflags &= ~EF_TF;
kaf24@1452 1690 /*
kaf24@1590 1691 * We ignore watchpoints when they trigger within Xen. This may happen
kaf24@1590 1692 * when a buffer is passed to us which previously had a watchpoint set
kaf24@1590 1693 * on it. No need to bump EIP; the only faulting trap is an instruction
kaf24@1590 1694 * breakpoint, which can't happen to us.
kaf24@1452 1695 */
kaf24@3052 1696 goto out;
kmacy@4118 1697 }
kaf24@1452 1698
kaf24@1452 1699 /* Save debug status register where guest OS can peek at it */
kaf24@5289 1700 v->arch.guest_context.debugreg[6] = condition;
kaf24@1452 1701
kaf24@7553 1702 ti = &v->arch.guest_context.trap_ctxt[TRAP_debug];
kaf24@3089 1703 tb->flags = TBF_EXCEPTION;
kaf24@7553 1704 tb->cs = ti->cs;
kaf24@7553 1705 tb->eip = ti->address;
kaf24@7553 1706 if ( TI_GET_IF(ti) )
kaf24@7553 1707 tb->flags |= TBF_INTERRUPT;
kaf24@3052 1708
kaf24@3052 1709 out:
kaf24@3052 1710 return EXCRET_not_a_fault;
kaf24@1452 1711 }
kaf24@1452 1712
kaf24@4683 1713 asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
kaf24@3052 1714 {
kaf24@3052 1715 return EXCRET_not_a_fault;
kaf24@3052 1716 }
kaf24@1452 1717
kaf24@1452 1718 void set_intr_gate(unsigned int n, void *addr)
kaf24@3669 1719 {
kaf24@3847 1720 #ifdef __i386__
kaf24@3847 1721 int i;
kaf24@3847 1722 /* Keep secondary tables in sync with IRQ updates. */
kaf24@3847 1723 for ( i = 1; i < NR_CPUS; i++ )
kaf24@3847 1724 if ( idt_tables[i] != NULL )
kaf24@3847 1725 _set_gate(&idt_tables[i][n], 14, 0, addr);
kaf24@3847 1726 #endif
kaf24@3847 1727 _set_gate(&idt_table[n], 14, 0, addr);
kaf24@3669 1728 }
kaf24@3669 1729
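/* DPL 3: the gate may be invoked directly from any guest privilege level. */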
kaf24@3602 1730 void set_system_gate(unsigned int n, void *addr)
kaf24@1452 1731 {
kaf24@1452 1732 _set_gate(&idt_table[n], 14, 3, addr);
kaf24@1452 1733 }
kaf24@1452 1734
kaf24@3602 1735 void set_task_gate(unsigned int n, unsigned int sel)
kaf24@1452 1736 {
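/* b = 0x8500: present (P=1), DPL 0, type 5 (task gate). */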
kaf24@1452 1737 idt_table[n].a = sel << 16;
kaf24@1452 1738 idt_table[n].b = 0x8500;
kaf24@1452 1739 }
kaf24@1452 1740
kaf24@1452 1741 void set_tss_desc(unsigned int n, void *addr)
kaf24@1452 1742 {
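/* Descriptor type 9: available TSS; the limit covers only the used part. */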
kaf24@3075 1743 _set_tssldt_desc(
kaf24@4972 1744 gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
kaf24@3623 1745 (unsigned long)addr,
kaf24@3075 1746 offsetof(struct tss_struct, __cacheline_filler) - 1,
kaf24@3623 1747 9);
kaf24@1452 1748 }
kaf24@1452 1749
kaf24@1452 1750 void __init trap_init(void)
kaf24@1452 1751 {
kaf24@3695 1752 extern void percpu_traps_init(void);
kaf24@1452 1753
kaf24@1452 1754 /*
kaf24@1452 1755 * Note that interrupt gates are always used, rather than trap gates. We
kaf24@1452 1756 * must have interrupts disabled until DS/ES/FS/GS are saved because the
kaf24@1452 1757 * first activation must have the "bad" value(s) for these registers and
kaf24@1452 1758 * we may lose them if another activation is installed before they are
kaf24@1452 1759 * saved. The page-fault handler also needs interrupts disabled until %cr2
kaf24@1452 1760 * has been read and saved on the stack.
kaf24@1452 1761 */
kaf24@3041 1762 set_intr_gate(TRAP_divide_error,&divide_error);
kaf24@3041 1763 set_intr_gate(TRAP_debug,&debug);
kaf24@3041 1764 set_intr_gate(TRAP_nmi,&nmi);
kaf24@3041 1765 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
kaf24@3041 1766 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
kaf24@3041 1767 set_intr_gate(TRAP_bounds,&bounds);
kaf24@3041 1768 set_intr_gate(TRAP_invalid_op,&invalid_op);
kaf24@3041 1769 set_intr_gate(TRAP_no_device,&device_not_available);
kaf24@3041 1770 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
kaf24@3041 1771 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
kaf24@3041 1772 set_intr_gate(TRAP_no_segment,&segment_not_present);
kaf24@3041 1773 set_intr_gate(TRAP_stack_error,&stack_segment);
kaf24@3041 1774 set_intr_gate(TRAP_gp_fault,&general_protection);
kaf24@3041 1775 set_intr_gate(TRAP_page_fault,&page_fault);
kaf24@3041 1776 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
kaf24@3041 1777 set_intr_gate(TRAP_copro_error,&coprocessor_error);
kaf24@3041 1778 set_intr_gate(TRAP_alignment_check,&alignment_check);
kaf24@3041 1779 set_intr_gate(TRAP_machine_check,&machine_check);
kaf24@3041 1780 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
kaf24@1452 1781
kaf24@3695 1782 percpu_traps_init();
kaf24@1452 1783
kaf24@3695 1784 cpu_init();
kaf24@2047 1785
Ian@8559 1786 open_softirq(NMI_SOFTIRQ, nmi_softirq);
kaf24@1452 1787 }
kaf24@1452 1788
kaf24@1452 1789
kaf24@9873 1790 long do_set_trap_table(XEN_GUEST_HANDLE(trap_info_t) traps)
kaf24@1452 1791 {
kaf24@8679 1792 struct trap_info cur;
kaf24@8679 1793 struct trap_info *dst = current->arch.guest_context.trap_ctxt;
kaf24@4105 1794 long rc = 0;
kaf24@1452 1795
kaf24@9114 1796 /* If no table is presented then clear the entire virtual IDT. */
kaf24@9183 1797 if ( guest_handle_is_null(traps) )
kaf24@9114 1798 {
kaf24@9114 1799 memset(dst, 0, 256 * sizeof(*dst));
kaf24@9114 1800 init_int80_direct_trap(current);
kaf24@9114 1801 return 0;
kaf24@9114 1802 }
kaf24@1452 1803
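/*
 * Entries are copied in one at a time so the hypercall can be preempted:
 * on preemption a continuation is created with the (already advanced)
 * guest handle, and the loop terminates at an entry whose handler address
 * is zero.
 */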
kaf24@1452 1804 for ( ; ; )
kaf24@1452 1805 {
kaf24@3139 1806 if ( hypercall_preempt_check() )
kaf24@3139 1807 {
kaf24@9068 1808 rc = hypercall_create_continuation(
kaf24@9183 1809 __HYPERVISOR_set_trap_table, "h", traps);
kaf24@4105 1810 break;
kaf24@3139 1811 }
kaf24@1452 1812
kaf24@9183 1813 if ( copy_from_guest(&cur, traps, 1) )
kaf24@4105 1814 {
kaf24@4105 1815 rc = -EFAULT;
kaf24@4105 1816 break;
kaf24@4105 1817 }
kaf24@1452 1818
kaf24@4105 1819 if ( cur.address == 0 )
kaf24@4105 1820 break;
kaf24@1452 1821
kaf24@9043 1822 fixup_guest_code_selector(cur.cs);
kaf24@1452 1823
kaf24@4930 1824 memcpy(&dst[cur.vector], &cur, sizeof(cur));
kaf24@4930 1825
kaf24@4930 1826 if ( cur.vector == 0x80 )
kaf24@4930 1827 init_int80_direct_trap(current);
kaf24@4930 1828
kaf24@9183 1829 guest_handle_add_offset(traps, 1);
kaf24@1452 1830 }
kaf24@1452 1831
kaf24@4105 1832 return rc;
kaf24@1452 1833 }
kaf24@1452 1834
kaf24@1452 1835
kaf24@5289 1836 long set_debugreg(struct vcpu *p, int reg, unsigned long value)
kaf24@1452 1837 {
kaf24@1452 1838 int i;
kaf24@1452 1839
kaf24@1452 1840 switch ( reg )
kaf24@1452 1841 {
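/*
 * DR0..DR3 hold breakpoint addresses: they must lie in guest-accessible
 * address space, since a breakpoint on a Xen address could fire in
 * hypervisor context.
 */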
kaf24@1452 1842 case 0:
kaf24@4573 1843 if ( !access_ok(value, sizeof(long)) )
kaf24@4572 1844 return -EPERM;
kaf24@1452 1845 if ( p == current )
kaf24@3602 1846 __asm__ ( "mov %0, %%db0" : : "r" (value) );
kaf24@1452 1847 break;
kaf24@1452 1848 case 1:
kaf24@4573 1849 if ( !access_ok(value, sizeof(long)) )
kaf24@4572 1850 return -EPERM;
kaf24@1452 1851 if ( p == current )
kaf24@3602 1852 __asm__ ( "mov %0, %%db1" : : "r" (value) );
kaf24@1452 1853 break;
kaf24@1452 1854 case 2:
kaf24@4573 1855 if ( !access_ok(value, sizeof(long)) )
kaf24@4572 1856 return -EPERM;
kaf24@1452 1857 if ( p == current )
kaf24@3602 1858 __asm__ ( "mov %0, %%db2" : : "r" (value) );
kaf24@1452 1859 break;
kaf24@1452 1860 case 3:
kaf24@4573 1861 if ( !access_ok(value, sizeof(long)) )
kaf24@4572 1862 return -EPERM;
kaf24@1452 1863 if ( p == current )
kaf24@3602 1864 __asm__ ( "mov %0, %%db3" : : "r" (value) );
kaf24@1452 1865 break;
kaf24@1452 1866 case 6:
kaf24@1452 1867 /*
kaf24@1452 1868 * DR6: Bits 4-11,16-31 reserved (set to 1).
kaf24@1452 1869 * Bit 12 reserved (set to 0).
kaf24@1452 1870 */
kaf24@1452 1871 value &= 0xffffefff; /* reserved bits => 0 */
kaf24@1452 1872 value |= 0xffff0ff0; /* reserved bits => 1 */
kaf24@1452 1873 if ( p == current )
kaf24@3602 1874 __asm__ ( "mov %0, %%db6" : : "r" (value) );
kaf24@1452 1875 break;
kaf24@1452 1876 case 7:
kaf24@1452 1877 /*
kaf24@1452 1878 * DR7: Bit 10 reserved (set to 1).
kaf24@1452 1879 * Bits 11-12,14-15 reserved (set to 0).
kaf24@1452 1880 * Privileged bits:
kaf24@1452 1881 * GD (bit 13): must be 0.
kaf24@1452 1882 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
kaf24@1452 1883 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
kaf24@1452 1884 */
kaf24@1452 1885 /* DR7 == 0 => debugging disabled for this domain. */
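/*
 * Example: value 0x00000003 (L0|G0, with R/W0 = LEN0 = 00b) arms DR0 as
 * an instruction breakpoint; the reserved-bit fixup below yields
 * 0x00000403, which passes the checks.
 */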
kaf24@1452 1886 if ( value != 0 )
kaf24@1452 1887 {
kaf24@1452 1888 value &= 0xffff27ff; /* reserved bits => 0 */
kaf24@1452 1889 value |= 0x00000400; /* reserved bits => 1 */
kaf24@1452 1890 if ( (value & (1<<13)) != 0 ) return -EPERM;
kaf24@1452 1891 for ( i = 0; i < 16; i += 2 )
kaf24@1452 1892 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
kaf24@1452 1893 }
kaf24@1452 1894 if ( p == current )
kaf24@3602 1895 __asm__ ( "mov %0, %%db7" : : "r" (value) );
kaf24@1452 1896 break;
kaf24@1452 1897 default:
kaf24@1452 1898 return -EINVAL;
kaf24@1452 1899 }
kaf24@1452 1900
kaf24@4689 1901 p->arch.guest_context.debugreg[reg] = value;
kaf24@1452 1902 return 0;
kaf24@1452 1903 }
kaf24@1452 1904
kaf24@1452 1905 long do_set_debugreg(int reg, unsigned long value)
kaf24@1452 1906 {
kaf24@1452 1907 return set_debugreg(current, reg, value);
kaf24@1452 1908 }
kaf24@1452 1909
kaf24@1452 1910 unsigned long do_get_debugreg(int reg)
kaf24@1452 1911 {
kaf24@1452 1912 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
kaf24@4689 1913 return current->arch.guest_context.debugreg[reg];
kaf24@1452 1914 }
kaf24@1672 1915
kaf24@3914 1916 /*
kaf24@3914 1917 * Local variables:
kaf24@3914 1918 * mode: C
kaf24@3914 1919 * c-set-style: "BSD"
kaf24@3914 1920 * c-basic-offset: 4
kaf24@3914 1921 * tab-width: 4
kaf24@3914 1922 * indent-tabs-mode: nil
kaf24@3988 1923 * End:
kaf24@3914 1924 */