ia64/xen-unstable

view xen/arch/x86/traps.c @ 8783:5caf1de3f268

Apply stricter checking to RDMSR/WRMSR emulations.
In particular, MSRs that domain0 may write to must now
be 'white listed': default policy is to ignore the write.

This will prevent SYSCALL/SYSENTER instructions from
crashing Xen, by preventing the target MSRs from being
overwritten by domain 0.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Feb 07 15:56:39 2006 +0100 (2006-02-07)
parents 6f7c5439a6c4
children 01fa38f79207
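
The policy described in the commit message (writes from domain 0 take effect only for explicitly white-listed MSRs; every other WRMSR is logged and dropped) amounts to a switch on the MSR index in the WRMSR emulation path of emulate_privileged_op() below. A rough sketch of the idea, using hypothetical names rather than the exact code in this file:

    /* Illustrative sketch only; example_wrmsr_policy() and wrmsr_checked()
     * are hypothetical names, not part of traps.c. */
    static int example_wrmsr_policy(unsigned int msr,
                                    unsigned int lo, unsigned int hi)
    {
        switch ( msr )
        {
        case 0xc0000100: /* MSR_FS_BASE        - white-listed */
        case 0xc0000101: /* MSR_GS_BASE        - white-listed */
        case 0xc0000102: /* MSR_SHADOW_GS_BASE - white-listed */
            return wrmsr_checked(msr, lo, hi);
        default:
            /* Default policy: ignore the write, so the SYSCALL/SYSENTER
             * target MSRs can never be clobbered by domain 0. */
            return 0;
        }
    }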
line source
1 /******************************************************************************
2 * arch/x86/traps.c
3 *
4 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * Copyright (C) 1991, 1992 Linus Torvalds
23 *
24 * Pentium III FXSR, SSE support
25 * Gareth Hughes <gareth@valinux.com>, May 2000
26 */
28 #include <xen/config.h>
29 #include <xen/init.h>
30 #include <xen/sched.h>
31 #include <xen/lib.h>
32 #include <xen/errno.h>
33 #include <xen/mm.h>
34 #include <xen/console.h>
35 #include <asm/regs.h>
36 #include <xen/delay.h>
37 #include <xen/event.h>
38 #include <xen/spinlock.h>
39 #include <xen/irq.h>
40 #include <xen/perfc.h>
41 #include <xen/softirq.h>
42 #include <xen/domain_page.h>
43 #include <xen/symbols.h>
44 #include <xen/iocap.h>
45 #include <asm/shadow.h>
46 #include <asm/system.h>
47 #include <asm/io.h>
48 #include <asm/atomic.h>
49 #include <asm/desc.h>
50 #include <asm/debugreg.h>
51 #include <asm/smp.h>
52 #include <asm/flushtlb.h>
53 #include <asm/uaccess.h>
54 #include <asm/i387.h>
55 #include <asm/debugger.h>
56 #include <asm/msr.h>
57 #include <asm/x86_emulate.h>
58 #include <asm/nmi.h>
60 /*
61 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
62 * fatal: Xen prints diagnostic message and then hangs.
63 * dom0: The NMI is virtualised to DOM0.
64 * ignore: The NMI error is cleared and ignored.
65 */
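/* (Editorial note, not part of the original file: string_param() below makes
 * this a Xen boot-time option, so the policy is selected on the hypervisor
 * command line, e.g. "nmi=dom0" or "nmi=ignore"; the handlers further down
 * only examine the first character of the value.) */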
66 #ifdef NDEBUG
67 char opt_nmi[10] = "dom0";
68 #else
69 char opt_nmi[10] = "fatal";
70 #endif
71 string_param("nmi", opt_nmi);
73 /* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
74 idt_entry_t idt_table[IDT_ENTRIES];
76 #define DECLARE_TRAP_HANDLER(_name) \
77 asmlinkage void _name(void); \
78 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
80 asmlinkage void nmi(void);
81 DECLARE_TRAP_HANDLER(divide_error);
82 DECLARE_TRAP_HANDLER(debug);
83 DECLARE_TRAP_HANDLER(int3);
84 DECLARE_TRAP_HANDLER(overflow);
85 DECLARE_TRAP_HANDLER(bounds);
86 DECLARE_TRAP_HANDLER(invalid_op);
87 DECLARE_TRAP_HANDLER(device_not_available);
88 DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
89 DECLARE_TRAP_HANDLER(invalid_TSS);
90 DECLARE_TRAP_HANDLER(segment_not_present);
91 DECLARE_TRAP_HANDLER(stack_segment);
92 DECLARE_TRAP_HANDLER(general_protection);
93 DECLARE_TRAP_HANDLER(page_fault);
94 DECLARE_TRAP_HANDLER(coprocessor_error);
95 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
96 DECLARE_TRAP_HANDLER(alignment_check);
97 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
98 DECLARE_TRAP_HANDLER(machine_check);
100 long do_set_debugreg(int reg, unsigned long value);
101 unsigned long do_get_debugreg(int reg);
103 static int debug_stack_lines = 20;
104 integer_param("debug_stack_lines", debug_stack_lines);
106 #ifdef CONFIG_X86_32
107 #define stack_words_per_line 8
108 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
109 #else
110 #define stack_words_per_line 4
111 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
112 #endif
114 int is_kernel_text(unsigned long addr)
115 {
116 extern char _stext, _etext;
117 if (addr >= (unsigned long) &_stext &&
118 addr <= (unsigned long) &_etext)
119 return 1;
120 return 0;
122 }
124 unsigned long kernel_text_end(void)
125 {
126 extern char _etext;
127 return (unsigned long) &_etext;
128 }
130 static void show_guest_stack(struct cpu_user_regs *regs)
131 {
132 int i;
133 unsigned long *stack, addr;
135 if ( HVM_DOMAIN(current) )
136 return;
138 if ( VM86_MODE(regs) )
139 {
140 stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
141 printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ",
142 regs->ss, (uint16_t)(regs->esp & 0xffff));
143 }
144 else
145 {
146 stack = (unsigned long *)regs->esp;
147 printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
148 }
150 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
151 {
152 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
153 break;
154 if ( get_user(addr, stack) )
155 {
156 if ( i != 0 )
157 printk("\n ");
158 printk("Fault while accessing guest memory.");
159 i = 1;
160 break;
161 }
162 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
163 printk("\n ");
164 printk("%p ", _p(addr));
165 stack++;
166 }
167 if ( i == 0 )
168 printk("Stack empty.");
169 printk("\n");
170 }
172 #ifdef NDEBUG
174 static void show_trace(struct cpu_user_regs *regs)
175 {
176 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
178 printk("Xen call trace:\n ");
180 printk("[<%p>]", _p(regs->eip));
181 print_symbol(" %s\n ", regs->eip);
183 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
184 {
185 addr = *stack++;
186 if ( is_kernel_text(addr) )
187 {
188 printk("[<%p>]", _p(addr));
189 print_symbol(" %s\n ", addr);
190 }
191 }
193 printk("\n");
194 }
196 #else
198 static void show_trace(struct cpu_user_regs *regs)
199 {
200 unsigned long *frame, next, addr, low, high;
202 printk("Xen call trace:\n ");
204 printk("[<%p>]", _p(regs->eip));
205 print_symbol(" %s\n ", regs->eip);
207 /* Bounds for range of valid frame pointer. */
208 low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
209 high = (low & ~(STACK_SIZE - 1)) +
210 (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
212 /* The initial frame pointer. */
213 next = regs->ebp;
215 for ( ; ; )
216 {
217 /* Valid frame pointer? */
218 if ( (next < low) || (next >= high) )
219 {
220 /*
221 * Exception stack frames have a different layout, denoted by an
222 * inverted frame pointer.
223 */
224 next = ~next;
225 if ( (next < low) || (next >= high) )
226 break;
227 frame = (unsigned long *)next;
228 next = frame[0];
229 addr = frame[(offsetof(struct cpu_user_regs, eip) -
230 offsetof(struct cpu_user_regs, ebp))
231 / BYTES_PER_LONG];
232 }
233 else
234 {
235 /* Ordinary stack frame. */
236 frame = (unsigned long *)next;
237 next = frame[0];
238 addr = frame[1];
239 }
241 printk("[<%p>]", _p(addr));
242 print_symbol(" %s\n ", addr);
244 low = (unsigned long)&frame[2];
245 }
247 printk("\n");
248 }
250 #endif
252 void show_stack(struct cpu_user_regs *regs)
253 {
254 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
255 int i;
257 if ( GUEST_MODE(regs) )
258 return show_guest_stack(regs);
260 printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
262 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
263 {
264 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
265 break;
266 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
267 printk("\n ");
268 addr = *stack++;
269 printk("%p ", _p(addr));
270 }
271 if ( i == 0 )
272 printk("Stack empty.");
273 printk("\n");
275 show_trace(regs);
276 }
278 /*
279 * This is called for faults at very unexpected times (e.g., when interrupts
280 * are disabled). In such situations we can't do much that is safe. We try to
281 * print out some tracing and then we just spin.
282 */
283 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
284 {
285 int cpu = smp_processor_id();
286 unsigned long cr2;
287 static char *trapstr[] = {
288 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
289 "invalid operation", "device not available", "double fault",
290 "coprocessor segment", "invalid tss", "segment not found",
291 "stack error", "general protection fault", "page fault",
292 "spurious interrupt", "coprocessor error", "alignment check",
293 "machine check", "simd error"
294 };
296 watchdog_disable();
297 console_start_sync();
299 show_registers(regs);
301 if ( trapnr == TRAP_page_fault )
302 {
303 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
304 printk("Faulting linear address: %p\n", _p(cr2));
305 show_page_walk(cr2);
306 }
308 printk("************************************\n");
309 printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
310 cpu, trapnr, trapstr[trapnr], regs->error_code,
311 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
312 printk("System shutting down -- need manual reset.\n");
313 printk("************************************\n");
315 (void)debugger_trap_fatal(trapnr, regs);
317 /* Lock up the console to prevent spurious output from other CPUs. */
318 console_force_lock();
320 /* Wait for manual reset. */
321 for ( ; ; )
322 __asm__ __volatile__ ( "hlt" );
323 }
325 static inline int do_trap(int trapnr, char *str,
326 struct cpu_user_regs *regs,
327 int use_error_code)
328 {
329 struct vcpu *v = current;
330 struct trap_bounce *tb = &v->arch.trap_bounce;
331 struct trap_info *ti;
332 unsigned long fixup;
334 DEBUGGER_trap_entry(trapnr, regs);
336 if ( !GUEST_MODE(regs) )
337 goto xen_fault;
339 ti = &current->arch.guest_context.trap_ctxt[trapnr];
340 tb->flags = TBF_EXCEPTION;
341 tb->cs = ti->cs;
342 tb->eip = ti->address;
343 if ( use_error_code )
344 {
345 tb->flags |= TBF_EXCEPTION_ERRCODE;
346 tb->error_code = regs->error_code;
347 }
348 if ( TI_GET_IF(ti) )
349 tb->flags |= TBF_INTERRUPT;
350 return 0;
352 xen_fault:
354 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
355 {
356 DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
357 regs->eip = fixup;
358 return 0;
359 }
361 DEBUGGER_trap_fatal(trapnr, regs);
363 show_registers(regs);
364 panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
365 "[error_code=%04x]\n",
366 smp_processor_id(), trapnr, str, regs->error_code);
367 return 0;
368 }
370 #define DO_ERROR_NOCODE(trapnr, str, name) \
371 asmlinkage int do_##name(struct cpu_user_regs *regs) \
372 { \
373 return do_trap(trapnr, str, regs, 0); \
374 }
376 #define DO_ERROR(trapnr, str, name) \
377 asmlinkage int do_##name(struct cpu_user_regs *regs) \
378 { \
379 return do_trap(trapnr, str, regs, 1); \
380 }
382 DO_ERROR_NOCODE( 0, "divide error", divide_error)
383 DO_ERROR_NOCODE( 4, "overflow", overflow)
384 DO_ERROR_NOCODE( 5, "bounds", bounds)
385 DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
386 DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
387 DO_ERROR(10, "invalid TSS", invalid_TSS)
388 DO_ERROR(11, "segment not present", segment_not_present)
389 DO_ERROR(12, "stack segment", stack_segment)
390 DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
391 DO_ERROR(17, "alignment check", alignment_check)
392 DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
394 asmlinkage int do_int3(struct cpu_user_regs *regs)
395 {
396 struct vcpu *v = current;
397 struct trap_bounce *tb = &v->arch.trap_bounce;
398 struct trap_info *ti;
400 DEBUGGER_trap_entry(TRAP_int3, regs);
402 if ( !GUEST_MODE(regs) )
403 {
404 DEBUGGER_trap_fatal(TRAP_int3, regs);
405 show_registers(regs);
406 panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
407 }
409 ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
410 tb->flags = TBF_EXCEPTION;
411 tb->cs = ti->cs;
412 tb->eip = ti->address;
413 if ( TI_GET_IF(ti) )
414 tb->flags |= TBF_INTERRUPT;
416 return 0;
417 }
419 asmlinkage int do_machine_check(struct cpu_user_regs *regs)
420 {
421 fatal_trap(TRAP_machine_check, regs);
422 return 0;
423 }
425 void propagate_page_fault(unsigned long addr, u16 error_code)
426 {
427 struct trap_info *ti;
428 struct vcpu *v = current;
429 struct trap_bounce *tb = &v->arch.trap_bounce;
431 v->arch.guest_context.ctrlreg[2] = addr;
432 v->vcpu_info->arch.cr2 = addr;
434 ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
435 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
436 tb->error_code = error_code;
437 tb->cs = ti->cs;
438 tb->eip = ti->address;
439 if ( TI_GET_IF(ti) )
440 tb->flags |= TBF_INTERRUPT;
441 }
443 static int handle_gdt_ldt_mapping_fault(
444 unsigned long offset, struct cpu_user_regs *regs)
445 {
446 extern int map_ldt_shadow_page(unsigned int);
448 struct vcpu *v = current;
449 struct domain *d = v->domain;
450 int ret;
452 /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
453 unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
454 unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
456 /* Should never fault in another vcpu's area. */
457 BUG_ON(vcpu_area != current->vcpu_id);
459 /* Byte offset within the gdt/ldt sub-area. */
460 offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
462 if ( likely(is_ldt_area) )
463 {
464 /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
465 LOCK_BIGLOCK(d);
466 ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
467 UNLOCK_BIGLOCK(d);
469 if ( unlikely(ret == 0) )
470 {
471 /* In hypervisor mode? Leave it to the #PF handler to fix up. */
472 if ( !GUEST_MODE(regs) )
473 return 0;
474 /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
475 propagate_page_fault(
476 v->arch.guest_context.ldt_base + offset, regs->error_code);
477 }
478 }
479 else
480 {
481 /* GDT fault: handle the fault as #GP(selector). */
482 regs->error_code = (u16)offset & ~7;
483 (void)do_general_protection(regs);
484 }
486 return EXCRET_fault_fixed;
487 }
489 #ifdef HYPERVISOR_VIRT_END
490 #define IN_HYPERVISOR_RANGE(va) \
491 (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
492 #else
493 #define IN_HYPERVISOR_RANGE(va) \
494 (((va) >= HYPERVISOR_VIRT_START))
495 #endif
497 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
498 {
499 struct vcpu *v = current;
500 struct domain *d = v->domain;
502 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
503 {
504 if ( shadow_mode_external(d) && GUEST_MODE(regs) )
505 return shadow_fault(addr, regs);
506 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
507 return handle_gdt_ldt_mapping_fault(
508 addr - GDT_LDT_VIRT_START, regs);
509 }
510 else if ( unlikely(shadow_mode_enabled(d)) )
511 {
512 return shadow_fault(addr, regs);
513 }
514 else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
515 {
516 LOCK_BIGLOCK(d);
517 if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
518 unlikely(l2_linear_offset(addr) ==
519 d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
520 {
521 ptwr_flush(d, PTWR_PT_ACTIVE);
522 UNLOCK_BIGLOCK(d);
523 return EXCRET_fault_fixed;
524 }
526 if ( KERNEL_MODE(v, regs) &&
527 /* Protection violation on write? No reserved-bit violation? */
528 ((regs->error_code & 0xb) == 0x3) &&
529 ptwr_do_page_fault(d, addr, regs) )
530 {
531 UNLOCK_BIGLOCK(d);
532 return EXCRET_fault_fixed;
533 }
534 UNLOCK_BIGLOCK(d);
535 }
537 return 0;
538 }
540 /*
541 * #PF error code:
542 * Bit 0: Protection violation (=1) ; Page not present (=0)
543 * Bit 1: Write access
544 * Bit 2: User mode (=1) ; Supervisor mode (=0)
545 * Bit 3: Reserved bit violation
546 * Bit 4: Instruction fetch
547 */
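/* (Editorial sketch, not part of the original file: a hypothetical helper
 * showing how the error-code bits documented above can be decoded; the
 * USER_READ_FAULT/USER_WRITE_FAULT definitions further down use the same
 * encoding.) */
static inline void example_decode_pf_error_code(unsigned long ec)
{
    printk("#PF: %s, %s access, %s mode%s%s\n",
           (ec & 1) ? "protection violation" : "page not present",
           (ec & 2) ? "write" : "read",
           (ec & 4) ? "user" : "supervisor",
           (ec & 8) ? ", reserved-bit violation" : "",
           (ec & 16) ? ", instruction fetch" : "");
}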
548 asmlinkage int do_page_fault(struct cpu_user_regs *regs)
549 {
550 unsigned long addr, fixup;
551 int rc;
553 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
555 DEBUGGER_trap_entry(TRAP_page_fault, regs);
557 perfc_incrc(page_faults);
559 if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
560 return rc;
562 if ( unlikely(!GUEST_MODE(regs)) )
563 {
564 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
565 {
566 perfc_incrc(copy_user_faults);
567 regs->eip = fixup;
568 return 0;
569 }
571 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
573 show_registers(regs);
574 show_page_walk(addr);
575 panic("CPU%d FATAL PAGE FAULT\n"
576 "[error_code=%04x]\n"
577 "Faulting linear address: %p\n",
578 smp_processor_id(), regs->error_code, addr);
579 }
581 propagate_page_fault(addr, regs->error_code);
582 return 0;
583 }
585 long do_fpu_taskswitch(int set)
586 {
587 struct vcpu *v = current;
589 if ( set )
590 {
591 v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
592 stts();
593 }
594 else
595 {
596 v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
597 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
598 clts();
599 }
601 return 0;
602 }
604 /* Has the guest requested sufficient permission for this I/O access? */
605 static inline int guest_io_okay(
606 unsigned int port, unsigned int bytes,
607 struct vcpu *v, struct cpu_user_regs *regs)
608 {
609 u16 x;
610 #if defined(__x86_64__)
611 /* If in user mode, switch to kernel mode just to read I/O bitmap. */
612 int user_mode = !(v->arch.flags & TF_kernel_mode);
613 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
614 #elif defined(__i386__)
615 #define TOGGLE_MODE() ((void)0)
616 #endif
618 if ( v->arch.iopl >= (KERNEL_MODE(v, regs) ? 1 : 3) )
619 return 1;
621 if ( v->arch.iobmp_limit > (port + bytes) )
622 {
623 TOGGLE_MODE();
624 __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
625 TOGGLE_MODE();
626 if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
627 return 1;
628 }
630 return 0;
631 }
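/* (Editorial note, not part of the original file: the bitmap test above
 * follows the TSS-style I/O bitmap layout, one bit per port: byte index
 * port>>3, bit index port&7.  For example, a 2-byte access to port 0x3F9
 * reads the byte at iobmp+0x7F and tests mask 0x06 (bits 1 and 2); the
 * access is granted only if both bits are clear.) */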
633 /* Has the administrator granted sufficient permission for this I/O access? */
634 static inline int admin_io_okay(
635 unsigned int port, unsigned int bytes,
636 struct vcpu *v, struct cpu_user_regs *regs)
637 {
638 return ioports_access_permitted(v->domain, port, port + bytes - 1);
639 }
641 /* Check admin limits. Silently fail the access if it is disallowed. */
642 #define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
643 #define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
644 #define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
645 #define outb_user(_v, _p, _d, _r) \
646 (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
647 #define outw_user(_v, _p, _d, _r) \
648 (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
649 #define outl_user(_v, _p, _d, _r) \
650 (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
652 /* Propagate a fault back to the guest kernel. */
653 #define USER_READ_FAULT 4 /* user mode, read fault */
654 #define USER_WRITE_FAULT 6 /* user mode, write fault */
655 #define PAGE_FAULT(_faultaddr, _errcode) \
656 ({ propagate_page_fault(_faultaddr, _errcode); \
657 return EXCRET_fault_fixed; \
658 })
660 /* Instruction fetch with error handling. */
661 #define insn_fetch(_type, _size, _ptr) \
662 ({ unsigned long _x; \
663 if ( get_user(_x, (_type *)eip) ) \
664 PAGE_FAULT(eip, USER_READ_FAULT); \
665 eip += _size; (_type)_x; })
667 static int emulate_privileged_op(struct cpu_user_regs *regs)
668 {
669 struct vcpu *v = current;
670 unsigned long *reg, eip = regs->eip, res;
671 u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
672 unsigned int port, i, op_bytes = 4, data;
673 u32 l, h;
675 /* Legacy prefixes. */
676 for ( i = 0; i < 8; i++ )
677 {
678 switch ( opcode = insn_fetch(u8, 1, eip) )
679 {
680 case 0x66: /* operand-size override */
681 op_bytes ^= 6; /* switch between 2/4 bytes */
682 break;
683 case 0x67: /* address-size override */
684 case 0x2e: /* CS override */
685 case 0x3e: /* DS override */
686 case 0x26: /* ES override */
687 case 0x64: /* FS override */
688 case 0x65: /* GS override */
689 case 0x36: /* SS override */
690 case 0xf0: /* LOCK */
691 case 0xf2: /* REPNE/REPNZ */
692 break;
693 case 0xf3: /* REP/REPE/REPZ */
694 rep_prefix = 1;
695 break;
696 default:
697 goto done_prefixes;
698 }
699 }
700 done_prefixes:
702 #ifdef __x86_64__
703 /* REX prefix. */
704 if ( (opcode & 0xf0) == 0x40 )
705 {
706 modrm_reg = (opcode & 4) << 1; /* REX.R */
707 modrm_rm = (opcode & 1) << 3; /* REX.B */
709 /* REX.W and REX.X do not need to be decoded. */
710 opcode = insn_fetch(u8, 1, eip);
711 }
712 #endif
714 /* Input/Output String instructions. */
715 if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
716 {
717 if ( rep_prefix && (regs->ecx == 0) )
718 goto done;
720 continue_io_string:
721 switch ( opcode )
722 {
723 case 0x6c: /* INSB */
724 op_bytes = 1;
725 case 0x6d: /* INSW/INSL */
726 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
727 goto fail;
728 switch ( op_bytes )
729 {
730 case 1:
731 data = (u8)inb_user((u16)regs->edx, v, regs);
732 if ( put_user((u8)data, (u8 *)regs->edi) )
733 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
734 break;
735 case 2:
736 data = (u16)inw_user((u16)regs->edx, v, regs);
737 if ( put_user((u16)data, (u16 *)regs->edi) )
738 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
739 break;
740 case 4:
741 data = (u32)inl_user((u16)regs->edx, v, regs);
742 if ( put_user((u32)data, (u32 *)regs->edi) )
743 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
744 break;
745 }
746 regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
747 break;
749 case 0x6e: /* OUTSB */
750 op_bytes = 1;
751 case 0x6f: /* OUTSW/OUTSL */
752 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
753 goto fail;
754 switch ( op_bytes )
755 {
756 case 1:
757 if ( get_user(data, (u8 *)regs->esi) )
758 PAGE_FAULT(regs->esi, USER_READ_FAULT);
759 outb_user((u8)data, (u16)regs->edx, v, regs);
760 break;
761 case 2:
762 if ( get_user(data, (u16 *)regs->esi) )
763 PAGE_FAULT(regs->esi, USER_READ_FAULT);
764 outw_user((u16)data, (u16)regs->edx, v, regs);
765 break;
766 case 4:
767 if ( get_user(data, (u32 *)regs->esi) )
768 PAGE_FAULT(regs->esi, USER_READ_FAULT);
769 outl_user((u32)data, (u16)regs->edx, v, regs);
770 break;
771 }
772 regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
773 break;
774 }
776 if ( rep_prefix && (--regs->ecx != 0) )
777 {
778 if ( !hypercall_preempt_check() )
779 goto continue_io_string;
780 eip = regs->eip;
781 }
783 goto done;
784 }
786 /* I/O Port and Interrupt Flag instructions. */
787 switch ( opcode )
788 {
789 case 0xe4: /* IN imm8,%al */
790 op_bytes = 1;
791 case 0xe5: /* IN imm8,%eax */
792 port = insn_fetch(u8, 1, eip);
793 exec_in:
794 if ( !guest_io_okay(port, op_bytes, v, regs) )
795 goto fail;
796 switch ( op_bytes )
797 {
798 case 1:
799 regs->eax &= ~0xffUL;
800 regs->eax |= (u8)inb_user(port, v, regs);
801 break;
802 case 2:
803 regs->eax &= ~0xffffUL;
804 regs->eax |= (u16)inw_user(port, v, regs);
805 break;
806 case 4:
807 regs->eax = (u32)inl_user(port, v, regs);
808 break;
809 }
810 goto done;
812 case 0xec: /* IN %dx,%al */
813 op_bytes = 1;
814 case 0xed: /* IN %dx,%eax */
815 port = (u16)regs->edx;
816 goto exec_in;
818 case 0xe6: /* OUT %al,imm8 */
819 op_bytes = 1;
820 case 0xe7: /* OUT %eax,imm8 */
821 port = insn_fetch(u8, 1, eip);
822 exec_out:
823 if ( !guest_io_okay(port, op_bytes, v, regs) )
824 goto fail;
825 switch ( op_bytes )
826 {
827 case 1:
828 outb_user((u8)regs->eax, port, v, regs);
829 break;
830 case 2:
831 outw_user((u16)regs->eax, port, v, regs);
832 break;
833 case 4:
834 outl_user((u32)regs->eax, port, v, regs);
835 break;
836 }
837 goto done;
839 case 0xee: /* OUT %al,%dx */
840 op_bytes = 1;
841 case 0xef: /* OUT %eax,%dx */
842 port = (u16)regs->edx;
843 goto exec_out;
845 case 0xfa: /* CLI */
846 case 0xfb: /* STI */
847 if ( v->arch.iopl < (KERNEL_MODE(v, regs) ? 1 : 3) )
848 goto fail;
849 /*
850 * This is just too dangerous to allow, in my opinion. Consider if the
851 * caller then tries to reenable interrupts using POPF: we can't trap
852 * that and we'll end up with hard-to-debug lockups. Fast & loose will
853 * do for us. :-)
854 */
855 /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
856 goto done;
858 case 0x0f: /* Two-byte opcode */
859 break;
861 default:
862 goto fail;
863 }
865 /* Remaining instructions only emulated from guest kernel. */
866 if ( !KERNEL_MODE(v, regs) )
867 goto fail;
869 /* Privileged (ring 0) instructions. */
870 opcode = insn_fetch(u8, 1, eip);
871 switch ( opcode )
872 {
873 case 0x06: /* CLTS */
874 (void)do_fpu_taskswitch(0);
875 break;
877 case 0x09: /* WBINVD */
878 /* Ignore the instruction if unprivileged. */
879 if ( !cache_flush_permitted(v->domain) )
880 DPRINTK("Non-physdev domain attempted WBINVD.\n");
881 else
882 wbinvd();
883 break;
885 case 0x20: /* MOV CR?,<reg> */
886 opcode = insn_fetch(u8, 1, eip);
887 modrm_reg |= (opcode >> 3) & 7;
888 modrm_rm |= (opcode >> 0) & 7;
889 reg = decode_register(modrm_rm, regs, 0);
890 switch ( modrm_reg )
891 {
892 case 0: /* Read CR0 */
893 *reg = (read_cr0() & ~X86_CR0_TS) |
894 v->arch.guest_context.ctrlreg[0];
895 break;
897 case 2: /* Read CR2 */
898 *reg = v->arch.guest_context.ctrlreg[2];
899 break;
901 case 3: /* Read CR3 */
902 *reg = pagetable_get_paddr(v->arch.guest_table);
903 break;
905 case 4: /* Read CR4 */
906 /*
907 * Guests can read CR4 to see what features Xen has enabled. We
908 * therefore lie about PGE & PSE as they are unavailable to guests.
909 */
910 *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
911 break;
913 default:
914 goto fail;
915 }
916 break;
918 case 0x21: /* MOV DR?,<reg> */
919 opcode = insn_fetch(u8, 1, eip);
920 modrm_reg |= (opcode >> 3) & 7;
921 modrm_rm |= (opcode >> 0) & 7;
922 reg = decode_register(modrm_rm, regs, 0);
923 if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
924 goto fail;
925 *reg = res;
926 break;
928 case 0x22: /* MOV <reg>,CR? */
929 opcode = insn_fetch(u8, 1, eip);
930 modrm_reg |= (opcode >> 3) & 7;
931 modrm_rm |= (opcode >> 0) & 7;
932 reg = decode_register(modrm_rm, regs, 0);
933 switch ( modrm_reg )
934 {
935 case 0: /* Write CR0 */
936 if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
937 {
938 DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
939 goto fail;
940 }
941 (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
942 break;
944 case 2: /* Write CR2 */
945 v->arch.guest_context.ctrlreg[2] = *reg;
946 v->vcpu_info->arch.cr2 = *reg;
947 break;
949 case 3: /* Write CR3 */
950 LOCK_BIGLOCK(v->domain);
951 (void)new_guest_cr3(*reg);
952 UNLOCK_BIGLOCK(v->domain);
953 break;
955 case 4:
956 if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
957 {
958 DPRINTK("Attempt to change CR4 flags.\n");
959 goto fail;
960 }
961 break;
963 default:
964 goto fail;
965 }
966 break;
968 case 0x23: /* MOV <reg>,DR? */
969 opcode = insn_fetch(u8, 1, eip);
970 modrm_reg |= (opcode >> 3) & 7;
971 modrm_rm |= (opcode >> 0) & 7;
972 reg = decode_register(modrm_rm, regs, 0);
973 if ( do_set_debugreg(modrm_reg, *reg) != 0 )
974 goto fail;
975 break;
977 case 0x30: /* WRMSR */
978 switch ( regs->ecx )
979 {
980 #ifdef CONFIG_X86_64
981 case MSR_FS_BASE:
982 if ( wrmsr_user(MSR_FS_BASE, regs->eax, regs->edx) )
983 goto fail;
984 v->arch.guest_context.fs_base =
985 ((u64)regs->edx << 32) | regs->eax;
986 break;
987 case MSR_GS_BASE:
988 if ( wrmsr_user(MSR_GS_BASE, regs->eax, regs->edx) )
989 goto fail;
990 v->arch.guest_context.gs_base_kernel =
991 ((u64)regs->edx << 32) | regs->eax;
992 break;
993 case MSR_SHADOW_GS_BASE:
994 if ( wrmsr_user(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
995 goto fail;
996 v->arch.guest_context.gs_base_user =
997 ((u64)regs->edx << 32) | regs->eax;
998 break;
999 #endif
1000 default:
1001 if ( (rdmsr_user(regs->ecx, l, h) != 0) ||
1002 (regs->ecx != MSR_EFER) ||
1003 (regs->eax != l) || (regs->edx != h) )
1004 DPRINTK("Domain attempted WRMSR %p from "
1005 "%08x:%08x to %08lx:%08lx.\n",
1006 _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
1007 break;
1008 }
1009 break;
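/* (Editorial sketch, not part of the original file: any further MSR that
 * domain 0 legitimately needs to write would be white-listed by adding
 * another case above, along the lines of the hypothetical
 *     case MSR_SOME_FEATURE:
 *         if ( wrmsr_user(MSR_SOME_FEATURE, regs->eax, regs->edx) )
 *             goto fail;
 *         break;
 * Anything that reaches the default case is dropped (and normally logged),
 * which is what keeps the SYSCALL/SYSENTER target MSRs intact.) */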
1011 case 0x32: /* RDMSR */
1012 switch ( regs->ecx )
1013 {
1014 #ifdef CONFIG_X86_64
1015 case MSR_FS_BASE:
1016 regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL;
1017 regs->edx = v->arch.guest_context.fs_base >> 32;
1018 break;
1019 case MSR_GS_BASE:
1020 regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL;
1021 regs->edx = v->arch.guest_context.gs_base_kernel >> 32;
1022 break;
1023 case MSR_SHADOW_GS_BASE:
1024 regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
1025 regs->edx = v->arch.guest_context.gs_base_user >> 32;
1026 break;
1027 #endif
1028 case MSR_EFER:
1029 if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
1030 goto fail;
1031 break;
1032 default:
1033 DPRINTK("Domain attempted RDMSR %p.\n", _p(regs->ecx));
1034 /* Everyone can read the MSR space. */
1035 if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
1036 goto fail;
1037 break;
1038 }
1039 break;
1041 default:
1042 goto fail;
1043 }
1045 done:
1046 regs->eip = eip;
1047 return EXCRET_fault_fixed;
1049 fail:
1050 return 0;
1051 }
1053 asmlinkage int do_general_protection(struct cpu_user_regs *regs)
1054 {
1055 struct vcpu *v = current;
1056 struct trap_bounce *tb = &v->arch.trap_bounce;
1057 struct trap_info *ti;
1058 unsigned long fixup;
1060 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
1062 if ( regs->error_code & 1 )
1063 goto hardware_gp;
1065 if ( !GUEST_MODE(regs) )
1066 goto gp_in_kernel;
1068 /*
1069 * Cunning trick to allow arbitrary "INT n" handling.
1071 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
1072 * instruction from trapping to the appropriate vector, when that might not
1073 * be expected by Xen or the guest OS. For example, that entry might be for
1074 * a fault handler (unlike traps, faults don't increment EIP), or might
1075 * expect an error code on the stack (which a software trap never
1076 * provides), or might be a hardware interrupt handler that doesn't like
1077 * being called spuriously.
1079 * Instead, a GPF occurs with the faulting IDT vector in the error code.
1080 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
1081 * clear to indicate that it's a software fault, not hardware.
1083 * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
1084 * okay because they can only be triggered by an explicit DPL-checked
1085 * instruction. The DPL specified by the guest OS for these vectors is NOT
1086 * CHECKED!!
1087 */
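/* (Editorial worked example, not part of the original file: a guest issuing
 * INT 0x80 against a DPL-0 IDT entry takes a #GP whose error code is
 * (0x80 << 3) | 2 = 0x402: bit 1 set marks an IDT-sourced fault and bit 0
 * clear marks it as software-generated, so error_code>>3 below recovers
 * vector 0x80.) */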
1088 if ( (regs->error_code & 3) == 2 )
1089 {
1090 /* This fault must be due to <INT n> instruction. */
1091 ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
1092 if ( PERMIT_SOFTINT(TI_GET_DPL(ti), v, regs) )
1093 {
1094 tb->flags = TBF_EXCEPTION;
1095 regs->eip += 2;
1096 goto finish_propagation;
1097 }
1098 }
1100 /* Emulate some simple privileged and I/O instructions. */
1101 if ( (regs->error_code == 0) &&
1102 emulate_privileged_op(regs) )
1103 return 0;
1105 #if defined(__i386__)
1106 if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
1107 (regs->error_code == 0) &&
1108 gpf_emulate_4gb(regs) )
1109 return 0;
1110 #endif
1112 /* Pass on GPF as is. */
1113 ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
1114 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
1115 tb->error_code = regs->error_code;
1116 finish_propagation:
1117 tb->cs = ti->cs;
1118 tb->eip = ti->address;
1119 if ( TI_GET_IF(ti) )
1120 tb->flags |= TBF_INTERRUPT;
1121 return 0;
1123 gp_in_kernel:
1125 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
1126 {
1127 DPRINTK("GPF (%04x): %p -> %p\n",
1128 regs->error_code, _p(regs->eip), _p(fixup));
1129 regs->eip = fixup;
1130 return 0;
1131 }
1133 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
1135 hardware_gp:
1136 show_registers(regs);
1137 panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
1138 smp_processor_id(), regs->error_code);
1139 return 0;
1140 }
1142 static void nmi_softirq(void)
1143 {
1144 /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
1145 evtchn_notify(dom0->vcpu[0]);
1146 }
1148 static void nmi_dom0_report(unsigned int reason_idx)
1149 {
1150 struct domain *d;
1152 if ( (d = dom0) == NULL )
1153 return;
1155 set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
1157 if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
1158 raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
1159 }
1161 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
1162 {
1163 switch ( opt_nmi[0] )
1164 {
1165 case 'd': /* 'dom0' */
1166 nmi_dom0_report(_XEN_NMIREASON_parity_error);
1167 case 'i': /* 'ignore' */
1168 break;
1169 default: /* 'fatal' */
1170 console_force_unlock();
1171 printk("\n\nNMI - MEMORY ERROR\n");
1172 fatal_trap(TRAP_nmi, regs);
1173 }
1175 outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable parity check */
1176 mdelay(1);
1177 outb((inb(0x61) & 0x0b) | 0x00, 0x61); /* enable parity check */
1178 }
1180 asmlinkage void io_check_error(struct cpu_user_regs *regs)
1181 {
1182 switch ( opt_nmi[0] )
1183 {
1184 case 'd': /* 'dom0' */
1185 nmi_dom0_report(_XEN_NMIREASON_io_error);
1186 case 'i': /* 'ignore' */
1187 break;
1188 default: /* 'fatal' */
1189 console_force_unlock();
1190 printk("\n\nNMI - I/O ERROR\n");
1191 fatal_trap(TRAP_nmi, regs);
1192 }
1194 outb((inb(0x61) & 0x0f) | 0x08, 0x61); /* clear-and-disable IOCK */
1195 mdelay(1);
1196 outb((inb(0x61) & 0x07) | 0x00, 0x61); /* enable IOCK */
1197 }
1199 static void unknown_nmi_error(unsigned char reason)
1200 {
1201 switch ( opt_nmi[0] )
1202 {
1203 case 'd': /* 'dom0' */
1204 nmi_dom0_report(_XEN_NMIREASON_unknown);
1205 case 'i': /* 'ignore' */
1206 break;
1207 default: /* 'fatal' */
1208 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
1209 printk("Dazed and confused, but trying to continue\n");
1210 printk("Do you have a strange power saving mode enabled?\n");
1211 }
1212 }
1214 static int dummy_nmi_callback(struct cpu_user_regs *regs, int cpu)
1215 {
1216 return 0;
1217 }
1219 static nmi_callback_t nmi_callback = dummy_nmi_callback;
1221 asmlinkage void do_nmi(struct cpu_user_regs *regs)
1222 {
1223 unsigned int cpu = smp_processor_id();
1224 unsigned char reason;
1226 ++nmi_count(cpu);
1228 if ( nmi_callback(regs, cpu) )
1229 return;
1231 if ( nmi_watchdog )
1232 nmi_watchdog_tick(regs);
1234 /* Only the BSP gets external NMIs from the system. */
1235 if ( cpu == 0 )
1236 {
1237 reason = inb(0x61);
1238 if ( reason & 0x80 )
1239 mem_parity_error(regs);
1240 else if ( reason & 0x40 )
1241 io_check_error(regs);
1242 else if ( !nmi_watchdog )
1243 unknown_nmi_error((unsigned char)(reason&0xff));
1244 }
1245 }
1247 void set_nmi_callback(nmi_callback_t callback)
1248 {
1249 nmi_callback = callback;
1250 }
1252 void unset_nmi_callback(void)
1253 {
1254 nmi_callback = dummy_nmi_callback;
1255 }
1257 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
1258 {
1259 struct trap_bounce *tb;
1260 struct trap_info *ti;
1262 /* Prevent recursion. */
1263 clts();
1265 setup_fpu(current);
1267 if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
1268 {
1269 tb = &current->arch.trap_bounce;
1270 ti = &current->arch.guest_context.trap_ctxt[TRAP_no_device];
1272 tb->flags = TBF_EXCEPTION;
1273 tb->cs = ti->cs;
1274 tb->eip = ti->address;
1275 if ( TI_GET_IF(ti) )
1276 tb->flags |= TBF_INTERRUPT;
1278 current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
1279 }
1281 return EXCRET_fault_fixed;
1282 }
1284 asmlinkage int do_debug(struct cpu_user_regs *regs)
1285 {
1286 unsigned long condition;
1287 struct vcpu *v = current;
1288 struct trap_bounce *tb = &v->arch.trap_bounce;
1289 struct trap_info *ti;
1291 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
1293 /* Mask out spurious debug traps due to lazy DR7 setting */
1294 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
1295 (v->arch.guest_context.debugreg[7] == 0) )
1296 {
1297 __asm__("mov %0,%%db7" : : "r" (0UL));
1298 goto out;
1299 }
1301 DEBUGGER_trap_entry(TRAP_debug, regs);
1303 if ( !GUEST_MODE(regs) )
1304 {
1305 /* Clear TF just for absolute sanity. */
1306 regs->eflags &= ~EF_TF;
1307 /*
1308 * We ignore watchpoints when they trigger within Xen. This may happen
1309 * when a buffer is passed to us which previously had a watchpoint set
1310 * on it. No need to bump EIP; the only faulting trap is an instruction
1311 * breakpoint, which can't happen to us.
1312 */
1313 goto out;
1314 }
1316 /* Save debug status register where guest OS can peek at it */
1317 v->arch.guest_context.debugreg[6] = condition;
1319 ti = &v->arch.guest_context.trap_ctxt[TRAP_debug];
1320 tb->flags = TBF_EXCEPTION;
1321 tb->cs = ti->cs;
1322 tb->eip = ti->address;
1323 if ( TI_GET_IF(ti) )
1324 tb->flags |= TBF_INTERRUPT;
1326 out:
1327 return EXCRET_not_a_fault;
1328 }
1330 asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
1331 {
1332 return EXCRET_not_a_fault;
1333 }
1335 void set_intr_gate(unsigned int n, void *addr)
1336 {
1337 #ifdef __i386__
1338 int i;
1339 /* Keep secondary tables in sync with IRQ updates. */
1340 for ( i = 1; i < NR_CPUS; i++ )
1341 if ( idt_tables[i] != NULL )
1342 _set_gate(&idt_tables[i][n], 14, 0, addr);
1343 #endif
1344 _set_gate(&idt_table[n], 14, 0, addr);
1345 }
1347 void set_system_gate(unsigned int n, void *addr)
1348 {
1349 _set_gate(idt_table+n,14,3,addr);
1350 }
1352 void set_task_gate(unsigned int n, unsigned int sel)
1353 {
1354 idt_table[n].a = sel << 16;
1355 idt_table[n].b = 0x8500;
1356 }
1358 void set_tss_desc(unsigned int n, void *addr)
1359 {
1360 _set_tssldt_desc(
1361 gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
1362 (unsigned long)addr,
1363 offsetof(struct tss_struct, __cacheline_filler) - 1,
1364 9);
1365 }
1367 void __init trap_init(void)
1368 {
1369 extern void percpu_traps_init(void);
1371 /*
1372 * Note that interrupt gates are always used, rather than trap gates. We
1373 * must have interrupts disabled until DS/ES/FS/GS are saved because the
1374 * first activation must have the "bad" value(s) for these registers and
1375 * we may lose them if another activation is installed before they are
1376 * saved. The page-fault handler also needs interrupts disabled until %cr2
1377 * has been read and saved on the stack.
1378 */
1379 set_intr_gate(TRAP_divide_error,&divide_error);
1380 set_intr_gate(TRAP_debug,&debug);
1381 set_intr_gate(TRAP_nmi,&nmi);
1382 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
1383 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
1384 set_intr_gate(TRAP_bounds,&bounds);
1385 set_intr_gate(TRAP_invalid_op,&invalid_op);
1386 set_intr_gate(TRAP_no_device,&device_not_available);
1387 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
1388 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
1389 set_intr_gate(TRAP_no_segment,&segment_not_present);
1390 set_intr_gate(TRAP_stack_error,&stack_segment);
1391 set_intr_gate(TRAP_gp_fault,&general_protection);
1392 set_intr_gate(TRAP_page_fault,&page_fault);
1393 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
1394 set_intr_gate(TRAP_copro_error,&coprocessor_error);
1395 set_intr_gate(TRAP_alignment_check,&alignment_check);
1396 set_intr_gate(TRAP_machine_check,&machine_check);
1397 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
1399 percpu_traps_init();
1401 cpu_init();
1403 open_softirq(NMI_SOFTIRQ, nmi_softirq);
1404 }
1407 long do_set_trap_table(struct trap_info *traps)
1408 {
1409 struct trap_info cur;
1410 struct trap_info *dst = current->arch.guest_context.trap_ctxt;
1411 long rc = 0;
1413 LOCK_BIGLOCK(current->domain);
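/* (Editorial note, not part of the original file: the loop below installs
 * one trap_info entry per iteration; if a preemption is pending it re-issues
 * the hypercall as a continuation with the already-advanced traps pointer,
 * so a long trap table cannot monopolise the CPU.) */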
1415 for ( ; ; )
1416 {
1417 if ( hypercall_preempt_check() )
1418 {
1419 rc = hypercall1_create_continuation(
1420 __HYPERVISOR_set_trap_table, traps);
1421 break;
1422 }
1424 if ( copy_from_user(&cur, traps, sizeof(cur)) )
1425 {
1426 rc = -EFAULT;
1427 break;
1428 }
1430 if ( cur.address == 0 )
1431 break;
1433 if ( !VALID_CODESEL(cur.cs) )
1434 {
1435 rc = -EPERM;
1436 break;
1437 }
1439 memcpy(&dst[cur.vector], &cur, sizeof(cur));
1441 if ( cur.vector == 0x80 )
1442 init_int80_direct_trap(current);
1444 traps++;
1445 }
1447 UNLOCK_BIGLOCK(current->domain);
1449 return rc;
1450 }
1453 long set_debugreg(struct vcpu *p, int reg, unsigned long value)
1454 {
1455 int i;
1457 switch ( reg )
1458 {
1459 case 0:
1460 if ( !access_ok(value, sizeof(long)) )
1461 return -EPERM;
1462 if ( p == current )
1463 __asm__ ( "mov %0, %%db0" : : "r" (value) );
1464 break;
1465 case 1:
1466 if ( !access_ok(value, sizeof(long)) )
1467 return -EPERM;
1468 if ( p == current )
1469 __asm__ ( "mov %0, %%db1" : : "r" (value) );
1470 break;
1471 case 2:
1472 if ( !access_ok(value, sizeof(long)) )
1473 return -EPERM;
1474 if ( p == current )
1475 __asm__ ( "mov %0, %%db2" : : "r" (value) );
1476 break;
1477 case 3:
1478 if ( !access_ok(value, sizeof(long)) )
1479 return -EPERM;
1480 if ( p == current )
1481 __asm__ ( "mov %0, %%db3" : : "r" (value) );
1482 break;
1483 case 6:
1484 /*
1485 * DR6: Bits 4-11,16-31 reserved (set to 1).
1486 * Bit 12 reserved (set to 0).
1487 */
1488 value &= 0xffffefff; /* reserved bits => 0 */
1489 value |= 0xffff0ff0; /* reserved bits => 1 */
1490 if ( p == current )
1491 __asm__ ( "mov %0, %%db6" : : "r" (value) );
1492 break;
1493 case 7:
1494 /*
1495 * DR7: Bit 10 reserved (set to 1).
1496 * Bits 11-12,14-15 reserved (set to 0).
1497 * Privileged bits:
1498 * GD (bit 13): must be 0.
1499 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
1500 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
1501 */
1502 /* DR7 == 0 => debugging disabled for this domain. */
1503 if ( value != 0 )
1504 {
1505 value &= 0xffff27ff; /* reserved bits => 0 */
1506 value |= 0x00000400; /* reserved bits => 1 */
1507 if ( (value & (1<<13)) != 0 ) return -EPERM;
1508 for ( i = 0; i < 16; i += 2 )
1509 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
1510 }
1511 if ( p == current )
1512 __asm__ ( "mov %0, %%db7" : : "r" (value) );
1513 break;
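/* (Editorial worked example, not part of the original file: arming
 * breakpoint 0 as a 4-byte data-write watchpoint would pass a value such as
 * 0x000d0001: L0 set, R/W0 = 01 (write), LEN0 = 11 (4 bytes).  The masking
 * above forces reserved bit 10, giving 0x000d0401, and the GD, R/Wn and
 * LENn checks all pass.) */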
1514 default:
1515 return -EINVAL;
1516 }
1518 p->arch.guest_context.debugreg[reg] = value;
1519 return 0;
1520 }
1522 long do_set_debugreg(int reg, unsigned long value)
1523 {
1524 return set_debugreg(current, reg, value);
1525 }
1527 unsigned long do_get_debugreg(int reg)
1528 {
1529 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
1530 return current->arch.guest_context.debugreg[reg];
1531 }
1533 /*
1534 * Local variables:
1535 * mode: C
1536 * c-set-style: "BSD"
1537 * c-basic-offset: 4
1538 * tab-width: 4
1539 * indent-tabs-mode: nil
1540 * End:
1541 */