direct-io.hg

view xen/arch/x86/traps.c @ 8438:829517be689f

Modify CR0 access emulation -- return physical CR0 (except
for TS) and allow only the same physical flags to be written
back to CR0 by a guest.

Add write-to-CR4 emulation, but check that the write does not
modify any CR4 flags.
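
[Editor's illustration — not part of the changeset. A minimal sketch of the
rule the patch enforces, using the architectural CR0/CR4 bit values; the
helper names are hypothetical.]

    #define X86_CR0_TS  0x00000008 /* Task Switched */
    #define X86_CR4_PSE 0x00000010 /* Page Size Extensions */
    #define X86_CR4_PGE 0x00000080 /* Page Global Enable */

    /* A guest CR0 write may differ from the physical CR0 only in TS. */
    static int cr0_write_permitted(unsigned long new_cr0, unsigned long phys_cr0)
    {
        return ((new_cr0 ^ phys_cr0) & ~(unsigned long)X86_CR0_TS) == 0;
    }

    /* A guest CR4 write must equal the value the guest reads back
     * (physical CR4 with PGE/PSE hidden), i.e. it may change no flags. */
    static int cr4_write_permitted(unsigned long new_cr4, unsigned long phys_cr4)
    {
        return new_cr4 == (phys_cr4 & ~(unsigned long)(X86_CR4_PGE | X86_CR4_PSE));
    }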

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Dec 23 16:42:46 2005 +0100 (2005-12-23)
parents 931acb64fbaf
children b54e981957eb
line source
1 /******************************************************************************
2 * arch/x86/traps.c
3 *
4 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * Copyright (C) 1991, 1992 Linus Torvalds
23 *
24 * Pentium III FXSR, SSE support
25 * Gareth Hughes <gareth@valinux.com>, May 2000
26 */
28 #include <xen/config.h>
29 #include <xen/init.h>
30 #include <xen/sched.h>
31 #include <xen/lib.h>
32 #include <xen/errno.h>
33 #include <xen/mm.h>
34 #include <xen/console.h>
35 #include <asm/regs.h>
36 #include <xen/delay.h>
37 #include <xen/event.h>
38 #include <xen/spinlock.h>
39 #include <xen/irq.h>
40 #include <xen/perfc.h>
41 #include <xen/softirq.h>
42 #include <xen/domain_page.h>
43 #include <xen/symbols.h>
44 #include <asm/shadow.h>
45 #include <asm/system.h>
46 #include <asm/io.h>
47 #include <asm/atomic.h>
48 #include <asm/desc.h>
49 #include <asm/debugreg.h>
50 #include <asm/smp.h>
51 #include <asm/flushtlb.h>
52 #include <asm/uaccess.h>
53 #include <asm/i387.h>
54 #include <asm/debugger.h>
55 #include <asm/msr.h>
56 #include <asm/x86_emulate.h>
57 #include <asm/nmi.h>
59 /*
60 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
61 * fatal: Xen prints diagnostic message and then hangs.
62 * dom0: The NMI is virtualised to DOM0.
63 * ignore: The NMI error is cleared and ignored.
64 */
65 #ifdef NDEBUG
66 char opt_nmi[10] = "dom0";
67 #else
68 char opt_nmi[10] = "fatal";
69 #endif
70 string_param("nmi", opt_nmi);
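[Editor's usage note — not part of the changeset; the GRUB syntax shown is
an assumption of typical setups of the era.] The mode is chosen on the Xen
command line, and the handlers later in this file dispatch only on
opt_nmi[0], so "nmi=dom0", "nmi=ignore", or "nmi=fatal" all work:

    # /boot/grub/menu.lst (illustrative)
    kernel /boot/xen.gz console=vga nmi=ignore
    module /boot/vmlinuz-2.6-xen root=/dev/sda1 ro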
72 /* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32. */
73 idt_entry_t idt_table[IDT_ENTRIES];
75 #define DECLARE_TRAP_HANDLER(_name) \
76 asmlinkage void _name(void); \
77 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
79 asmlinkage void nmi(void);
80 DECLARE_TRAP_HANDLER(divide_error);
81 DECLARE_TRAP_HANDLER(debug);
82 DECLARE_TRAP_HANDLER(int3);
83 DECLARE_TRAP_HANDLER(overflow);
84 DECLARE_TRAP_HANDLER(bounds);
85 DECLARE_TRAP_HANDLER(invalid_op);
86 DECLARE_TRAP_HANDLER(device_not_available);
87 DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
88 DECLARE_TRAP_HANDLER(invalid_TSS);
89 DECLARE_TRAP_HANDLER(segment_not_present);
90 DECLARE_TRAP_HANDLER(stack_segment);
91 DECLARE_TRAP_HANDLER(general_protection);
92 DECLARE_TRAP_HANDLER(page_fault);
93 DECLARE_TRAP_HANDLER(coprocessor_error);
94 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
95 DECLARE_TRAP_HANDLER(alignment_check);
96 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
97 DECLARE_TRAP_HANDLER(machine_check);
99 long do_set_debugreg(int reg, unsigned long value);
100 unsigned long do_get_debugreg(int reg);
102 static int debug_stack_lines = 20;
103 integer_param("debug_stack_lines", debug_stack_lines);
105 #ifdef CONFIG_X86_32
106 #define stack_words_per_line 8
107 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
108 #else
109 #define stack_words_per_line 4
110 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
111 #endif
113 int is_kernel_text(unsigned long addr)
114 {
115 extern char _stext, _etext;
116 if (addr >= (unsigned long) &_stext &&
117 addr <= (unsigned long) &_etext)
118 return 1;
119 return 0;
121 }
123 unsigned long kernel_text_end(void)
124 {
125 extern char _etext;
126 return (unsigned long) &_etext;
127 }
129 static void show_guest_stack(struct cpu_user_regs *regs)
130 {
131 int i;
132 unsigned long *stack = (unsigned long *)regs->esp, addr;
134 printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
136 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
137 {
138 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
139 break;
140 if ( get_user(addr, stack) )
141 {
142 if ( i != 0 )
143 printk("\n ");
144 printk("Fault while accessing guest memory.");
145 i = 1;
146 break;
147 }
148 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
149 printk("\n ");
150 printk("%p ", _p(addr));
151 stack++;
152 }
153 if ( i == 0 )
154 printk("Stack empty.");
155 printk("\n");
156 }
158 #ifdef NDEBUG
160 static void show_trace(struct cpu_user_regs *regs)
161 {
162 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
164 printk("Xen call trace:\n ");
166 printk("[<%p>]", _p(regs->eip));
167 print_symbol(" %s\n ", regs->eip);
169 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
170 {
171 addr = *stack++;
172 if ( is_kernel_text(addr) )
173 {
174 printk("[<%p>]", _p(addr));
175 print_symbol(" %s\n ", addr);
176 }
177 }
179 printk("\n");
180 }
182 #else
184 static void show_trace(struct cpu_user_regs *regs)
185 {
186 unsigned long *frame, next, addr, low, high;
188 printk("Xen call trace:\n ");
190 printk("[<%p>]", _p(regs->eip));
191 print_symbol(" %s\n ", regs->eip);
193 /* Bounds for range of valid frame pointer. */
194 low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
195 high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info));
197 /* The initial frame pointer. */
198 next = regs->ebp;
200 for ( ; ; )
201 {
202 /* Valid frame pointer? */
203 if ( (next < low) || (next > high) )
204 {
205 /*
206 * Exception stack frames have a different layout, denoted by an
207 * inverted frame pointer.
208 */
209 next = ~next;
210 if ( (next < low) || (next > high) )
211 break;
212 frame = (unsigned long *)next;
213 next = frame[0];
214 addr = frame[(offsetof(struct cpu_user_regs, eip) -
215 offsetof(struct cpu_user_regs, ebp))
216 / BYTES_PER_LONG];
217 }
218 else
219 {
220 /* Ordinary stack frame. */
221 frame = (unsigned long *)next;
222 next = frame[0];
223 addr = frame[1];
224 }
226 printk("[<%p>]", _p(addr));
227 print_symbol(" %s\n ", addr);
229 low = (unsigned long)&frame[2];
230 }
232 printk("\n");
233 }
235 #endif
237 void show_stack(struct cpu_user_regs *regs)
238 {
239 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
240 int i;
242 if ( GUEST_CONTEXT(current, regs) )
243 return show_guest_stack(regs);
245 printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
247 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
248 {
249 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
250 break;
251 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
252 printk("\n ");
253 addr = *stack++;
254 printk("%p ", _p(addr));
255 }
256 if ( i == 0 )
257 printk("Stack empty.");
258 printk("\n");
260 show_trace(regs);
261 }
263 /*
264 * This is called for faults at very unexpected times (e.g., when interrupts
265 * are disabled). In such situations we can't do much that is safe. We try to
266 * print out some tracing and then we just spin.
267 */
268 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
269 {
270 int cpu = smp_processor_id();
271 unsigned long cr2;
272 static char *trapstr[] = {
273 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
274 "invalid operation", "device not available", "double fault",
275 "coprocessor segment", "invalid tss", "segment not found",
276 "stack error", "general protection fault", "page fault",
277 "spurious interrupt", "coprocessor error", "alignment check",
278 "machine check", "simd error"
279 };
281 watchdog_disable();
282 console_start_sync();
284 show_registers(regs);
286 if ( trapnr == TRAP_page_fault )
287 {
288 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
289 printk("Faulting linear address: %p\n", _p(cr2));
290 show_page_walk(cr2);
291 }
293 printk("************************************\n");
294 printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
295 cpu, trapnr, trapstr[trapnr], regs->error_code,
296 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
297 printk("System shutting down -- need manual reset.\n");
298 printk("************************************\n");
300 (void)debugger_trap_fatal(trapnr, regs);
302 /* Lock up the console to prevent spurious output from other CPUs. */
303 console_force_lock();
305 /* Wait for manual reset. */
306 for ( ; ; )
307 __asm__ __volatile__ ( "hlt" );
308 }
310 static inline int do_trap(int trapnr, char *str,
311 struct cpu_user_regs *regs,
312 int use_error_code)
313 {
314 struct vcpu *v = current;
315 struct trap_bounce *tb = &v->arch.trap_bounce;
316 trap_info_t *ti;
317 unsigned long fixup;
319 DEBUGGER_trap_entry(trapnr, regs);
321 if ( !GUEST_MODE(regs) )
322 goto xen_fault;
324 ti = &current->arch.guest_context.trap_ctxt[trapnr];
325 tb->flags = TBF_EXCEPTION;
326 tb->cs = ti->cs;
327 tb->eip = ti->address;
328 if ( use_error_code )
329 {
330 tb->flags |= TBF_EXCEPTION_ERRCODE;
331 tb->error_code = regs->error_code;
332 }
333 if ( TI_GET_IF(ti) )
334 tb->flags |= TBF_INTERRUPT;
335 return 0;
337 xen_fault:
339 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
340 {
341 DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
342 regs->eip = fixup;
343 return 0;
344 }
346 DEBUGGER_trap_fatal(trapnr, regs);
348 show_registers(regs);
349 panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
350 "[error_code=%04x]\n",
351 smp_processor_id(), trapnr, str, regs->error_code);
352 return 0;
353 }
355 #define DO_ERROR_NOCODE(trapnr, str, name) \
356 asmlinkage int do_##name(struct cpu_user_regs *regs) \
357 { \
358 return do_trap(trapnr, str, regs, 0); \
359 }
361 #define DO_ERROR(trapnr, str, name) \
362 asmlinkage int do_##name(struct cpu_user_regs *regs) \
363 { \
364 return do_trap(trapnr, str, regs, 1); \
365 }
367 DO_ERROR_NOCODE( 0, "divide error", divide_error)
368 DO_ERROR_NOCODE( 4, "overflow", overflow)
369 DO_ERROR_NOCODE( 5, "bounds", bounds)
370 DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
371 DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
372 DO_ERROR(10, "invalid TSS", invalid_TSS)
373 DO_ERROR(11, "segment not present", segment_not_present)
374 DO_ERROR(12, "stack segment", stack_segment)
375 DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
376 DO_ERROR(17, "alignment check", alignment_check)
377 DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
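[Editor's note] The macros above expand mechanically; for example,
DO_ERROR(10, "invalid TSS", invalid_TSS) generates exactly:

    asmlinkage int do_invalid_TSS(struct cpu_user_regs *regs)
    {
        return do_trap(10, "invalid TSS", regs, 1);
    }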
379 asmlinkage int do_int3(struct cpu_user_regs *regs)
380 {
381 struct vcpu *v = current;
382 struct trap_bounce *tb = &v->arch.trap_bounce;
383 trap_info_t *ti;
385 DEBUGGER_trap_entry(TRAP_int3, regs);
387 if ( !GUEST_MODE(regs) )
388 {
389 DEBUGGER_trap_fatal(TRAP_int3, regs);
390 show_registers(regs);
391 panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
392 }
394 ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
395 tb->flags = TBF_EXCEPTION;
396 tb->cs = ti->cs;
397 tb->eip = ti->address;
398 if ( TI_GET_IF(ti) )
399 tb->flags |= TBF_INTERRUPT;
401 return 0;
402 }
404 asmlinkage int do_machine_check(struct cpu_user_regs *regs)
405 {
406 fatal_trap(TRAP_machine_check, regs);
407 return 0;
408 }
410 void propagate_page_fault(unsigned long addr, u16 error_code)
411 {
412 trap_info_t *ti;
413 struct vcpu *v = current;
414 struct trap_bounce *tb = &v->arch.trap_bounce;
416 v->arch.guest_context.ctrlreg[2] = addr;
417 v->vcpu_info->arch.cr2 = addr;
419 ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
420 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
421 tb->error_code = error_code;
422 tb->cs = ti->cs;
423 tb->eip = ti->address;
424 if ( TI_GET_IF(ti) )
425 tb->flags |= TBF_INTERRUPT;
426 }
428 static int handle_perdomain_mapping_fault(
429 unsigned long offset, struct cpu_user_regs *regs)
430 {
431 extern int map_ldt_shadow_page(unsigned int);
433 struct vcpu *v = current;
434 struct domain *d = v->domain;
435 int ret;
437 /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
438 unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
439 unsigned int vcpu_area = (offset >> PDPT_VCPU_VA_SHIFT);
441 /* Should never fault in another vcpu's area. */
442 BUG_ON(vcpu_area != current->vcpu_id);
444 /* Byte offset within the gdt/ldt sub-area. */
445 offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;
447 if ( likely(is_ldt_area) )
448 {
449 /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
450 LOCK_BIGLOCK(d);
451 ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
452 UNLOCK_BIGLOCK(d);
454 if ( unlikely(ret == 0) )
455 {
456 /* In hypervisor mode? Leave it to the #PF handler to fix up. */
457 if ( !GUEST_MODE(regs) )
458 return 0;
459 /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
460 propagate_page_fault(
461 v->arch.guest_context.ldt_base + offset, regs->error_code);
462 }
463 }
464 else
465 {
466 /* GDT fault: handle the fault as #GP(selector). */
467 regs->error_code = (u16)offset & ~7;
468 (void)do_general_protection(regs);
469 }
471 return EXCRET_fault_fixed;
472 }
474 #ifdef HYPERVISOR_VIRT_END
475 #define IN_HYPERVISOR_RANGE(va) \
476 (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
477 #else
478 #define IN_HYPERVISOR_RANGE(va) \
479 (((va) >= HYPERVISOR_VIRT_START))
480 #endif
482 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
483 {
484 struct vcpu *v = current;
485 struct domain *d = v->domain;
487 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
488 {
489 if ( shadow_mode_external(d) && GUEST_CONTEXT(v, regs) )
490 return shadow_fault(addr, regs);
491 if ( (addr >= PERDOMAIN_VIRT_START) && (addr < PERDOMAIN_VIRT_END) )
492 return handle_perdomain_mapping_fault(
493 addr - PERDOMAIN_VIRT_START, regs);
494 }
495 else if ( unlikely(shadow_mode_enabled(d)) )
496 {
497 return shadow_fault(addr, regs);
498 }
499 else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
500 {
501 LOCK_BIGLOCK(d);
502 if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
503 unlikely(l2_linear_offset(addr) ==
504 d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
505 {
506 ptwr_flush(d, PTWR_PT_ACTIVE);
507 UNLOCK_BIGLOCK(d);
508 return EXCRET_fault_fixed;
509 }
511 if ( KERNEL_MODE(v, regs) &&
512 /* Protection violation on write? No reserved-bit violation? */
513 ((regs->error_code & 0xb) == 0x3) &&
514 ptwr_do_page_fault(d, addr, regs) )
515 {
516 UNLOCK_BIGLOCK(d);
517 return EXCRET_fault_fixed;
518 }
519 UNLOCK_BIGLOCK(d);
520 }
522 return 0;
523 }
525 /*
526 * #PF error code:
527 * Bit 0: Protection violation (=1) ; Page not present (=0)
528 * Bit 1: Write access
529 * Bit 2: User mode (=1) ; Supervisor mode (=0)
530 * Bit 3: Reserved bit violation
531 * Bit 4: Instruction fetch
532 */
533 asmlinkage int do_page_fault(struct cpu_user_regs *regs)
534 {
535 unsigned long addr, fixup;
536 int rc;
538 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
540 DEBUGGER_trap_entry(TRAP_page_fault, regs);
542 perfc_incrc(page_faults);
544 if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
545 return rc;
547 if ( unlikely(!GUEST_MODE(regs)) )
548 {
549 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
550 {
551 perfc_incrc(copy_user_faults);
552 regs->eip = fixup;
553 return 0;
554 }
556 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
558 show_registers(regs);
559 show_page_walk(addr);
560 panic("CPU%d FATAL PAGE FAULT\n"
561 "[error_code=%04x]\n"
562 "Faulting linear address: %p\n",
563 smp_processor_id(), regs->error_code, addr);
564 }
566 propagate_page_fault(addr, regs->error_code);
567 return 0;
568 }
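[Editor's illustration — a hypothetical helper, not present in this file:
decoding the error-code bits documented above for diagnostics.]

    static void describe_pf_error_code(unsigned int ec)
    {
        printk("#PF: %s %s access in %s mode%s%s\n",
               (ec & 1) ? "protection-violation" : "not-present",
               (ec & 2) ? "write" : "read",
               (ec & 4) ? "user" : "supervisor",
               (ec & 8) ? ", reserved bit set" : "",
               (ec & 16) ? ", instruction fetch" : "");
    }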
570 long do_fpu_taskswitch(int set)
571 {
572 struct vcpu *v = current;
574 if ( set )
575 {
576 v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
577 stts();
578 }
579 else
580 {
581 v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
582 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
583 clts();
584 }
586 return 0;
587 }
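[Editor's note — an assumption about typical guest usage, not changeset
text.] Guests reach do_fpu_taskswitch() via the fpu_taskswitch hypercall,
so a paravirtualised kernel replaces its stts()/clts() with roughly:

    HYPERVISOR_fpu_taskswitch(1); /* set virtual CR0.TS, like stts() */
    HYPERVISOR_fpu_taskswitch(0); /* clear virtual CR0.TS, like clts() */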
589 /* Has the guest requested sufficient permission for this I/O access? */
590 static inline int guest_io_okay(
591 unsigned int port, unsigned int bytes,
592 struct vcpu *v, struct cpu_user_regs *regs)
593 {
594 u16 x;
595 #if defined(__x86_64__)
596 /* If in user mode, switch to kernel mode just to read I/O bitmap. */
597 extern void toggle_guest_mode(struct vcpu *);
598 int user_mode = !(v->arch.flags & TF_kernel_mode);
599 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
600 #elif defined(__i386__)
601 #define TOGGLE_MODE() ((void)0)
602 #endif
604 if ( v->arch.iopl >= (KERNEL_MODE(v, regs) ? 1 : 3) )
605 return 1;
607 if ( v->arch.iobmp_limit > (port + bytes) )
608 {
609 TOGGLE_MODE();
610 __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
611 TOGGLE_MODE();
612 if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
613 return 1;
614 }
616 return 0;
617 }
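[Editor's worked example of the bitmap test above.] For a 2-byte access to
port 0x3f9, the u16 is fetched from iobmp + (0x3f9 >> 3) == iobmp + 0x7f
and tested against the mask

    ((1 << 2) - 1) << (0x3f9 & 7)  ==  0x3 << 1  ==  0x6

so the access is permitted only if bits 1 and 2 of that word are both
clear; as in the hardware TSS I/O bitmap, a clear bit grants access.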
619 /* Has the administrator granted sufficient permission for this I/O access? */
620 static inline int admin_io_okay(
621 unsigned int port, unsigned int bytes,
622 struct vcpu *v, struct cpu_user_regs *regs)
623 {
624 struct domain *d = v->domain;
625 u16 x;
627 if ( d->arch.iobmp_mask != NULL )
628 {
629 x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
630 if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
631 return 1;
632 }
634 return 0;
635 }
637 /* Check admin limits. Silently fail the access if it is disallowed. */
638 #define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
639 #define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
640 #define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
641 #define outb_user(_v, _p, _d, _r) \
642 (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
643 #define outw_user(_v, _p, _d, _r) \
644 (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
645 #define outl_user(_v, _p, _d, _r) \
646 (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
648 /* Propagate a fault back to the guest kernel. */
649 #define USER_READ_FAULT 4 /* user mode, read fault */
650 #define USER_WRITE_FAULT 6 /* user mode, write fault */
651 #define PAGE_FAULT(_faultaddr, _errcode) \
652 ({ propagate_page_fault(_faultaddr, _errcode); \
653 return EXCRET_fault_fixed; \
654 })
656 /* Instruction fetch with error handling. */
657 #define insn_fetch(_type, _size, _ptr) \
658 ({ unsigned long _x; \
659 if ( get_user(_x, (_type *)eip) ) \
660 PAGE_FAULT(eip, USER_READ_FAULT); \
661 eip += _size; (_type)_x; })
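[Editor's note] For example, "opcode = insn_fetch(u8, 1, eip);" below reads
one byte of the faulting instruction with get_user(), bounces a read fault
at eip back to the guest as a page fault if the fetch fails, and advances
eip by one.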
663 static int emulate_privileged_op(struct cpu_user_regs *regs)
664 {
665 struct vcpu *v = current;
666 unsigned long *reg, eip = regs->eip, res;
667 u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
668 unsigned int port, i, op_bytes = 4, data;
670 /* Legacy prefixes. */
671 for ( i = 0; i < 8; i++ )
672 {
673 switch ( opcode = insn_fetch(u8, 1, eip) )
674 {
675 case 0x66: /* operand-size override */
676 op_bytes ^= 6; /* switch between 2/4 bytes */
677 break;
678 case 0x67: /* address-size override */
679 case 0x2e: /* CS override */
680 case 0x3e: /* DS override */
681 case 0x26: /* ES override */
682 case 0x64: /* FS override */
683 case 0x65: /* GS override */
684 case 0x36: /* SS override */
685 case 0xf0: /* LOCK */
686 case 0xf2: /* REPNE/REPNZ */
687 break;
688 case 0xf3: /* REP/REPE/REPZ */
689 rep_prefix = 1;
690 break;
691 default:
692 goto done_prefixes;
693 }
694 }
695 done_prefixes:
697 #ifdef __x86_64__
698 /* REX prefix. */
699 if ( (opcode & 0xf0) == 0x40 )
700 {
701 modrm_reg = (opcode & 4) << 1; /* REX.R */
702 modrm_rm = (opcode & 1) << 3; /* REX.B */
704 /* REX.W and REX.X do not need to be decoded. */
705 opcode = insn_fetch(u8, 1, eip);
706 }
707 #endif
709 /* Input/Output String instructions. */
710 if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
711 {
712 if ( rep_prefix && (regs->ecx == 0) )
713 goto done;
715 continue_io_string:
716 switch ( opcode )
717 {
718 case 0x6c: /* INSB */
719 op_bytes = 1;
720 case 0x6d: /* INSW/INSL */
721 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
722 goto fail;
723 switch ( op_bytes )
724 {
725 case 1:
726 data = (u8)inb_user((u16)regs->edx, v, regs);
727 if ( put_user((u8)data, (u8 *)regs->edi) )
728 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
729 break;
730 case 2:
731 data = (u16)inw_user((u16)regs->edx, v, regs);
732 if ( put_user((u16)data, (u16 *)regs->edi) )
733 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
734 break;
735 case 4:
736 data = (u32)inl_user((u16)regs->edx, v, regs);
737 if ( put_user((u32)data, (u32 *)regs->edi) )
738 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
739 break;
740 }
741 regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
742 break;
744 case 0x6e: /* OUTSB */
745 op_bytes = 1;
746 case 0x6f: /* OUTSW/OUTSL */
747 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
748 goto fail;
749 switch ( op_bytes )
750 {
751 case 1:
752 if ( get_user(data, (u8 *)regs->esi) )
753 PAGE_FAULT(regs->esi, USER_READ_FAULT);
754 outb_user((u8)data, (u16)regs->edx, v, regs);
755 break;
756 case 2:
757 if ( get_user(data, (u16 *)regs->esi) )
758 PAGE_FAULT(regs->esi, USER_READ_FAULT);
759 outw_user((u16)data, (u16)regs->edx, v, regs);
760 break;
761 case 4:
762 if ( get_user(data, (u32 *)regs->esi) )
763 PAGE_FAULT(regs->esi, USER_READ_FAULT);
764 outl_user((u32)data, (u16)regs->edx, v, regs);
765 break;
766 }
767 regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
768 break;
769 }
771 if ( rep_prefix && (--regs->ecx != 0) )
772 {
773 if ( !hypercall_preempt_check() )
774 goto continue_io_string;
775 eip = regs->eip;
776 }
778 goto done;
779 }
781 /* I/O Port and Interrupt Flag instructions. */
782 switch ( opcode )
783 {
784 case 0xe4: /* IN imm8,%al */
785 op_bytes = 1;
786 case 0xe5: /* IN imm8,%eax */
787 port = insn_fetch(u8, 1, eip);
788 exec_in:
789 if ( !guest_io_okay(port, op_bytes, v, regs) )
790 goto fail;
791 switch ( op_bytes )
792 {
793 case 1:
794 regs->eax &= ~0xffUL;
795 regs->eax |= (u8)inb_user(port, v, regs);
796 break;
797 case 2:
798 regs->eax &= ~0xffffUL;
799 regs->eax |= (u16)inw_user(port, v, regs);
800 break;
801 case 4:
802 regs->eax = (u32)inl_user(port, v, regs);
803 break;
804 }
805 goto done;
807 case 0xec: /* IN %dx,%al */
808 op_bytes = 1;
809 case 0xed: /* IN %dx,%eax */
810 port = (u16)regs->edx;
811 goto exec_in;
813 case 0xe6: /* OUT %al,imm8 */
814 op_bytes = 1;
815 case 0xe7: /* OUT %eax,imm8 */
816 port = insn_fetch(u8, 1, eip);
817 exec_out:
818 if ( !guest_io_okay(port, op_bytes, v, regs) )
819 goto fail;
820 switch ( op_bytes )
821 {
822 case 1:
823 outb_user((u8)regs->eax, port, v, regs);
824 break;
825 case 2:
826 outw_user((u16)regs->eax, port, v, regs);
827 break;
828 case 4:
829 outl_user((u32)regs->eax, port, v, regs);
830 break;
831 }
832 goto done;
834 case 0xee: /* OUT %al,%dx */
835 op_bytes = 1;
836 case 0xef: /* OUT %eax,%dx */
837 port = (u16)regs->edx;
838 goto exec_out;
840 case 0xfa: /* CLI */
841 case 0xfb: /* STI */
842 if ( v->arch.iopl < (KERNEL_MODE(v, regs) ? 1 : 3) )
843 goto fail;
844 /*
845 * This is just too dangerous to allow, in my opinion. Consider if the
846 * caller then tries to reenable interrupts using POPF: we can't trap
847 * that and we'll end up with hard-to-debug lockups. Fast & loose will
848 * do for us. :-)
849 */
850 /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
851 goto done;
853 case 0x0f: /* Two-byte opcode */
854 break;
856 default:
857 goto fail;
858 }
860 /* Remaining instructions only emulated from guest kernel. */
861 if ( !KERNEL_MODE(v, regs) )
862 goto fail;
864 /* Privileged (ring 0) instructions. */
865 opcode = insn_fetch(u8, 1, eip);
866 switch ( opcode )
867 {
868 case 0x06: /* CLTS */
869 (void)do_fpu_taskswitch(0);
870 break;
872 case 0x09: /* WBINVD */
873 /* Ignore the instruction if unprivileged. */
874 if ( !IS_CAPABLE_PHYSDEV(v->domain) )
875 DPRINTK("Non-physdev domain attempted WBINVD.\n");
876 else
877 wbinvd();
878 break;
880 case 0x20: /* MOV CR?,<reg> */
881 opcode = insn_fetch(u8, 1, eip);
882 modrm_reg |= (opcode >> 3) & 7;
883 modrm_rm |= (opcode >> 0) & 7;
884 reg = decode_register(modrm_rm, regs, 0);
885 switch ( modrm_reg )
886 {
887 case 0: /* Read CR0 */
888 *reg = (read_cr0() & ~X86_CR0_TS) |
889 v->arch.guest_context.ctrlreg[0];
890 break;
892 case 2: /* Read CR2 */
893 *reg = v->arch.guest_context.ctrlreg[2];
894 break;
896 case 3: /* Read CR3 */
897 *reg = pagetable_get_paddr(v->arch.guest_table);
898 break;
900 case 4: /* Read CR4 */
901 /*
902 * Guests can read CR4 to see what features Xen has enabled. We
903 * therefore lie about PGE & PSE as they are unavailable to guests.
904 */
905 *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
906 break;
908 default:
909 goto fail;
910 }
911 break;
913 case 0x21: /* MOV DR?,<reg> */
914 opcode = insn_fetch(u8, 1, eip);
915 modrm_reg |= (opcode >> 3) & 7;
916 modrm_rm |= (opcode >> 0) & 7;
917 reg = decode_register(modrm_rm, regs, 0);
918 if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
919 goto fail;
920 *reg = res;
921 break;
923 case 0x22: /* MOV <reg>,CR? */
924 opcode = insn_fetch(u8, 1, eip);
925 modrm_reg |= (opcode >> 3) & 7;
926 modrm_rm |= (opcode >> 0) & 7;
927 reg = decode_register(modrm_rm, regs, 0);
928 switch ( modrm_reg )
929 {
930 case 0: /* Write CR0 */
931 if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
932 {
933 DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
934 goto fail;
935 }
936 (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
937 break;
939 case 2: /* Write CR2 */
940 v->arch.guest_context.ctrlreg[2] = *reg;
941 v->vcpu_info->arch.cr2 = *reg;
942 break;
944 case 3: /* Write CR3 */
945 LOCK_BIGLOCK(v->domain);
946 (void)new_guest_cr3(*reg);
947 UNLOCK_BIGLOCK(v->domain);
948 break;
950 case 4: /* Write CR4 */
951 if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
952 {
953 DPRINTK("Attempt to change CR4 flags.\n");
954 goto fail;
955 }
956 break;
958 default:
959 goto fail;
960 }
961 break;
963 case 0x23: /* MOV <reg>,DR? */
964 opcode = insn_fetch(u8, 1, eip);
965 modrm_reg |= (opcode >> 3) & 7;
966 modrm_rm |= (opcode >> 0) & 7;
967 reg = decode_register(modrm_rm, regs, 0);
968 if ( do_set_debugreg(modrm_reg, *reg) != 0 )
969 goto fail;
970 break;
972 case 0x30: /* WRMSR */
973 /* Ignore the instruction if unprivileged. */
974 if ( !IS_PRIV(v->domain) )
975 DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n",
976 _p(regs->ecx), (long)regs->eax, (long)regs->edx);
977 else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) )
978 goto fail;
979 break;
981 case 0x32: /* RDMSR */
982 if ( !IS_PRIV(v->domain) )
983 DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n",
984 _p(regs->ecx), (long)regs->eax, (long)regs->edx);
985 /* Everyone can read the MSR space. */
986 if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
987 goto fail;
988 break;
990 default:
991 goto fail;
992 }
994 done:
995 regs->eip = eip;
996 return EXCRET_fault_fixed;
998 fail:
999 return 0;
1000 }
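[Editor's walk-through, derived from the code above — illustrative only.]
A guest kernel executing

    mov %cr0,%eax /* bytes 0f 20 c0: privileged, so #GP(0) outside ring 0 */

traps to do_general_protection() below, which sees error_code == 0 and
calls emulate_privileged_op(); the 0x0f/0x20 case decodes the ModRM byte
(reg 0 => CR0, rm 0 => %eax), returns the physical CR0 with TS replaced by
the guest's virtual TS from ctrlreg[0], and advances regs->eip past the
instruction.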
1002 asmlinkage int do_general_protection(struct cpu_user_regs *regs)
1003 {
1004 struct vcpu *v = current;
1005 struct trap_bounce *tb = &v->arch.trap_bounce;
1006 trap_info_t *ti;
1007 unsigned long fixup;
1009 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
1011 if ( regs->error_code & 1 )
1012 goto hardware_gp;
1014 if ( !GUEST_MODE(regs) )
1015 goto gp_in_kernel;
1017 /*
1018 * Cunning trick to allow arbitrary "INT n" handling.
1020 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
1021 * instruction from trapping to the appropriate vector, when that might not
1022 * be expected by Xen or the guest OS. For example, that entry might be for
1023 * a fault handler (unlike traps, faults don't increment EIP), or might
1024 * expect an error code on the stack (which a software trap never
1025 * provides), or might be a hardware interrupt handler that doesn't like
1026 * being called spuriously.
1028 * Instead, a GPF occurs with the faulting IDT vector in the error code.
1029 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
1030 * clear to indicate that it's a software fault, not hardware.
1032 * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
1033 * okay because they can only be triggered by an explicit DPL-checked
1034 * instruction. The DPL specified by the guest OS for these vectors is NOT
1035 * CHECKED!!
1036 */
1037 if ( (regs->error_code & 3) == 2 )
1038 {
1039 /* This fault must be due to <INT n> instruction. */
1040 ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
1041 if ( PERMIT_SOFTINT(TI_GET_DPL(ti), v, regs) )
1042 {
1043 tb->flags = TBF_EXCEPTION;
1044 regs->eip += 2;
1045 goto finish_propagation;
1046 }
1047 }
1049 /* Emulate some simple privileged and I/O instructions. */
1050 if ( (regs->error_code == 0) &&
1051 emulate_privileged_op(regs) )
1052 return 0;
1054 #if defined(__i386__)
1055 if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
1056 (regs->error_code == 0) &&
1057 gpf_emulate_4gb(regs) )
1058 return 0;
1059 #endif
1061 /* Pass on GPF as is. */
1062 ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
1063 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
1064 tb->error_code = regs->error_code;
1065 finish_propagation:
1066 tb->cs = ti->cs;
1067 tb->eip = ti->address;
1068 if ( TI_GET_IF(ti) )
1069 tb->flags |= TBF_INTERRUPT;
1070 return 0;
1072 gp_in_kernel:
1074 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
1075 {
1076 DPRINTK("GPF (%04x): %p -> %p\n",
1077 regs->error_code, _p(regs->eip), _p(fixup));
1078 regs->eip = fixup;
1079 return 0;
1080 }
1082 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
1084 hardware_gp:
1085 show_registers(regs);
1086 panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
1087 smp_processor_id(), regs->error_code);
1088 return 0;
1089 }
1092 /* Defer dom0 notification to softirq context (unsafe in NMI context). */
1093 static unsigned long nmi_dom0_softirq_reason;
1094 #define NMI_DOM0_PARITY_ERR 0
1095 #define NMI_DOM0_IO_ERR 1
1096 #define NMI_DOM0_UNKNOWN 2
1098 static void nmi_dom0_softirq(void)
1099 {
1100 if ( dom0 == NULL )
1101 return;
1103 if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
1104 send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
1106 if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
1107 send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
1109 if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
1110 send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
1111 }
1113 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
1114 {
1115 switch ( opt_nmi[0] )
1116 {
1117 case 'd': /* 'dom0' */
1118 set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
1119 raise_softirq(NMI_DOM0_SOFTIRQ);
1120 case 'i': /* 'ignore' */
1121 break;
1122 default: /* 'fatal' */
1123 console_force_unlock();
1124 printk("\n\nNMI - MEMORY ERROR\n");
1125 fatal_trap(TRAP_nmi, regs);
1126 }
1128 outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable parity check */
1129 mdelay(1);
1130 outb((inb(0x61) & 0x0b) | 0x00, 0x61); /* enable parity check */
1131 }
1133 asmlinkage void io_check_error(struct cpu_user_regs *regs)
1134 {
1135 switch ( opt_nmi[0] )
1136 {
1137 case 'd': /* 'dom0' */
1138 set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
1139 raise_softirq(NMI_DOM0_SOFTIRQ);
1140 case 'i': /* 'ignore' */
1141 break;
1142 default: /* 'fatal' */
1143 console_force_unlock();
1144 printk("\n\nNMI - I/O ERROR\n");
1145 fatal_trap(TRAP_nmi, regs);
1146 }
1148 outb((inb(0x61) & 0x0f) | 0x08, 0x61); /* clear-and-disable IOCK */
1149 mdelay(1);
1150 outb((inb(0x61) & 0x07) | 0x00, 0x61); /* enable IOCK */
1151 }
1153 static void unknown_nmi_error(unsigned char reason)
1154 {
1155 switch ( opt_nmi[0] )
1156 {
1157 case 'd': /* 'dom0' */
1158 set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
1159 raise_softirq(NMI_DOM0_SOFTIRQ);
1160 case 'i': /* 'ignore' */
1161 break;
1162 default: /* 'fatal' */
1163 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
1164 printk("Dazed and confused, but trying to continue\n");
1165 printk("Do you have a strange power saving mode enabled?\n");
1169 static int dummy_nmi_callback(struct cpu_user_regs *regs, int cpu)
1170 {
1171 return 0;
1172 }
1174 static nmi_callback_t nmi_callback = dummy_nmi_callback;
1176 asmlinkage void do_nmi(struct cpu_user_regs *regs)
1177 {
1178 unsigned int cpu = smp_processor_id();
1179 unsigned char reason;
1181 ++nmi_count(cpu);
1183 if ( nmi_callback(regs, cpu) )
1184 return;
1186 if ( nmi_watchdog )
1187 nmi_watchdog_tick(regs);
1189 /* Only the BSP gets external NMIs from the system. */
1190 if ( cpu == 0 )
1191 {
1192 reason = inb(0x61);
1193 if ( reason & 0x80 )
1194 mem_parity_error(regs);
1195 else if ( reason & 0x40 )
1196 io_check_error(regs);
1197 else if ( !nmi_watchdog )
1198 unknown_nmi_error((unsigned char)(reason&0xff));
1199 }
1200 }
1202 void set_nmi_callback(nmi_callback_t callback)
1203 {
1204 nmi_callback = callback;
1205 }
1207 void unset_nmi_callback(void)
1208 {
1209 nmi_callback = dummy_nmi_callback;
1210 }
1212 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
1213 {
1214 struct trap_bounce *tb;
1215 trap_info_t *ti;
1217 /* Prevent recursion. */
1218 clts();
1220 setup_fpu(current);
1222 if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
1223 {
1224 tb = &current->arch.trap_bounce;
1225 ti = &current->arch.guest_context.trap_ctxt[TRAP_no_device];
1227 tb->flags = TBF_EXCEPTION;
1228 tb->cs = ti->cs;
1229 tb->eip = ti->address;
1230 if ( TI_GET_IF(ti) )
1231 tb->flags |= TBF_INTERRUPT;
1233 current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
1234 }
1236 return EXCRET_fault_fixed;
1237 }
1239 asmlinkage int do_debug(struct cpu_user_regs *regs)
1240 {
1241 unsigned long condition;
1242 struct vcpu *v = current;
1243 struct trap_bounce *tb = &v->arch.trap_bounce;
1244 trap_info_t *ti;
1246 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
1248 /* Mask out spurious debug traps due to lazy DR7 setting */
1249 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
1250 (v->arch.guest_context.debugreg[7] == 0) )
1251 {
1252 __asm__("mov %0,%%db7" : : "r" (0UL));
1253 goto out;
1254 }
1256 DEBUGGER_trap_entry(TRAP_debug, regs);
1258 if ( !GUEST_MODE(regs) )
1259 {
1260 /* Clear TF just for absolute sanity. */
1261 regs->eflags &= ~EF_TF;
1262 /*
1263 * We ignore watchpoints when they trigger within Xen. This may happen
1264 * when a buffer is passed to us which previously had a watchpoint set
1265 * on it. No need to bump EIP; the only faulting trap is an instruction
1266 * breakpoint, which can't happen to us.
1267 */
1268 goto out;
1269 }
1271 /* Save debug status register where guest OS can peek at it */
1272 v->arch.guest_context.debugreg[6] = condition;
1274 ti = &v->arch.guest_context.trap_ctxt[TRAP_debug];
1275 tb->flags = TBF_EXCEPTION;
1276 tb->cs = ti->cs;
1277 tb->eip = ti->address;
1278 if ( TI_GET_IF(ti) )
1279 tb->flags |= TBF_INTERRUPT;
1281 out:
1282 return EXCRET_not_a_fault;
1283 }
1285 asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
1286 {
1287 return EXCRET_not_a_fault;
1288 }
1290 void set_intr_gate(unsigned int n, void *addr)
1291 {
1292 #ifdef __i386__
1293 int i;
1294 /* Keep secondary tables in sync with IRQ updates. */
1295 for ( i = 1; i < NR_CPUS; i++ )
1296 if ( idt_tables[i] != NULL )
1297 _set_gate(&idt_tables[i][n], 14, 0, addr);
1298 #endif
1299 _set_gate(&idt_table[n], 14, 0, addr);
1300 }
1302 void set_system_gate(unsigned int n, void *addr)
1303 {
1304 _set_gate(idt_table+n,14,3,addr);
1305 }
1307 void set_task_gate(unsigned int n, unsigned int sel)
1308 {
1309 idt_table[n].a = sel << 16;
1310 idt_table[n].b = 0x8500;
1311 }
1313 void set_tss_desc(unsigned int n, void *addr)
1314 {
1315 _set_tssldt_desc(
1316 gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
1317 (unsigned long)addr,
1318 offsetof(struct tss_struct, __cacheline_filler) - 1,
1319 9);
1320 }
1322 void __init trap_init(void)
1323 {
1324 extern void percpu_traps_init(void);
1326 /*
1327 * Note that interrupt gates are always used, rather than trap gates. We
1328 * must have interrupts disabled until DS/ES/FS/GS are saved because the
1329 * first activation must have the "bad" value(s) for these registers and
1330 * we may lose them if another activation is installed before they are
1331 * saved. The page-fault handler also needs interrupts disabled until %cr2
1332 * has been read and saved on the stack.
1333 */
1334 set_intr_gate(TRAP_divide_error,&divide_error);
1335 set_intr_gate(TRAP_debug,&debug);
1336 set_intr_gate(TRAP_nmi,&nmi);
1337 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
1338 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
1339 set_intr_gate(TRAP_bounds,&bounds);
1340 set_intr_gate(TRAP_invalid_op,&invalid_op);
1341 set_intr_gate(TRAP_no_device,&device_not_available);
1342 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
1343 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
1344 set_intr_gate(TRAP_no_segment,&segment_not_present);
1345 set_intr_gate(TRAP_stack_error,&stack_segment);
1346 set_intr_gate(TRAP_gp_fault,&general_protection);
1347 set_intr_gate(TRAP_page_fault,&page_fault);
1348 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
1349 set_intr_gate(TRAP_copro_error,&coprocessor_error);
1350 set_intr_gate(TRAP_alignment_check,&alignment_check);
1351 set_intr_gate(TRAP_machine_check,&machine_check);
1352 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
1354 percpu_traps_init();
1356 cpu_init();
1358 open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
1359 }
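[Editor's note — architectural background, not changeset text.] The gate
type 14 passed to _set_gate() above selects an interrupt gate, which clears
EFLAGS.IF on entry; a trap gate (type 15) would leave interrupts enabled,
which the comment above explains is unsafe before the segment registers and
%cr2 are saved. set_system_gate() additionally uses DPL 3 so that INT3 and
INTO remain usable from all privilege levels, as noted inline.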
1362 long do_set_trap_table(trap_info_t *traps)
1363 {
1364 trap_info_t cur;
1365 trap_info_t *dst = current->arch.guest_context.trap_ctxt;
1366 long rc = 0;
1368 LOCK_BIGLOCK(current->domain);
1370 for ( ; ; )
1371 {
1372 if ( hypercall_preempt_check() )
1373 {
1374 rc = hypercall1_create_continuation(
1375 __HYPERVISOR_set_trap_table, traps);
1376 break;
1377 }
1379 if ( copy_from_user(&cur, traps, sizeof(cur)) )
1380 {
1381 rc = -EFAULT;
1382 break;
1383 }
1385 if ( cur.address == 0 )
1386 break;
1388 if ( !VALID_CODESEL(cur.cs) )
1389 {
1390 rc = -EPERM;
1391 break;
1392 }
1394 memcpy(&dst[cur.vector], &cur, sizeof(cur));
1396 if ( cur.vector == 0x80 )
1397 init_int80_direct_trap(current);
1399 traps++;
1400 }
1402 UNLOCK_BIGLOCK(current->domain);
1404 return rc;
1405 }
1408 long set_debugreg(struct vcpu *p, int reg, unsigned long value)
1409 {
1410 int i;
1412 switch ( reg )
1413 {
1414 case 0:
1415 if ( !access_ok(value, sizeof(long)) )
1416 return -EPERM;
1417 if ( p == current )
1418 __asm__ ( "mov %0, %%db0" : : "r" (value) );
1419 break;
1420 case 1:
1421 if ( !access_ok(value, sizeof(long)) )
1422 return -EPERM;
1423 if ( p == current )
1424 __asm__ ( "mov %0, %%db1" : : "r" (value) );
1425 break;
1426 case 2:
1427 if ( !access_ok(value, sizeof(long)) )
1428 return -EPERM;
1429 if ( p == current )
1430 __asm__ ( "mov %0, %%db2" : : "r" (value) );
1431 break;
1432 case 3:
1433 if ( !access_ok(value, sizeof(long)) )
1434 return -EPERM;
1435 if ( p == current )
1436 __asm__ ( "mov %0, %%db3" : : "r" (value) );
1437 break;
1438 case 6:
1439 /*
1440 * DR6: Bits 4-11,16-31 reserved (set to 1).
1441 * Bit 12 reserved (set to 0).
1442 */
1443 value &= 0xffffefff; /* reserved bits => 0 */
1444 value |= 0xffff0ff0; /* reserved bits => 1 */
1445 if ( p == current )
1446 __asm__ ( "mov %0, %%db6" : : "r" (value) );
1447 break;
1448 case 7:
1449 /*
1450 * DR7: Bit 10 reserved (set to 1).
1451 * Bits 11-12,14-15 reserved (set to 0).
1452 * Privileged bits:
1453 * GD (bit 13): must be 0.
1454 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
1455 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
1456 */
1457 /* DR7 == 0 => debugging disabled for this domain. */
1458 if ( value != 0 )
1459 {
1460 value &= 0xffff27ff; /* reserved bits => 0 */
1461 value |= 0x00000400; /* reserved bits => 1 */
1462 if ( (value & (1<<13)) != 0 ) return -EPERM;
1463 for ( i = 0; i < 16; i += 2 )
1464 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
1465 }
1466 if ( p == current )
1467 __asm__ ( "mov %0, %%db7" : : "r" (value) );
1468 break;
1469 default:
1470 return -EINVAL;
1471 }
1473 p->arch.guest_context.debugreg[reg] = value;
1474 return 0;
1475 }
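[Editor's worked example for the DR7 checks above.] Writing the value
0x00000401 — L0 (bit 0) enabling breakpoint 0, plus the always-one reserved
bit 10 — is accepted: GD (bit 13) is clear and every R/Wn and LENn two-bit
field in bits 16-31 is 00, so none holds the disallowed pattern 10b (I/O
breakpoints and 8-byte lengths are refused). The value is then loaded into
%db7 when the vcpu is current.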
1477 long do_set_debugreg(int reg, unsigned long value)
1478 {
1479 return set_debugreg(current, reg, value);
1480 }
1482 unsigned long do_get_debugreg(int reg)
1483 {
1484 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
1485 return current->arch.guest_context.debugreg[reg];
1486 }
1488 /*
1489 * Local variables:
1490 * mode: C
1491 * c-set-style: "BSD"
1492 * c-basic-offset: 4
1493 * tab-width: 4
1494 * indent-tabs-mode: nil
1495 * End:
1496 */