ia64/xen-unstable

view xen/arch/x86/traps.c @ 5723:d332d4df452e

Fix stupid switch stmt mess-up that broke emulation
of move to/from debug register.

author:   kaf24@firebug.cl.cam.ac.uk
date:     Mon Jul 11 09:22:15 2005 +0000 (2005-07-11)
parents:  f261f14b9781
children: a83ac0806d6b

/******************************************************************************
 * arch/x86/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <asm/shadow.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
#include <asm/msr.h>
#include <asm/x86_emulate.h>

/*
 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
 *  fatal:  Xen prints diagnostic message and then hangs.
 *  dom0:   The NMI is virtualised to DOM0.
 *  ignore: The NMI error is cleared and ignored.
 */
#ifdef NDEBUG
char opt_nmi[10] = "dom0";
#else
char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);

/* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
idt_entry_t idt_table[IDT_ENTRIES];

#define DECLARE_TRAP_HANDLER(_name)                    \
asmlinkage void _name(void);                           \
asmlinkage int do_ ## _name(struct cpu_user_regs *regs)

asmlinkage void nmi(void);
DECLARE_TRAP_HANDLER(divide_error);
DECLARE_TRAP_HANDLER(debug);
DECLARE_TRAP_HANDLER(int3);
DECLARE_TRAP_HANDLER(overflow);
DECLARE_TRAP_HANDLER(bounds);
DECLARE_TRAP_HANDLER(invalid_op);
DECLARE_TRAP_HANDLER(device_not_available);
DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
DECLARE_TRAP_HANDLER(invalid_TSS);
DECLARE_TRAP_HANDLER(segment_not_present);
DECLARE_TRAP_HANDLER(stack_segment);
DECLARE_TRAP_HANDLER(general_protection);
DECLARE_TRAP_HANDLER(page_fault);
DECLARE_TRAP_HANDLER(coprocessor_error);
DECLARE_TRAP_HANDLER(simd_coprocessor_error);
DECLARE_TRAP_HANDLER(alignment_check);
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
DECLARE_TRAP_HANDLER(machine_check);

long do_set_debugreg(int reg, unsigned long value);
unsigned long do_get_debugreg(int reg);

static int debug_stack_lines = 20;
integer_param("debug_stack_lines", debug_stack_lines);

static inline int kernel_text_address(unsigned long addr)
{
    extern char _stext, _etext;
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

void show_guest_stack(void)
{
    int i;
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    unsigned long *stack = (unsigned long *)regs->esp, addr;

    printk("Guest stack trace from "__OP"sp=%p:\n ", stack);

    for ( i = 0; i < (debug_stack_lines*8); i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( get_user(addr, stack) )
        {
            if ( i != 0 )
                printk("\n ");
            printk("Fault while accessing guest memory.");
            i = 1;
            break;
        }
        if ( (i != 0) && ((i % 8) == 0) )
            printk("\n ");
        printk("%p ", _p(addr));
        stack++;
    }
    if ( i == 0 )
        printk("Stack empty.");
    printk("\n");
}

void show_trace(unsigned long *esp)
{
    unsigned long *stack = esp, addr;
    int i = 0;

    printk("Xen call trace from "__OP"sp=%p:\n ", stack);

    while ( ((long) stack & (STACK_SIZE-1)) != 0 )
    {
        addr = *stack++;
        if ( kernel_text_address(addr) )
        {
            if ( (i != 0) && ((i % 6) == 0) )
                printk("\n ");
            printk("[<%p>] ", _p(addr));
            i++;
        }
    }
    if ( i == 0 )
        printk("Trace empty.");
    printk("\n");
}

void show_stack(unsigned long *esp)
{
    unsigned long *stack = esp, addr;
    int i;

    printk("Xen stack trace from "__OP"sp=%p:\n ", stack);

    for ( i = 0; i < (debug_stack_lines*8); i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( (i != 0) && ((i % 8) == 0) )
            printk("\n ");
        addr = *stack++;
        if ( kernel_text_address(addr) )
            printk("[%p] ", _p(addr));
        else
            printk("%p ", _p(addr));
    }
    if ( i == 0 )
        printk("Stack empty.");
    printk("\n");

    show_trace(esp);
}

/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    watchdog_disable();
    console_start_sync();

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address: %p\n", _p(cr2));
        show_page_walk(cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    (void)debugger_trap_fatal(trapnr, regs);

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

static inline int do_trap(int trapnr, char *str,
                          struct cpu_user_regs *regs,
                          int use_error_code)
{
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_MODE(regs) )
        goto xen_fault;

    ti = &current->arch.guest_context.trap_ctxt[trapnr];
    tb->flags = TBF_EXCEPTION;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}

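/*
 * Note: the trap_bounce fields filled in by do_trap() and friends are not
 * acted on here. They are consumed on the next return to guest context,
 * where the exit path builds an exception frame on the guest kernel stack
 * and vectors it to ti->cs:ti->address; TBF_INTERRUPT additionally masks
 * guest event delivery, as an interrupt gate would.
 */
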
#define DO_ERROR_NOCODE(trapnr, str, name)           \
asmlinkage int do_##name(struct cpu_user_regs *regs) \
{                                                    \
    return do_trap(trapnr, str, regs, 0);            \
}

#define DO_ERROR(trapnr, str, name)                  \
asmlinkage int do_##name(struct cpu_user_regs *regs) \
{                                                    \
    return do_trap(trapnr, str, regs, 1);            \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)

asmlinkage int do_int3(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_MODE(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
    tb->flags = TBF_EXCEPTION;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;

    return 0;
}

asmlinkage int do_machine_check(struct cpu_user_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
    return 0;
}

void propagate_page_fault(unsigned long addr, u16 error_code)
{
    trap_info_t *ti;
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;

    ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2 = addr;
    tb->error_code = error_code;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;

    v->arch.guest_context.ctrlreg[2] = addr;
}

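/*
 * Note that both the bounce frame's cr2 and the guest's virtual %cr2
 * (ctrlreg[2]) are updated above, so a guest fault handler that reads %cr2
 * through the MOV-from-CR2 emulation below sees the (possibly adjusted)
 * faulting address it expects.
 */
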
static int handle_perdomain_mapping_fault(
    unsigned long offset, struct cpu_user_regs *regs)
{
    extern int map_ldt_shadow_page(unsigned int);

    struct vcpu *v = current;
    struct domain *d = v->domain;
    int ret;

    /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
    unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
    unsigned int vcpu_area = (offset >> PDPT_VCPU_VA_SHIFT);

    /* Should never fault in another vcpu's area. */
    BUG_ON(vcpu_area != current->vcpu_id);

    /* Byte offset within the gdt/ldt sub-area. */
    offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;

    if ( likely(is_ldt_area) )
    {
        /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
        LOCK_BIGLOCK(d);
        ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
        UNLOCK_BIGLOCK(d);

        if ( unlikely(ret == 0) )
        {
            /* In hypervisor mode? Leave it to the #PF handler to fix up. */
            if ( !GUEST_MODE(regs) )
                return 0;
            /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
            propagate_page_fault(
                v->arch.guest_context.ldt_base + offset, regs->error_code);
        }
    }
    else
    {
        /* GDT fault: handle the fault as #GP(selector). */
        regs->error_code = (u16)offset & ~7;
        (void)do_general_protection(regs);
    }

    return EXCRET_fault_fixed;
}

asmlinkage int do_page_fault(struct cpu_user_regs *regs)
{
    unsigned long addr, fixup;
    struct vcpu *v = current;
    struct domain *d = v->domain;

    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
                !shadow_mode_enabled(d)) )
    {
        LOCK_BIGLOCK(d);
        if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(d, PTWR_PT_ACTIVE);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }

        if ( (addr < HYPERVISOR_VIRT_START) &&
             KERNEL_MODE(v, regs) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(d, addr) )
        {
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }
        UNLOCK_BIGLOCK(d);
    }

    if ( unlikely(shadow_mode_enabled(d)) &&
         ((addr < HYPERVISOR_VIRT_START) ||
#if defined(__x86_64__)
          (addr >= HYPERVISOR_VIRT_END) ||
#endif
          (shadow_mode_external(d) && GUEST_CONTEXT(v, regs))) &&
         shadow_fault(addr, regs) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= PERDOMAIN_VIRT_START) &&
         unlikely(addr < PERDOMAIN_VIRT_END) &&
         handle_perdomain_mapping_fault(addr - PERDOMAIN_VIRT_START, regs) )
        return EXCRET_fault_fixed;

    if ( !GUEST_MODE(regs) )
        goto xen_fault;

    propagate_page_fault(addr, regs->error_code);
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !shadow_mode_enabled(d) )
            DPRINTK("Page fault: %p -> %p\n", _p(regs->eip), _p(fixup));
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    show_registers(regs);
    show_page_walk(addr);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address: %p\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}

long do_fpu_taskswitch(int set)
{
    struct vcpu *v = current;

    if ( set )
    {
        v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
        stts();
    }
    else
    {
        v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
        if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
            clts();
    }

    return 0;
}

/* Has the guest requested sufficient permission for this I/O access? */
static inline int guest_io_okay(
    unsigned int port, unsigned int bytes,
    struct vcpu *v, struct cpu_user_regs *regs)
{
    u16 x;
#if defined(__x86_64__)
    /* If in user mode, switch to kernel mode just to read I/O bitmap. */
    extern void toggle_guest_mode(struct vcpu *);
    int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#elif defined(__i386__)
#define TOGGLE_MODE() ((void)0)
#endif

    if ( v->arch.iopl >= (KERNEL_MODE(v, regs) ? 1 : 3) )
        return 1;

    if ( v->arch.iobmp_limit > (port + bytes) )
    {
        TOGGLE_MODE();
        __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
        TOGGLE_MODE();
        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
            return 1;
    }

    return 0;
}

/* Has the administrator granted sufficient permission for this I/O access? */
static inline int admin_io_okay(
    unsigned int port, unsigned int bytes,
    struct vcpu *v, struct cpu_user_regs *regs)
{
    struct domain *d = v->domain;
    u16 x;

    if ( d->arch.iobmp_mask != NULL )
    {
        x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
            return 1;
    }

    return 0;
}

/* Check admin limits. Silently fail the access if it is disallowed. */
#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
#define outb_user(_v, _p, _d, _r) \
    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
#define outw_user(_v, _p, _d, _r) \
    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
#define outl_user(_v, _p, _d, _r) \
    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))

/* Propagate a fault back to the guest kernel. */
#define USER_READ_FAULT  4 /* user mode, read fault */
#define USER_WRITE_FAULT 6 /* user mode, write fault */
#define PAGE_FAULT(_faultaddr, _errcode)        \
({  propagate_page_fault(_faultaddr, _errcode); \
    return EXCRET_fault_fixed;                  \
})

/* Instruction fetch with error handling. */
#define insn_fetch(_type, _size, _ptr)    \
({  unsigned long _x;                     \
    if ( get_user(_x, (_type *)eip) )     \
        PAGE_FAULT(eip, USER_READ_FAULT); \
    eip += _size; (_type)_x; })

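/*
 * Note: insn_fetch() is a GNU C statement expression. It yields the value
 * fetched at 'eip' and advances the local 'eip' cursor by _size bytes; on
 * a faulting fetch, PAGE_FAULT() bounces a read fault to the guest and
 * executes 'return EXCRET_fault_fixed' from the enclosing function, so no
 * caller ever sees a partially decoded instruction.
 */
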
static int emulate_privileged_op(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    unsigned long *reg, eip = regs->eip, res;
    u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
    unsigned int port, i, op_bytes = 4, data;

    /* Legacy prefixes. */
    for ( i = 0; i < 8; i++ )
    {
        switch ( opcode = insn_fetch(u8, 1, eip) )
        {
        case 0x66: /* operand-size override */
            op_bytes ^= 6; /* switch between 2/4 bytes */
            break;
        case 0x67: /* address-size override */
        case 0x2e: /* CS override */
        case 0x3e: /* DS override */
        case 0x26: /* ES override */
        case 0x64: /* FS override */
        case 0x65: /* GS override */
        case 0x36: /* SS override */
        case 0xf0: /* LOCK */
        case 0xf2: /* REPNE/REPNZ */
            break;
        case 0xf3: /* REP/REPE/REPZ */
            rep_prefix = 1;
            break;
        default:
            goto done_prefixes;
        }
    }
 done_prefixes:

#ifdef __x86_64__
    /* REX prefix. */
    if ( (opcode & 0xf0) == 0x40 )
    {
        modrm_reg = (opcode & 4) << 1; /* REX.R */
        modrm_rm = (opcode & 1) << 3;  /* REX.B */

        /* REX.W and REX.X do not need to be decoded. */
        opcode = insn_fetch(u8, 1, eip);
    }
#endif

    /* Input/Output String instructions. */
    if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
    {
        if ( rep_prefix && (regs->ecx == 0) )
            goto done;

    continue_io_string:
        switch ( opcode )
        {
        case 0x6c: /* INSB */
            op_bytes = 1;
            /* fall through */
        case 0x6d: /* INSW/INSL */
            if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
                goto fail;
            switch ( op_bytes )
            {
            case 1:
                data = (u8)inb_user((u16)regs->edx, v, regs);
                if ( put_user((u8)data, (u8 *)regs->edi) )
                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                break;
            case 2:
                data = (u16)inw_user((u16)regs->edx, v, regs);
                if ( put_user((u16)data, (u16 *)regs->edi) )
                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                break;
            case 4:
                data = (u32)inl_user((u16)regs->edx, v, regs);
                if ( put_user((u32)data, (u32 *)regs->edi) )
                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                break;
            }
            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
            break;

        case 0x6e: /* OUTSB */
            op_bytes = 1;
            /* fall through */
        case 0x6f: /* OUTSW/OUTSL */
            if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
                goto fail;
            switch ( op_bytes )
            {
            case 1:
                if ( get_user(data, (u8 *)regs->esi) )
                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
                outb_user((u8)data, (u16)regs->edx, v, regs);
                break;
            case 2:
                if ( get_user(data, (u16 *)regs->esi) )
                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
                outw_user((u16)data, (u16)regs->edx, v, regs);
                break;
            case 4:
                if ( get_user(data, (u32 *)regs->esi) )
                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
                outl_user((u32)data, (u16)regs->edx, v, regs);
                break;
            }
            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
            break;
        }

        if ( rep_prefix && (--regs->ecx != 0) )
        {
            if ( !hypercall_preempt_check() )
                goto continue_io_string;
            /* Preemption pending: leave eip at the start of this
             * instruction so the REP restarts when the guest resumes. */
            eip = regs->eip;
        }

        goto done;
    }

    /* I/O Port and Interrupt Flag instructions. */
    switch ( opcode )
    {
    case 0xe4: /* IN imm8,%al */
        op_bytes = 1;
        /* fall through */
    case 0xe5: /* IN imm8,%eax */
        port = insn_fetch(u8, 1, eip);
    exec_in:
        if ( !guest_io_okay(port, op_bytes, v, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            regs->eax &= ~0xffUL;
            regs->eax |= (u8)inb_user(port, v, regs);
            break;
        case 2:
            regs->eax &= ~0xffffUL;
            regs->eax |= (u16)inw_user(port, v, regs);
            break;
        case 4:
            regs->eax = (u32)inl_user(port, v, regs);
            break;
        }
        goto done;

    case 0xec: /* IN %dx,%al */
        op_bytes = 1;
        /* fall through */
    case 0xed: /* IN %dx,%eax */
        port = (u16)regs->edx;
        goto exec_in;

    case 0xe6: /* OUT %al,imm8 */
        op_bytes = 1;
        /* fall through */
    case 0xe7: /* OUT %eax,imm8 */
        port = insn_fetch(u8, 1, eip);
    exec_out:
        if ( !guest_io_okay(port, op_bytes, v, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            outb_user((u8)regs->eax, port, v, regs);
            break;
        case 2:
            outw_user((u16)regs->eax, port, v, regs);
            break;
        case 4:
            outl_user((u32)regs->eax, port, v, regs);
            break;
        }
        goto done;

    case 0xee: /* OUT %al,%dx */
        op_bytes = 1;
        /* fall through */
    case 0xef: /* OUT %eax,%dx */
        port = (u16)regs->edx;
        goto exec_out;

    case 0xfa: /* CLI */
    case 0xfb: /* STI */
        if ( v->arch.iopl < (KERNEL_MODE(v, regs) ? 1 : 3) )
            goto fail;
        /*
         * This is just too dangerous to allow, in my opinion. Consider if the
         * caller then tries to reenable interrupts using POPF: we can't trap
         * that and we'll end up with hard-to-debug lockups. Fast & loose will
         * do for us. :-)
         */
        /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
        goto done;

    case 0x0f: /* Two-byte opcode */
        break;

    default:
        goto fail;
    }

    /* Remaining instructions only emulated from guest kernel. */
    if ( !KERNEL_MODE(v, regs) )
        goto fail;

    /* Privileged (ring 0) instructions. */
    opcode = insn_fetch(u8, 1, eip);
    switch ( opcode )
    {
    case 0x06: /* CLTS */
        (void)do_fpu_taskswitch(0);
        break;

    case 0x09: /* WBINVD */
        /* Ignore the instruction if unprivileged. */
        if ( !IS_CAPABLE_PHYSDEV(v->domain) )
            DPRINTK("Non-physdev domain attempted WBINVD.\n");
        else
            wbinvd();
        break;

    case 0x20: /* MOV CR?,<reg> */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        switch ( modrm_reg )
        {
        case 0: /* Read CR0 */
            *reg = v->arch.guest_context.ctrlreg[0];
            break;

        case 2: /* Read CR2 */
            *reg = v->arch.guest_context.ctrlreg[2];
            break;

        case 3: /* Read CR3 */
            *reg = pagetable_get_paddr(v->arch.guest_table);
            break;

        default:
            goto fail;
        }
        break;

    case 0x21: /* MOV DR?,<reg> */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        /* A result above -256 (as unsigned) is a negative errno: failure. */
        if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
            goto fail;
        *reg = res;
        break;

    case 0x22: /* MOV <reg>,CR? */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        switch ( modrm_reg )
        {
        case 0: /* Write CR0 */
            (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
            break;

        case 2: /* Write CR2 */
            v->arch.guest_context.ctrlreg[2] = *reg;
            break;

        case 3: /* Write CR3 */
            LOCK_BIGLOCK(v->domain);
            (void)new_guest_cr3(*reg);
            UNLOCK_BIGLOCK(v->domain);
            break;

        default:
            goto fail;
        }
        break;

    case 0x23: /* MOV <reg>,DR? */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        if ( do_set_debugreg(modrm_reg, *reg) != 0 )
            goto fail;
        break;

    case 0x30: /* WRMSR */
        /* Ignore the instruction if unprivileged. */
        if ( !IS_PRIV(v->domain) )
            DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n",
                    _p(regs->ecx), (long)regs->eax, (long)regs->edx);
        else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) )
            goto fail;
        break;

    case 0x32: /* RDMSR */
        if ( !IS_PRIV(v->domain) )
            DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n",
                    _p(regs->ecx), (long)regs->eax, (long)regs->edx);
        /* Everyone can read the MSR space. */
        if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
            goto fail;
        break;

    default:
        goto fail;
    }

 done:
    regs->eip = eip;
    return EXCRET_fault_fixed;

 fail:
    return 0;
}

asmlinkage int do_general_protection(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_MODE(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
        if ( PERMIT_SOFTINT(TI_GET_DPL(ti), v, regs) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

    /* Emulate some simple privileged and I/O instructions. */
    if ( (regs->error_code == 0) &&
         emulate_privileged_op(regs) )
        return 0;

#if defined(__i386__)
    if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %p -> %p\n",
                regs->error_code, _p(regs->eip), _p(fixup));
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}

unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
}

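/*
 * The softirq indirection exists because the two handlers below run in NMI
 * context, where delivering a virtual IRQ to dom0 directly would not be
 * safe: they merely latch a reason bit and raise NMI_SOFTIRQ, and
 * nmi_softirq() above performs the delivery later, in a safe context.
 */
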
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
{
    /* Clear and disable the parity-error line. */
    outb((inb(0x61)&15)|4,0x61);

    switch ( opt_nmi[0] )
    {
    case 'd': /* 'dom0' */
        set_bit(0, &nmi_softirq_reason);
        raise_softirq(NMI_SOFTIRQ);
        /* fall through */
    case 'i': /* 'ignore' */
        break;
    default:  /* 'fatal' */
        console_force_unlock();
        printk("\n\nNMI - MEMORY ERROR\n");
        fatal_trap(TRAP_nmi, regs);
    }
}

asmlinkage void io_check_error(struct cpu_user_regs *regs)
{
    /* Clear and disable the I/O-error line. */
    outb((inb(0x61)&15)|8,0x61);

    switch ( opt_nmi[0] )
    {
    case 'd': /* 'dom0' */
        /* Bit 1 => VIRQ_IO_ERR in nmi_softirq() (bit 0 is parity). */
        set_bit(1, &nmi_softirq_reason);
        raise_softirq(NMI_SOFTIRQ);
        /* fall through */
    case 'i': /* 'ignore' */
        break;
    default:  /* 'fatal' */
        console_force_unlock();
        printk("\n\nNMI - I/O ERROR\n");
        fatal_trap(TRAP_nmi, regs);
    }
}

static void unknown_nmi_error(unsigned char reason)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);

    if ( reason & 0x80 )
        mem_parity_error(regs);
    else if ( reason & 0x40 )
        io_check_error(regs);
    else if ( !nmi_watchdog )
        unknown_nmi_error((unsigned char)(reason&0xff));
}

asmlinkage int math_state_restore(struct cpu_user_regs *regs)
{
    /* Prevent recursion. */
    clts();

    setup_fpu(current);

    if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
    {
        /* Guest's virtual TS is set: bounce #NM (vector 7) to the guest. */
        struct trap_bounce *tb = &current->arch.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs = current->arch.guest_context.trap_ctxt[7].cs;
        tb->eip = current->arch.guest_context.trap_ctxt[7].address;
        current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
    }

    return EXCRET_fault_fixed;
}

asmlinkage int do_debug(struct cpu_user_regs *regs)
{
    unsigned long condition;
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;

    __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (v->arch.guest_context.debugreg[7] == 0) )
    {
        __asm__("mov %0,%%db7" : : "r" (0UL));
        goto out;
    }

    DEBUGGER_trap_entry(TRAP_debug, regs);

    if ( !GUEST_MODE(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may
         * happen when a buffer is passed to us which previously had a
         * watchpoint set on it. No need to bump EIP; the only faulting
         * trap is an instruction breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    v->arch.guest_context.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs = v->arch.guest_context.trap_ctxt[TRAP_debug].cs;
    tb->eip = v->arch.guest_context.trap_ctxt[TRAP_debug].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
{
    return EXCRET_not_a_fault;
}

void set_intr_gate(unsigned int n, void *addr)
{
#ifdef __i386__
    int i;
    /* Keep secondary tables in sync with IRQ updates. */
    for ( i = 1; i < NR_CPUS; i++ )
        if ( idt_tables[i] != NULL )
            _set_gate(&idt_tables[i][n], 14, 0, addr);
#endif
    _set_gate(&idt_table[n], 14, 0, addr);
}

void set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
        (unsigned long)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        9);
}

void __init trap_init(void)
{
    extern void percpu_traps_init(void);
    extern void cpu_init(void);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until
     * %cr2 has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);

    percpu_traps_init();

    cpu_init();

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}

long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->arch.guest_context.trap_ctxt;
    long rc = 0;

    LOCK_BIGLOCK(current->domain);

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
        {
            rc = hypercall1_create_continuation(
                __HYPERVISOR_set_trap_table, traps);
            break;
        }

        if ( copy_from_user(&cur, traps, sizeof(cur)) )
        {
            rc = -EFAULT;
            break;
        }

        if ( cur.address == 0 )
            break;

        if ( !VALID_CODESEL(cur.cs) )
        {
            rc = -EPERM;
            break;
        }

        memcpy(&dst[cur.vector], &cur, sizeof(cur));

        if ( cur.vector == 0x80 )
            init_int80_direct_trap(current);

        traps++;
    }

    UNLOCK_BIGLOCK(current->domain);

    return rc;
}

long set_debugreg(struct vcpu *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "mov %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "mov %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->arch.guest_context.debugreg[reg] = value;
    return 0;
}

long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->arch.guest_context.debugreg[reg];
}

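/*
 * Note: the error case above returns -EINVAL cast to unsigned long, which
 * is why emulate_privileged_op() treats any do_get_debugreg() result above
 * (unsigned long)-256 as a failed debug-register read.
 */
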
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */