ia64/xen-unstable

xen/arch/x86/traps.c @ 6552:a9873d384da4

Merge.

author    adsharma@los-vmm.sc.intel.com
date      Thu Aug 25 12:24:48 2005 -0700 (2005-08-25)
parents   112d44270733 fa0754a9f64f
children  dfaf788ab18c
/******************************************************************************
 * arch/x86/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/symbols.h>
#include <asm/shadow.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
#include <asm/msr.h>
#include <asm/x86_emulate.h>
/*
 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
 *  fatal:  Xen prints diagnostic message and then hangs.
 *  dom0:   The NMI is virtualised to DOM0.
 *  ignore: The NMI error is cleared and ignored.
 */
#ifdef NDEBUG
char opt_nmi[10] = "dom0";
#else
char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);
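/*
 * Illustrative example: booting Xen with "nmi=ignore" on the command line
 * overrides the default above; only the first character ('f', 'd' or 'i')
 * is inspected by the NMI handlers later in this file.
 */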
/* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
idt_entry_t idt_table[IDT_ENTRIES];

#define DECLARE_TRAP_HANDLER(_name) \
asmlinkage void _name(void); \
asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
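/*
 * For example, DECLARE_TRAP_HANDLER(divide_error) expands to
 *   asmlinkage void divide_error(void);
 *   asmlinkage int do_divide_error(struct cpu_user_regs *regs);
 * i.e. a prototype for the assembly entry stub and one for its C handler.
 */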
asmlinkage void nmi(void);
DECLARE_TRAP_HANDLER(divide_error);
DECLARE_TRAP_HANDLER(debug);
DECLARE_TRAP_HANDLER(int3);
DECLARE_TRAP_HANDLER(overflow);
DECLARE_TRAP_HANDLER(bounds);
DECLARE_TRAP_HANDLER(invalid_op);
DECLARE_TRAP_HANDLER(device_not_available);
DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
DECLARE_TRAP_HANDLER(invalid_TSS);
DECLARE_TRAP_HANDLER(segment_not_present);
DECLARE_TRAP_HANDLER(stack_segment);
DECLARE_TRAP_HANDLER(general_protection);
DECLARE_TRAP_HANDLER(page_fault);
DECLARE_TRAP_HANDLER(coprocessor_error);
DECLARE_TRAP_HANDLER(simd_coprocessor_error);
DECLARE_TRAP_HANDLER(alignment_check);
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
DECLARE_TRAP_HANDLER(machine_check);

long do_set_debugreg(int reg, unsigned long value);
unsigned long do_get_debugreg(int reg);

static int debug_stack_lines = 20;
integer_param("debug_stack_lines", debug_stack_lines);
int is_kernel_text(unsigned long addr)
{
    extern char _stext, _etext;
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

unsigned long kernel_text_end(void)
{
    extern char _etext;
    return (unsigned long) &_etext;
}
void show_guest_stack(void)
{
    int i;
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    unsigned long *stack = (unsigned long *)regs->esp, addr;

    printk("Guest stack trace from "__OP"sp=%p:\n   ", stack);

    for ( i = 0; i < (debug_stack_lines*8); i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( get_user(addr, stack) )
        {
            if ( i != 0 )
                printk("\n    ");
            printk("Fault while accessing guest memory.");
            i = 1;
            break;
        }
        if ( (i != 0) && ((i % 8) == 0) )
            printk("\n    ");
        printk("%p ", _p(addr));
        stack++;
    }
    if ( i == 0 )
        printk("Stack empty.");
    printk("\n");
}
void show_trace(unsigned long *esp)
{
    unsigned long *stack = esp, addr;
    int i = 0;

    printk("Xen call trace from "__OP"sp=%p:\n   ", stack);

    while ( ((long) stack & (STACK_SIZE-1)) != 0 )
    {
        addr = *stack++;
        if ( is_kernel_text(addr) )
        {
            printk("[<%p>]", _p(addr));
            print_symbol(" %s\n   ", addr);
            i++;
        }
    }
    if ( i == 0 )
        printk("Trace empty.");
    printk("\n");
}
void show_stack(unsigned long *esp)
{
    unsigned long *stack = esp, addr;
    int i;

    printk("Xen stack trace from "__OP"sp=%p:\n   ", stack);

    for ( i = 0; i < (debug_stack_lines*8); i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( (i != 0) && ((i % 8) == 0) )
            printk("\n    ");
        addr = *stack++;
        printk("%p ", _p(addr));
    }
    if ( i == 0 )
        printk("Stack empty.");
    printk("\n");

    show_trace(esp);
}
/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    watchdog_disable();
    console_start_sync();

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address: %p\n", _p(cr2));
        show_page_walk(cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    (void)debugger_trap_fatal(trapnr, regs);

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
static inline int do_trap(int trapnr, char *str,
                          struct cpu_user_regs *regs,
                          int use_error_code)
{
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_MODE(regs) )
        goto xen_fault;

    ti = &current->arch.guest_context.trap_ctxt[trapnr];
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}
#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct cpu_user_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct cpu_user_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}
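/*
 * For example, DO_ERROR(10, "invalid TSS", invalid_TSS) below defines
 * do_invalid_TSS() as a thin wrapper around do_trap(10, "invalid TSS",
 * regs, 1), bouncing the exception, with its error code, to the guest.
 */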
DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
asmlinkage int do_int3(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_MODE(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;

    return 0;
}

asmlinkage int do_machine_check(struct cpu_user_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
    return 0;
}
void propagate_page_fault(unsigned long addr, u16 error_code)
{
    trap_info_t *ti;
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;

    ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;

    v->arch.guest_context.ctrlreg[2] = addr;
}
static int handle_perdomain_mapping_fault(
    unsigned long offset, struct cpu_user_regs *regs)
{
    extern int map_ldt_shadow_page(unsigned int);

    struct vcpu *v = current;
    struct domain *d = v->domain;
    int ret;

    /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
    unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
    unsigned int vcpu_area   = (offset >> PDPT_VCPU_VA_SHIFT);

    /* Should never fault in another vcpu's area. */
    BUG_ON(vcpu_area != current->vcpu_id);

    /* Byte offset within the gdt/ldt sub-area. */
    offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;

    if ( likely(is_ldt_area) )
    {
        /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
        LOCK_BIGLOCK(d);
        ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
        UNLOCK_BIGLOCK(d);

        if ( unlikely(ret == 0) )
        {
            /* In hypervisor mode? Leave it to the #PF handler to fix up. */
            if ( !GUEST_MODE(regs) )
                return 0;
            /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
            propagate_page_fault(
                v->arch.guest_context.ldt_base + offset, regs->error_code);
        }
    }
    else
    {
        /* GDT fault: handle the fault as #GP(selector). */
        regs->error_code = (u16)offset & ~7;
        (void)do_general_protection(regs);
    }

    return EXCRET_fault_fixed;
}
asmlinkage int do_page_fault(struct cpu_user_regs *regs)
{
    unsigned long addr, fixup;
    struct vcpu *v = current;
    struct domain *d = v->domain;

    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
                !shadow_mode_enabled(d)) )
    {
        LOCK_BIGLOCK(d);
        if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
             unlikely(l2_linear_offset(addr) ==
                      d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(d, PTWR_PT_ACTIVE);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }

        if ( ((addr < HYPERVISOR_VIRT_START)
#if defined(__x86_64__)
              || (addr >= HYPERVISOR_VIRT_END)
#endif
                 )
             &&
             KERNEL_MODE(v, regs) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(d, addr, regs) )
        {
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }
        UNLOCK_BIGLOCK(d);
    }

    if ( unlikely(shadow_mode_enabled(d)) &&
         ((addr < HYPERVISOR_VIRT_START) ||
#if defined(__x86_64__)
          (addr >= HYPERVISOR_VIRT_END) ||
#endif
          (shadow_mode_external(d) && GUEST_CONTEXT(v, regs))) &&
         shadow_fault(addr, regs) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= PERDOMAIN_VIRT_START) &&
         unlikely(addr < PERDOMAIN_VIRT_END) &&
         handle_perdomain_mapping_fault(addr - PERDOMAIN_VIRT_START, regs) )
        return EXCRET_fault_fixed;

    if ( !GUEST_MODE(regs) )
        goto xen_fault;

    propagate_page_fault(addr, regs->error_code);
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    show_registers(regs);
    show_page_walk(addr);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address: %p\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}
long do_fpu_taskswitch(int set)
{
    struct vcpu *v = current;

    if ( set )
    {
        v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
        stts();
    }
    else
    {
        v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
        if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
            clts();
    }

    return 0;
}
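/*
 * Note: do_fpu_taskswitch() is reachable both as a hypercall handler (per
 * the do_ naming convention; a paravirtualised guest toggles its virtual
 * CR0.TS this way) and from the CLTS/MOV-to-CR0 emulation in
 * emulate_privileged_op() below.
 */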
/* Has the guest requested sufficient permission for this I/O access? */
static inline int guest_io_okay(
    unsigned int port, unsigned int bytes,
    struct vcpu *v, struct cpu_user_regs *regs)
{
    u16 x;
#if defined(__x86_64__)
    /* If in user mode, switch to kernel mode just to read I/O bitmap. */
    extern void toggle_guest_mode(struct vcpu *);
    int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#elif defined(__i386__)
#define TOGGLE_MODE() ((void)0)
#endif

    if ( v->arch.iopl >= (KERNEL_MODE(v, regs) ? 1 : 3) )
        return 1;

    if ( v->arch.iobmp_limit > (port + bytes) )
    {
        TOGGLE_MODE();
        __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
        TOGGLE_MODE();
        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
            return 1;
    }

    return 0;
}
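/*
 * Worked example: a 2-byte access to port 0x3f9 loads the 16-bit word at
 * iobmp + (0x3f9 >> 3) = iobmp + 0x7f and tests mask 3 << (0x3f9 & 7),
 * i.e. bits 1-2; the access is permitted only if both bits are clear.
 */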
/* Has the administrator granted sufficient permission for this I/O access? */
static inline int admin_io_okay(
    unsigned int port, unsigned int bytes,
    struct vcpu *v, struct cpu_user_regs *regs)
{
    struct domain *d = v->domain;
    u16 x;

    if ( d->arch.iobmp_mask != NULL )
    {
        x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
            return 1;
    }

    return 0;
}

/* Check admin limits. Silently fail the access if it is disallowed. */
#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
#define outb_user(_v, _p, _d, _r) \
    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
#define outw_user(_v, _p, _d, _r) \
    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
#define outl_user(_v, _p, _d, _r) \
    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))

/* Propagate a fault back to the guest kernel. */
#define USER_READ_FAULT  4 /* user mode, read fault */
#define USER_WRITE_FAULT 6 /* user mode, write fault */
#define PAGE_FAULT(_faultaddr, _errcode) \
({  propagate_page_fault(_faultaddr, _errcode); \
    return EXCRET_fault_fixed; \
})
/* Instruction fetch with error handling. */
#define insn_fetch(_type, _size, _ptr) \
({  unsigned long _x; \
    if ( get_user(_x, (_type *)eip) ) \
        PAGE_FAULT(eip, USER_READ_FAULT); \
    eip += _size; (_type)_x; })
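/*
 * Typical use, as in emulate_privileged_op() below:
 *   opcode = insn_fetch(u8, 1, eip);
 * fetches one byte at 'eip', bounces a read fault back to the guest on
 * failure (returning from the enclosing function), and advances 'eip'.
 */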
static int emulate_privileged_op(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    unsigned long *reg, eip = regs->eip, res;
    u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
    unsigned int port, i, op_bytes = 4, data;

    /* Legacy prefixes. */
    for ( i = 0; i < 8; i++ )
    {
        switch ( opcode = insn_fetch(u8, 1, eip) )
        {
        case 0x66: /* operand-size override */
            op_bytes ^= 6; /* switch between 2/4 bytes */
            break;
        case 0x67: /* address-size override */
        case 0x2e: /* CS override */
        case 0x3e: /* DS override */
        case 0x26: /* ES override */
        case 0x64: /* FS override */
        case 0x65: /* GS override */
        case 0x36: /* SS override */
        case 0xf0: /* LOCK */
        case 0xf2: /* REPNE/REPNZ */
            break;
        case 0xf3: /* REP/REPE/REPZ */
            rep_prefix = 1;
            break;
        default:
            goto done_prefixes;
        }
    }
 done_prefixes:

#ifdef __x86_64__
    /* REX prefix. */
    if ( (opcode & 0xf0) == 0x40 )
    {
        modrm_reg = (opcode & 4) << 1; /* REX.R */
        modrm_rm  = (opcode & 1) << 3; /* REX.B */

        /* REX.W and REX.X do not need to be decoded. */
        opcode = insn_fetch(u8, 1, eip);
    }
#endif

    /* Input/Output String instructions. */
    if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
    {
        if ( rep_prefix && (regs->ecx == 0) )
            goto done;

    continue_io_string:
        switch ( opcode )
        {
        case 0x6c: /* INSB */
            op_bytes = 1;
        case 0x6d: /* INSW/INSL */
            if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
                goto fail;
            switch ( op_bytes )
            {
            case 1:
                data = (u8)inb_user((u16)regs->edx, v, regs);
                if ( put_user((u8)data, (u8 *)regs->edi) )
                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                break;
            case 2:
                data = (u16)inw_user((u16)regs->edx, v, regs);
                if ( put_user((u16)data, (u16 *)regs->edi) )
                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                break;
            case 4:
                data = (u32)inl_user((u16)regs->edx, v, regs);
                if ( put_user((u32)data, (u32 *)regs->edi) )
                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                break;
            }
            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
            break;

        case 0x6e: /* OUTSB */
            op_bytes = 1;
        case 0x6f: /* OUTSW/OUTSL */
            if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
                goto fail;
            switch ( op_bytes )
            {
            case 1:
                if ( get_user(data, (u8 *)regs->esi) )
                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
                outb_user((u8)data, (u16)regs->edx, v, regs);
                break;
            case 2:
                if ( get_user(data, (u16 *)regs->esi) )
                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
                outw_user((u16)data, (u16)regs->edx, v, regs);
                break;
            case 4:
                if ( get_user(data, (u32 *)regs->esi) )
                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
                outl_user((u32)data, (u16)regs->edx, v, regs);
                break;
            }
            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
            break;
        }

        if ( rep_prefix && (--regs->ecx != 0) )
        {
            if ( !hypercall_preempt_check() )
                goto continue_io_string;
            eip = regs->eip;
        }
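        /*
         * Note on the block above: if a REP string operation is preempted,
         * 'eip' is wound back to the start of the instruction (regs->eip),
         * so the guest re-executes it with the already-updated ECX/ESI/EDI
         * and resumes where it left off.
         */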
        goto done;
    }

    /* I/O Port and Interrupt Flag instructions. */
    switch ( opcode )
    {
    case 0xe4: /* IN imm8,%al */
        op_bytes = 1;
    case 0xe5: /* IN imm8,%eax */
        port = insn_fetch(u8, 1, eip);
    exec_in:
        if ( !guest_io_okay(port, op_bytes, v, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            regs->eax &= ~0xffUL;
            regs->eax |= (u8)inb_user(port, v, regs);
            break;
        case 2:
            regs->eax &= ~0xffffUL;
            regs->eax |= (u16)inw_user(port, v, regs);
            break;
        case 4:
            regs->eax = (u32)inl_user(port, v, regs);
            break;
        }
        goto done;

    case 0xec: /* IN %dx,%al */
        op_bytes = 1;
    case 0xed: /* IN %dx,%eax */
        port = (u16)regs->edx;
        goto exec_in;

    case 0xe6: /* OUT %al,imm8 */
        op_bytes = 1;
    case 0xe7: /* OUT %eax,imm8 */
        port = insn_fetch(u8, 1, eip);
    exec_out:
        if ( !guest_io_okay(port, op_bytes, v, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            outb_user((u8)regs->eax, port, v, regs);
            break;
        case 2:
            outw_user((u16)regs->eax, port, v, regs);
            break;
        case 4:
            outl_user((u32)regs->eax, port, v, regs);
            break;
        }
        goto done;

    case 0xee: /* OUT %al,%dx */
        op_bytes = 1;
    case 0xef: /* OUT %eax,%dx */
        port = (u16)regs->edx;
        goto exec_out;

    case 0xfa: /* CLI */
    case 0xfb: /* STI */
        if ( v->arch.iopl < (KERNEL_MODE(v, regs) ? 1 : 3) )
            goto fail;
        /*
         * This is just too dangerous to allow, in my opinion. Consider if the
         * caller then tries to reenable interrupts using POPF: we can't trap
         * that and we'll end up with hard-to-debug lockups. Fast & loose will
         * do for us. :-)
         */
        /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
        goto done;

    case 0x0f: /* Two-byte opcode */
        break;

    default:
        goto fail;
    }

    /* Remaining instructions only emulated from guest kernel. */
    if ( !KERNEL_MODE(v, regs) )
        goto fail;

    /* Privileged (ring 0) instructions. */
    opcode = insn_fetch(u8, 1, eip);
    switch ( opcode )
    {
    case 0x06: /* CLTS */
        (void)do_fpu_taskswitch(0);
        break;

    case 0x09: /* WBINVD */
        /* Ignore the instruction if unprivileged. */
        if ( !IS_CAPABLE_PHYSDEV(v->domain) )
            DPRINTK("Non-physdev domain attempted WBINVD.\n");
        else
            wbinvd();
        break;

    case 0x20: /* MOV CR?,<reg> */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        switch ( modrm_reg )
        {
        case 0: /* Read CR0 */
            *reg = v->arch.guest_context.ctrlreg[0];
            break;

        case 2: /* Read CR2 */
            *reg = v->arch.guest_context.ctrlreg[2];
            break;

        case 3: /* Read CR3 */
            *reg = pagetable_get_paddr(v->arch.guest_table);
            break;

        default:
            goto fail;
        }
        break;

    case 0x21: /* MOV DR?,<reg> */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
            goto fail;
        *reg = res;
        break;

    case 0x22: /* MOV <reg>,CR? */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        switch ( modrm_reg )
        {
        case 0: /* Write CR0 */
            (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
            break;

        case 2: /* Write CR2 */
            v->arch.guest_context.ctrlreg[2] = *reg;
            break;

        case 3: /* Write CR3 */
            LOCK_BIGLOCK(v->domain);
            (void)new_guest_cr3(*reg);
            UNLOCK_BIGLOCK(v->domain);
            break;

        default:
            goto fail;
        }
        break;

    case 0x23: /* MOV <reg>,DR? */
        opcode = insn_fetch(u8, 1, eip);
        modrm_reg |= (opcode >> 3) & 7;
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        if ( do_set_debugreg(modrm_reg, *reg) != 0 )
            goto fail;
        break;

    case 0x30: /* WRMSR */
        /* Ignore the instruction if unprivileged. */
        if ( !IS_PRIV(v->domain) )
            DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n",
                    _p(regs->ecx), (long)regs->eax, (long)regs->edx);
        else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) )
            goto fail;
        break;

    case 0x32: /* RDMSR */
        if ( !IS_PRIV(v->domain) )
            DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n",
                    _p(regs->ecx), (long)regs->eax, (long)regs->edx);
        /* Everyone can read the MSR space. */
        if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
            goto fail;
        break;

    default:
        goto fail;
    }

 done:
    regs->eip = eip;
    return EXCRET_fault_fixed;

 fail:
    return 0;
}
asmlinkage int do_general_protection(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_MODE(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
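    /*
     * Example: a guest executing INT 0x21 against a DPL-0 IDT entry arrives
     * here with error_code = (0x21 << 3) | 2 = 0x10a: error_code >> 3
     * recovers the vector, and bits 1:0 mark a software-originated IDT
     * fault.
     */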
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
        if ( PERMIT_SOFTINT(TI_GET_DPL(ti), v, regs) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

    /* Emulate some simple privileged and I/O instructions. */
    if ( (regs->error_code == 0) &&
         emulate_privileged_op(regs) )
        return 0;

#if defined(__i386__)
    if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs  = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %p -> %p\n",
                regs->error_code, _p(regs->eip), _p(fixup));
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}
unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
}
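/*
 * Bit 0 of nmi_softirq_reason is set by mem_parity_error() and delivered to
 * DOM0 as VIRQ_PARITY_ERR; bit 1 is set by io_check_error() and delivered
 * as VIRQ_IO_ERR.
 */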
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
{
    /* Clear and disable the parity-error line. */
    outb((inb(0x61)&15)|4,0x61);

    switch ( opt_nmi[0] )
    {
    case 'd': /* 'dom0' */
        set_bit(0, &nmi_softirq_reason);
        raise_softirq(NMI_SOFTIRQ);
    case 'i': /* 'ignore' */
        break;
    default:  /* 'fatal' */
        console_force_unlock();
        printk("\n\nNMI - MEMORY ERROR\n");
        fatal_trap(TRAP_nmi, regs);
    }
}
asmlinkage void io_check_error(struct cpu_user_regs *regs)
{
    /* Clear and disable the I/O-error line. */
    outb((inb(0x61)&15)|8,0x61);

    switch ( opt_nmi[0] )
    {
    case 'd': /* 'dom0' */
        set_bit(1, &nmi_softirq_reason);
        raise_softirq(NMI_SOFTIRQ);
    case 'i': /* 'ignore' */
        break;
    default:  /* 'fatal' */
        console_force_unlock();
        printk("\n\nNMI - I/O ERROR\n");
        fatal_trap(TRAP_nmi, regs);
    }
}
static void unknown_nmi_error(unsigned char reason)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);

    if ( reason & 0x80 )
        mem_parity_error(regs);
    else if ( reason & 0x40 )
        io_check_error(regs);
    else if ( !nmi_watchdog )
        unknown_nmi_error((unsigned char)(reason&0xff));
}
asmlinkage int math_state_restore(struct cpu_user_regs *regs)
{
    /* Prevent recursion. */
    clts();

    setup_fpu(current);

    if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
    {
        struct trap_bounce *tb = &current->arch.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->arch.guest_context.trap_ctxt[7].cs;
        tb->eip   = current->arch.guest_context.trap_ctxt[7].address;
        current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
    }

    return EXCRET_fault_fixed;
}
asmlinkage int do_debug(struct cpu_user_regs *regs)
{
    unsigned long condition;
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;

    __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (v->arch.guest_context.debugreg[7] == 0) )
    {
        __asm__("mov %0,%%db7" : : "r" (0UL));
        goto out;
    }

    DEBUGGER_trap_entry(TRAP_debug, regs);

    if ( !GUEST_MODE(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may
         * happen when a buffer is passed to us which previously had a
         * watchpoint set on it. No need to bump EIP; the only faulting
         * trap is an instruction breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    v->arch.guest_context.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = v->arch.guest_context.trap_ctxt[TRAP_debug].cs;
    tb->eip   = v->arch.guest_context.trap_ctxt[TRAP_debug].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
{
    return EXCRET_not_a_fault;
}
void set_intr_gate(unsigned int n, void *addr)
{
#ifdef __i386__
    int i;
    /* Keep secondary tables in sync with IRQ updates. */
    for ( i = 1; i < NR_CPUS; i++ )
        if ( idt_tables[i] != NULL )
            _set_gate(&idt_tables[i][n], 14, 0, addr);
#endif
    _set_gate(&idt_table[n], 14, 0, addr);
}

void set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}
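/*
 * Both helpers install type-14 (interrupt) gates; the third argument to
 * _set_gate() is the DPL. The DPL-3 "system" gates used for int3 and
 * overflow in trap_init() below may be raised directly by guest code,
 * whereas the DPL-0 gates may not.
 */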
void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
        (unsigned long)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        9);
}
void __init trap_init(void)
{
    extern void percpu_traps_init(void);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until
     * %cr2 has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);

    percpu_traps_init();

    cpu_init();

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->arch.guest_context.trap_ctxt;
    long rc = 0;

    LOCK_BIGLOCK(current->domain);

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
        {
            rc = hypercall1_create_continuation(
                __HYPERVISOR_set_trap_table, traps);
            break;
        }

        if ( copy_from_user(&cur, traps, sizeof(cur)) )
        {
            rc = -EFAULT;
            break;
        }

        if ( cur.address == 0 )
            break;

        if ( !VALID_CODESEL(cur.cs) )
        {
            rc = -EPERM;
            break;
        }

        memcpy(&dst[cur.vector], &cur, sizeof(cur));

        if ( cur.vector == 0x80 )
            init_int80_direct_trap(current);

        traps++;
    }

    UNLOCK_BIGLOCK(current->domain);

    return rc;
}
long set_debugreg(struct vcpu *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( !access_ok(value, sizeof(long)) )
            return -EPERM;
        if ( p == current )
            __asm__ ( "mov %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "mov %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "mov %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->arch.guest_context.debugreg[reg] = value;
    return 0;
}
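/*
 * Example: DR7 = 0x00020002 would program breakpoint 0 as an I/O breakpoint
 * (R/W0 == binary 10), which the validation loop above rejects with -EPERM.
 */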
long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->arch.guest_context.debugreg[reg];
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */