
view xen/arch/x86/traps.c @ 9584:7086a4e96ce0

With the standard (XEN) prefix and the setting of 8 words per line,
stack dumps on i386 came out at 81 characters per line. The change to
xen/arch/x86/traps.c reduces this to 80 (without changing the look on
the screen), and the change to xen/drivers/char/console.c prevents a
soft line break from being duplicated by an immediately following hard
line break, so displaying 80 characters per line no longer produces a
blank line afterwards.
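
As an illustration only (this is not the actual console.c change), the
duplicate-newline suppression can be pictured as a console output routine
that remembers whether it has just soft-wrapped; the names CONSOLE_WIDTH,
console_column, just_soft_wrapped and serial_putc below are hypothetical:

    #define CONSOLE_WIDTH 80
    extern void serial_putc(char c);       /* assumed low-level output hook */
    static unsigned int console_column;    /* current output column */
    static int just_soft_wrapped;          /* set after wrapping at column 80 */

    static void console_putc(char c)
    {
        if ( c == '\n' )
        {
            if ( just_soft_wrapped )
            {
                /* The line was already broken by the soft wrap; swallow the
                   hard newline so no blank line is emitted. */
                just_soft_wrapped = 0;
                return;
            }
            serial_putc('\n');
            console_column = 0;
            return;
        }
        just_soft_wrapped = 0;
        serial_putc(c);
        if ( ++console_column == CONSOLE_WIDTH )
        {
            serial_putc('\n');             /* soft line break */
            console_column = 0;
            just_soft_wrapped = 1;
        }
    }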

From: Jan Beulich

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Apr 04 15:06:38 2006 +0100 (2006-04-04)
parents 38c170be0e63
children be0a1f376223
line source
1 /******************************************************************************
2 * arch/x86/traps.c
3 *
4 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * Copyright (C) 1991, 1992 Linus Torvalds
23 *
24 * Pentium III FXSR, SSE support
25 * Gareth Hughes <gareth@valinux.com>, May 2000
26 */
28 #include <xen/config.h>
29 #include <xen/init.h>
30 #include <xen/sched.h>
31 #include <xen/lib.h>
32 #include <xen/errno.h>
33 #include <xen/mm.h>
34 #include <xen/console.h>
35 #include <asm/regs.h>
36 #include <xen/delay.h>
37 #include <xen/event.h>
38 #include <xen/spinlock.h>
39 #include <xen/irq.h>
40 #include <xen/perfc.h>
41 #include <xen/softirq.h>
42 #include <xen/domain_page.h>
43 #include <xen/symbols.h>
44 #include <xen/iocap.h>
45 #include <asm/shadow.h>
46 #include <asm/system.h>
47 #include <asm/io.h>
48 #include <asm/atomic.h>
49 #include <asm/desc.h>
50 #include <asm/debugreg.h>
51 #include <asm/smp.h>
52 #include <asm/flushtlb.h>
53 #include <asm/uaccess.h>
54 #include <asm/i387.h>
55 #include <asm/debugger.h>
56 #include <asm/msr.h>
57 #include <asm/x86_emulate.h>
58 #include <asm/nmi.h>
60 /*
61 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
62 * fatal: Xen prints diagnostic message and then hangs.
63 * dom0: The NMI is virtualised to DOM0.
64 * ignore: The NMI error is cleared and ignored.
65 */
66 #ifdef NDEBUG
67 char opt_nmi[10] = "dom0";
68 #else
69 char opt_nmi[10] = "fatal";
70 #endif
71 string_param("nmi", opt_nmi);
73 /* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
74 idt_entry_t idt_table[IDT_ENTRIES];
76 #define DECLARE_TRAP_HANDLER(_name) \
77 asmlinkage void _name(void); \
78 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
80 asmlinkage void nmi(void);
81 DECLARE_TRAP_HANDLER(divide_error);
82 DECLARE_TRAP_HANDLER(debug);
83 DECLARE_TRAP_HANDLER(int3);
84 DECLARE_TRAP_HANDLER(overflow);
85 DECLARE_TRAP_HANDLER(bounds);
86 DECLARE_TRAP_HANDLER(invalid_op);
87 DECLARE_TRAP_HANDLER(device_not_available);
88 DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
89 DECLARE_TRAP_HANDLER(invalid_TSS);
90 DECLARE_TRAP_HANDLER(segment_not_present);
91 DECLARE_TRAP_HANDLER(stack_segment);
92 DECLARE_TRAP_HANDLER(general_protection);
93 DECLARE_TRAP_HANDLER(page_fault);
94 DECLARE_TRAP_HANDLER(coprocessor_error);
95 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
96 DECLARE_TRAP_HANDLER(alignment_check);
97 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
98 DECLARE_TRAP_HANDLER(machine_check);
100 long do_set_debugreg(int reg, unsigned long value);
101 unsigned long do_get_debugreg(int reg);
103 static int debug_stack_lines = 20;
104 integer_param("debug_stack_lines", debug_stack_lines);
106 #ifdef CONFIG_X86_32
107 #define stack_words_per_line 8
108 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
109 #else
110 #define stack_words_per_line 4
111 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
112 #endif
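/* With the "(XEN) " console prefix, 8 four-byte words per line on i386 (or 4
 * eight-byte words on x86-64) keep a stack-dump line within an 80-column
 * display, as described in the changeset comment above. */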
114 int is_kernel_text(unsigned long addr)
115 {
116 extern char _stext, _etext;
117 if (addr >= (unsigned long) &_stext &&
118 addr <= (unsigned long) &_etext)
119 return 1;
120 return 0;
122 }
124 unsigned long kernel_text_end(void)
125 {
126 extern char _etext;
127 return (unsigned long) &_etext;
128 }
130 static void show_guest_stack(struct cpu_user_regs *regs)
131 {
132 int i;
133 unsigned long *stack, addr;
135 if ( hvm_guest(current) )
136 return;
138 if ( vm86_mode(regs) )
139 {
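/* Real-mode style stack address: linear = 16*ss + (sp & 0xffff). */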
140 stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
141 printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ",
142 regs->ss, (uint16_t)(regs->esp & 0xffff));
143 }
144 else
145 {
146 stack = (unsigned long *)regs->esp;
147 printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
148 }
150 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
151 {
152 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
153 break;
154 if ( get_user(addr, stack) )
155 {
156 if ( i != 0 )
157 printk("\n ");
158 printk("Fault while accessing guest memory.");
159 i = 1;
160 break;
161 }
162 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
163 printk("\n ");
164 printk(" %p", _p(addr));
165 stack++;
166 }
167 if ( i == 0 )
168 printk("Stack empty.");
169 printk("\n");
170 }
172 #ifdef NDEBUG
174 static void show_trace(struct cpu_user_regs *regs)
175 {
176 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
178 printk("Xen call trace:\n ");
180 printk("[<%p>]", _p(regs->eip));
181 print_symbol(" %s\n ", regs->eip);
183 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
184 {
185 addr = *stack++;
186 if ( is_kernel_text(addr) )
187 {
188 printk("[<%p>]", _p(addr));
189 print_symbol(" %s\n ", addr);
190 }
191 }
193 printk("\n");
194 }
196 #else
198 static void show_trace(struct cpu_user_regs *regs)
199 {
200 unsigned long *frame, next, addr, low, high;
202 printk("Xen call trace:\n ");
204 printk("[<%p>]", _p(regs->eip));
205 print_symbol(" %s\n ", regs->eip);
207 /* Bounds for range of valid frame pointer. */
208 low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
209 high = (low & ~(STACK_SIZE - 1)) +
210 (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
212 /* The initial frame pointer. */
213 next = regs->ebp;
215 for ( ; ; )
216 {
217 /* Valid frame pointer? */
218 if ( (next < low) || (next >= high) )
219 {
220 /*
221 * Exception stack frames have a different layout, denoted by an
222 * inverted frame pointer.
223 */
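/* (The exception entry path is assumed to save the one's complement of the
 * register-frame address as the frame pointer, so ~next below both marks the
 * frame as an exception frame and recovers its real address.) */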
224 next = ~next;
225 if ( (next < low) || (next >= high) )
226 break;
227 frame = (unsigned long *)next;
228 next = frame[0];
229 addr = frame[(offsetof(struct cpu_user_regs, eip) -
230 offsetof(struct cpu_user_regs, ebp))
231 / BYTES_PER_LONG];
232 }
233 else
234 {
235 /* Ordinary stack frame. */
236 frame = (unsigned long *)next;
237 next = frame[0];
238 addr = frame[1];
239 }
241 printk("[<%p>]", _p(addr));
242 print_symbol(" %s\n ", addr);
244 low = (unsigned long)&frame[2];
245 }
247 printk("\n");
248 }
250 #endif
252 void show_stack(struct cpu_user_regs *regs)
253 {
254 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
255 int i;
257 if ( guest_mode(regs) )
258 return show_guest_stack(regs);
260 printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
262 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
263 {
264 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
265 break;
266 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
267 printk("\n ");
268 addr = *stack++;
269 printk(" %p", _p(addr));
270 }
271 if ( i == 0 )
272 printk("Stack empty.");
273 printk("\n");
275 show_trace(regs);
276 }
278 /*
279 * This is called for faults at very unexpected times (e.g., when interrupts
280 * are disabled). In such situations we can't do much that is safe. We try to
281 * print out some tracing and then we just spin.
282 */
283 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
284 {
285 int cpu = smp_processor_id();
286 unsigned long cr2;
287 static char *trapstr[] = {
288 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
289 "invalid opcode", "device not available", "double fault",
290 "coprocessor segment", "invalid tss", "segment not found",
291 "stack error", "general protection fault", "page fault",
292 "spurious interrupt", "coprocessor error", "alignment check",
293 "machine check", "simd error"
294 };
296 watchdog_disable();
297 console_start_sync();
299 show_registers(regs);
301 if ( trapnr == TRAP_page_fault )
302 {
303 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
304 printk("Faulting linear address: %p\n", _p(cr2));
305 show_page_walk(cr2);
306 }
308 printk("************************************\n");
309 printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
310 cpu, trapnr, trapstr[trapnr], regs->error_code,
311 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
312 printk("System shutting down -- need manual reset.\n");
313 printk("************************************\n");
315 (void)debugger_trap_fatal(trapnr, regs);
317 /* Lock up the console to prevent spurious output from other CPUs. */
318 console_force_lock();
320 /* Wait for manual reset. */
321 for ( ; ; )
322 __asm__ __volatile__ ( "hlt" );
323 }
325 static inline int do_trap(int trapnr, char *str,
326 struct cpu_user_regs *regs,
327 int use_error_code)
328 {
329 struct vcpu *v = current;
330 struct trap_bounce *tb = &v->arch.trap_bounce;
331 struct trap_info *ti;
332 unsigned long fixup;
334 DEBUGGER_trap_entry(trapnr, regs);
336 if ( !guest_mode(regs) )
337 goto xen_fault;
339 ti = &current->arch.guest_context.trap_ctxt[trapnr];
340 tb->flags = TBF_EXCEPTION;
341 tb->cs = ti->cs;
342 tb->eip = ti->address;
343 if ( use_error_code )
344 {
345 tb->flags |= TBF_EXCEPTION_ERRCODE;
346 tb->error_code = regs->error_code;
347 }
348 if ( TI_GET_IF(ti) )
349 tb->flags |= TBF_INTERRUPT;
350 return 0;
352 xen_fault:
354 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
355 {
356 DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
357 regs->eip = fixup;
358 return 0;
359 }
361 DEBUGGER_trap_fatal(trapnr, regs);
363 show_registers(regs);
364 panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
365 "[error_code=%04x]\n",
366 smp_processor_id(), trapnr, str, regs->error_code);
367 return 0;
368 }
370 #define DO_ERROR_NOCODE(trapnr, str, name) \
371 asmlinkage int do_##name(struct cpu_user_regs *regs) \
372 { \
373 return do_trap(trapnr, str, regs, 0); \
374 }
376 #define DO_ERROR(trapnr, str, name) \
377 asmlinkage int do_##name(struct cpu_user_regs *regs) \
378 { \
379 return do_trap(trapnr, str, regs, 1); \
380 }
382 DO_ERROR_NOCODE( 0, "divide error", divide_error)
383 DO_ERROR_NOCODE( 4, "overflow", overflow)
384 DO_ERROR_NOCODE( 5, "bounds", bounds)
385 DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
386 DO_ERROR(10, "invalid TSS", invalid_TSS)
387 DO_ERROR(11, "segment not present", segment_not_present)
388 DO_ERROR(12, "stack segment", stack_segment)
389 DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
390 DO_ERROR(17, "alignment check", alignment_check)
391 DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
393 static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
394 {
395 char signature[5], instr[2];
396 unsigned long a, b, c, d, eip;
398 a = regs->eax;
399 b = regs->ebx;
400 c = regs->ecx;
401 d = regs->edx;
402 eip = regs->eip;
404 /* Check for forced emulation signature: ud2 ; .ascii "xen". */
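/* "\xf\xb" are the two opcode bytes of ud2 (0F 0B), so the signature checked
 * below is the five bytes 0F 0B 'x' 'e' 'n'. */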
405 if ( copy_from_user(signature, (char *)eip, sizeof(signature)) ||
406 memcmp(signature, "\xf\xbxen", sizeof(signature)) )
407 return 0;
408 eip += sizeof(signature);
410 /* We only emulate CPUID. */
411 if ( copy_from_user(instr, (char *)eip, sizeof(instr)) ||
412 memcmp(instr, "\xf\xa2", sizeof(instr)) )
413 return 0;
414 eip += sizeof(instr);
416 __asm__ (
417 "cpuid"
418 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
419 : "0" (a), "1" (b), "2" (c), "3" (d) );
421 if ( regs->eax == 1 )
422 {
423 /* Modify Feature Information. */
424 clear_bit(X86_FEATURE_VME, &d);
425 clear_bit(X86_FEATURE_DE, &d);
426 clear_bit(X86_FEATURE_PSE, &d);
427 clear_bit(X86_FEATURE_PGE, &d);
428 clear_bit(X86_FEATURE_SEP, &d);
429 if ( !IS_PRIV(current->domain) )
430 clear_bit(X86_FEATURE_MTRR, &d);
431 }
433 regs->eax = a;
434 regs->ebx = b;
435 regs->ecx = c;
436 regs->edx = d;
437 regs->eip = eip;
439 return EXCRET_fault_fixed;
440 }
442 asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
443 {
444 struct vcpu *v = current;
445 struct trap_bounce *tb = &v->arch.trap_bounce;
446 struct trap_info *ti;
447 int rc;
449 DEBUGGER_trap_entry(TRAP_invalid_op, regs);
451 if ( unlikely(!guest_mode(regs)) )
452 {
453 DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
454 show_registers(regs);
455 panic("CPU%d FATAL TRAP: vector = %d (invalid opcode)\n",
456 smp_processor_id(), TRAP_invalid_op);
457 }
459 if ( (rc = emulate_forced_invalid_op(regs)) != 0 )
460 return rc;
462 ti = &current->arch.guest_context.trap_ctxt[TRAP_invalid_op];
463 tb->flags = TBF_EXCEPTION;
464 tb->cs = ti->cs;
465 tb->eip = ti->address;
466 if ( TI_GET_IF(ti) )
467 tb->flags |= TBF_INTERRUPT;
469 return 0;
470 }
472 asmlinkage int do_int3(struct cpu_user_regs *regs)
473 {
474 struct vcpu *v = current;
475 struct trap_bounce *tb = &v->arch.trap_bounce;
476 struct trap_info *ti;
478 DEBUGGER_trap_entry(TRAP_int3, regs);
480 if ( !guest_mode(regs) )
481 {
482 DEBUGGER_trap_fatal(TRAP_int3, regs);
483 show_registers(regs);
484 panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
485 }
487 ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
488 tb->flags = TBF_EXCEPTION;
489 tb->cs = ti->cs;
490 tb->eip = ti->address;
491 if ( TI_GET_IF(ti) )
492 tb->flags |= TBF_INTERRUPT;
494 return 0;
495 }
497 asmlinkage int do_machine_check(struct cpu_user_regs *regs)
498 {
499 fatal_trap(TRAP_machine_check, regs);
500 return 0;
501 }
503 void propagate_page_fault(unsigned long addr, u16 error_code)
504 {
505 struct trap_info *ti;
506 struct vcpu *v = current;
507 struct trap_bounce *tb = &v->arch.trap_bounce;
509 v->arch.guest_context.ctrlreg[2] = addr;
510 v->vcpu_info->arch.cr2 = addr;
512 /* Re-set error_code.user flag appropriately for the guest. */
513 error_code &= ~4;
514 if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
515 error_code |= 4;
517 ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
518 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
519 tb->error_code = error_code;
520 tb->cs = ti->cs;
521 tb->eip = ti->address;
522 if ( TI_GET_IF(ti) )
523 tb->flags |= TBF_INTERRUPT;
524 }
526 static int handle_gdt_ldt_mapping_fault(
527 unsigned long offset, struct cpu_user_regs *regs)
528 {
529 extern int map_ldt_shadow_page(unsigned int);
531 struct vcpu *v = current;
532 struct domain *d = v->domain;
533 int ret;
535 /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
536 unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
537 unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
539 /* Should never fault in another vcpu's area. */
540 BUG_ON(vcpu_area != current->vcpu_id);
542 /* Byte offset within the gdt/ldt sub-area. */
543 offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
545 if ( likely(is_ldt_area) )
546 {
547 /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
548 LOCK_BIGLOCK(d);
549 ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
550 UNLOCK_BIGLOCK(d);
552 if ( unlikely(ret == 0) )
553 {
554 /* In hypervisor mode? Leave it to the #PF handler to fix up. */
555 if ( !guest_mode(regs) )
556 return 0;
557 /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
558 propagate_page_fault(
559 v->arch.guest_context.ldt_base + offset, regs->error_code);
560 }
561 }
562 else
563 {
564 /* GDT fault: handle the fault as #GP(selector). */
565 regs->error_code = (u16)offset & ~7;
566 (void)do_general_protection(regs);
567 }
569 return EXCRET_fault_fixed;
570 }
572 #ifdef HYPERVISOR_VIRT_END
573 #define IN_HYPERVISOR_RANGE(va) \
574 (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
575 #else
576 #define IN_HYPERVISOR_RANGE(va) \
577 (((va) >= HYPERVISOR_VIRT_START))
578 #endif
580 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
581 {
582 struct vcpu *v = current;
583 struct domain *d = v->domain;
585 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
586 {
587 if ( shadow_mode_external(d) && guest_mode(regs) )
588 return shadow_fault(addr, regs);
589 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
590 return handle_gdt_ldt_mapping_fault(
591 addr - GDT_LDT_VIRT_START, regs);
592 }
593 else if ( unlikely(shadow_mode_enabled(d)) )
594 {
595 return shadow_fault(addr, regs);
596 }
597 else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
598 {
599 LOCK_BIGLOCK(d);
600 if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
601 unlikely(l2_linear_offset(addr) ==
602 d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
603 {
604 ptwr_flush(d, PTWR_PT_ACTIVE);
605 UNLOCK_BIGLOCK(d);
606 return EXCRET_fault_fixed;
607 }
609 if ( guest_kernel_mode(v, regs) &&
610 /* Protection violation on write? No reserved-bit violation? */
611 ((regs->error_code & 0xb) == 0x3) &&
612 ptwr_do_page_fault(d, addr, regs) )
613 {
614 UNLOCK_BIGLOCK(d);
615 return EXCRET_fault_fixed;
616 }
617 UNLOCK_BIGLOCK(d);
618 }
620 return 0;
621 }
623 static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
624 {
625 struct vcpu *v = current;
626 struct domain *d = v->domain;
627 int rc;
629 /*
630 * The only possible reason for a spurious page fault not to be picked
631 * up already is that a page directory was unhooked by writable page table
632 * logic and then reattached before the faulting VCPU could detect it.
633 */
634 if ( is_idle_domain(d) || /* no ptwr in idle domain */
635 IN_HYPERVISOR_RANGE(addr) || /* no ptwr on hypervisor addrs */
636 shadow_mode_enabled(d) || /* no ptwr logic in shadow mode */
637 ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault? */
638 return 0;
640 LOCK_BIGLOCK(d);
642 /*
643 * The page directory could have been detached again while we weren't
644 * holding the per-domain lock. Detect that and fix up if it's the case.
645 */
646 if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
647 unlikely(l2_linear_offset(addr) ==
648 d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
649 {
650 ptwr_flush(d, PTWR_PT_ACTIVE);
651 rc = 1;
652 }
653 else
654 {
655 /* Okay, walk the page tables. Only check for not-present faults.*/
656 rc = __spurious_page_fault(addr);
657 }
659 UNLOCK_BIGLOCK(d);
660 return rc;
661 }
663 /*
664 * #PF error code:
665 * Bit 0: Protection violation (=1) ; Page not present (=0)
666 * Bit 1: Write access
667 * Bit 2: User mode (=1) ; Supervisor mode (=0)
668 * Bit 3: Reserved bit violation
669 * Bit 4: Instruction fetch
670 */
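/* For example, error_code 0x02 is a supervisor-mode write to a not-present
 * page, while 0x07 is a user-mode write that violated page protections. */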
671 asmlinkage int do_page_fault(struct cpu_user_regs *regs)
672 {
673 unsigned long addr, fixup;
674 int rc;
676 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
678 DEBUGGER_trap_entry(TRAP_page_fault, regs);
680 perfc_incrc(page_faults);
682 if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
683 return rc;
685 if ( unlikely(!guest_mode(regs)) )
686 {
687 if ( spurious_page_fault(addr, regs) )
688 {
689 DPRINTK("Spurious fault in domain %u:%u at addr %lx\n",
690 current->domain->domain_id, current->vcpu_id, addr);
691 return EXCRET_not_a_fault;
692 }
694 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
695 {
696 perfc_incrc(copy_user_faults);
697 regs->eip = fixup;
698 return 0;
699 }
701 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
703 show_registers(regs);
704 show_page_walk(addr);
705 panic("CPU%d FATAL PAGE FAULT\n"
706 "[error_code=%04x]\n"
707 "Faulting linear address: %p\n",
708 smp_processor_id(), regs->error_code, _p(addr));
709 }
711 propagate_page_fault(addr, regs->error_code);
712 return 0;
713 }
715 long do_fpu_taskswitch(int set)
716 {
717 struct vcpu *v = current;
719 if ( set )
720 {
721 v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
722 stts();
723 }
724 else
725 {
726 v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
727 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
728 clts();
729 }
731 return 0;
732 }
734 /* Has the guest requested sufficient permission for this I/O access? */
735 static inline int guest_io_okay(
736 unsigned int port, unsigned int bytes,
737 struct vcpu *v, struct cpu_user_regs *regs)
738 {
739 u16 x;
740 #if defined(__x86_64__)
741 /* If in user mode, switch to kernel mode just to read I/O bitmap. */
742 int user_mode = !(v->arch.flags & TF_kernel_mode);
743 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
744 #elif defined(__i386__)
745 #define TOGGLE_MODE() ((void)0)
746 #endif
748 if ( v->arch.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
749 return 1;
751 if ( v->arch.iobmp_limit > (port + bytes) )
752 {
753 TOGGLE_MODE();
754 __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
755 TOGGLE_MODE();
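/* x holds the 16 bitmap bits covering this port; the access is allowed only
 * if all 'bytes' bits starting at bit (port & 7) are clear. */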
756 if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
757 return 1;
758 }
760 return 0;
761 }
763 /* Has the administrator granted sufficient permission for this I/O access? */
764 static inline int admin_io_okay(
765 unsigned int port, unsigned int bytes,
766 struct vcpu *v, struct cpu_user_regs *regs)
767 {
768 return ioports_access_permitted(v->domain, port, port + bytes - 1);
769 }
771 /* Check admin limits. Silently fail the access if it is disallowed. */
772 #define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
773 #define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
774 #define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
775 #define outb_user(_v, _p, _d, _r) \
776 (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
777 #define outw_user(_v, _p, _d, _r) \
778 (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
779 #define outl_user(_v, _p, _d, _r) \
780 (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
782 /* Propagate a fault back to the guest kernel. */
783 #define USER_READ_FAULT 4 /* user mode, read fault */
784 #define USER_WRITE_FAULT 6 /* user mode, write fault */
785 #define PAGE_FAULT(_faultaddr, _errcode) \
786 ({ propagate_page_fault(_faultaddr, _errcode); \
787 return EXCRET_fault_fixed; \
788 })
790 /* Instruction fetch with error handling. */
791 #define insn_fetch(_type, _size, _ptr) \
792 ({ unsigned long _x; \
793 if ( get_user(_x, (_type *)eip) ) \
794 PAGE_FAULT(eip, USER_READ_FAULT); \
795 eip += _size; (_type)_x; })
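/* Note that PAGE_FAULT(), and hence insn_fetch(), expands to a 'return' from
 * the enclosing emulation function: a failed guest access bounces a page
 * fault back to the guest and abandons the emulation. */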
797 static int emulate_privileged_op(struct cpu_user_regs *regs)
798 {
799 struct vcpu *v = current;
800 unsigned long *reg, eip = regs->eip, res;
801 u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
802 unsigned int port, i, op_bytes = 4, data;
803 u32 l, h;
805 /* Legacy prefixes. */
806 for ( i = 0; i < 8; i++ )
807 {
808 switch ( opcode = insn_fetch(u8, 1, eip) )
809 {
810 case 0x66: /* operand-size override */
811 op_bytes ^= 6; /* switch between 2/4 bytes */
812 break;
813 case 0x67: /* address-size override */
814 case 0x2e: /* CS override */
815 case 0x3e: /* DS override */
816 case 0x26: /* ES override */
817 case 0x64: /* FS override */
818 case 0x65: /* GS override */
819 case 0x36: /* SS override */
820 case 0xf0: /* LOCK */
821 case 0xf2: /* REPNE/REPNZ */
822 break;
823 case 0xf3: /* REP/REPE/REPZ */
824 rep_prefix = 1;
825 break;
826 default:
827 goto done_prefixes;
828 }
829 }
830 done_prefixes:
832 #ifdef __x86_64__
833 /* REX prefix. */
834 if ( (opcode & 0xf0) == 0x40 )
835 {
836 modrm_reg = (opcode & 4) << 1; /* REX.R */
837 modrm_rm = (opcode & 1) << 3; /* REX.B */
839 /* REX.W and REX.X do not need to be decoded. */
840 opcode = insn_fetch(u8, 1, eip);
841 }
842 #endif
844 /* Input/Output String instructions. */
845 if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
846 {
847 if ( rep_prefix && (regs->ecx == 0) )
848 goto done;
850 continue_io_string:
851 switch ( opcode )
852 {
853 case 0x6c: /* INSB */
854 op_bytes = 1;
855 case 0x6d: /* INSW/INSL */
856 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
857 goto fail;
858 switch ( op_bytes )
859 {
860 case 1:
861 data = (u8)inb_user((u16)regs->edx, v, regs);
862 if ( put_user((u8)data, (u8 *)regs->edi) )
863 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
864 break;
865 case 2:
866 data = (u16)inw_user((u16)regs->edx, v, regs);
867 if ( put_user((u16)data, (u16 *)regs->edi) )
868 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
869 break;
870 case 4:
871 data = (u32)inl_user((u16)regs->edx, v, regs);
872 if ( put_user((u32)data, (u32 *)regs->edi) )
873 PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
874 break;
875 }
876 regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
877 break;
879 case 0x6e: /* OUTSB */
880 op_bytes = 1;
881 case 0x6f: /* OUTSW/OUTSL */
882 if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
883 goto fail;
884 switch ( op_bytes )
885 {
886 case 1:
887 if ( get_user(data, (u8 *)regs->esi) )
888 PAGE_FAULT(regs->esi, USER_READ_FAULT);
889 outb_user((u8)data, (u16)regs->edx, v, regs);
890 break;
891 case 2:
892 if ( get_user(data, (u16 *)regs->esi) )
893 PAGE_FAULT(regs->esi, USER_READ_FAULT);
894 outw_user((u16)data, (u16)regs->edx, v, regs);
895 break;
896 case 4:
897 if ( get_user(data, (u32 *)regs->esi) )
898 PAGE_FAULT(regs->esi, USER_READ_FAULT);
899 outl_user((u32)data, (u16)regs->edx, v, regs);
900 break;
901 }
902 regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
903 break;
904 }
906 if ( rep_prefix && (--regs->ecx != 0) )
907 {
908 if ( !hypercall_preempt_check() )
909 goto continue_io_string;
910 eip = regs->eip;
911 }
913 goto done;
914 }
916 /* I/O Port and Interrupt Flag instructions. */
917 switch ( opcode )
918 {
919 case 0xe4: /* IN imm8,%al */
920 op_bytes = 1;
921 case 0xe5: /* IN imm8,%eax */
922 port = insn_fetch(u8, 1, eip);
923 exec_in:
924 if ( !guest_io_okay(port, op_bytes, v, regs) )
925 goto fail;
926 switch ( op_bytes )
927 {
928 case 1:
929 regs->eax &= ~0xffUL;
930 regs->eax |= (u8)inb_user(port, v, regs);
931 break;
932 case 2:
933 regs->eax &= ~0xffffUL;
934 regs->eax |= (u16)inw_user(port, v, regs);
935 break;
936 case 4:
937 regs->eax = (u32)inl_user(port, v, regs);
938 break;
939 }
940 goto done;
942 case 0xec: /* IN %dx,%al */
943 op_bytes = 1;
944 case 0xed: /* IN %dx,%eax */
945 port = (u16)regs->edx;
946 goto exec_in;
948 case 0xe6: /* OUT %al,imm8 */
949 op_bytes = 1;
950 case 0xe7: /* OUT %eax,imm8 */
951 port = insn_fetch(u8, 1, eip);
952 exec_out:
953 if ( !guest_io_okay(port, op_bytes, v, regs) )
954 goto fail;
955 switch ( op_bytes )
956 {
957 case 1:
958 outb_user((u8)regs->eax, port, v, regs);
959 break;
960 case 2:
961 outw_user((u16)regs->eax, port, v, regs);
962 break;
963 case 4:
964 outl_user((u32)regs->eax, port, v, regs);
965 break;
966 }
967 goto done;
969 case 0xee: /* OUT %al,%dx */
970 op_bytes = 1;
971 case 0xef: /* OUT %eax,%dx */
972 port = (u16)regs->edx;
973 goto exec_out;
975 case 0xfa: /* CLI */
976 case 0xfb: /* STI */
977 if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
978 goto fail;
979 /*
980 * This is just too dangerous to allow, in my opinion. Consider if the
981 * caller then tries to reenable interrupts using POPF: we can't trap
982 * that and we'll end up with hard-to-debug lockups. Fast & loose will
983 * do for us. :-)
984 */
985 /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
986 goto done;
988 case 0x0f: /* Two-byte opcode */
989 break;
991 default:
992 goto fail;
993 }
995 /* Remaining instructions only emulated from guest kernel. */
996 if ( !guest_kernel_mode(v, regs) )
997 goto fail;
999 /* Privileged (ring 0) instructions. */
1000 opcode = insn_fetch(u8, 1, eip);
1001 switch ( opcode )
1002 {
1003 case 0x06: /* CLTS */
1004 (void)do_fpu_taskswitch(0);
1005 break;
1007 case 0x09: /* WBINVD */
1008 /* Ignore the instruction if unprivileged. */
1009 if ( !cache_flush_permitted(v->domain) )
1010 /* Non-physdev domain attempted WBINVD; ignore for now since
1011 newer linux uses this in some start-of-day timing loops */
1012 ;
1013 else
1014 wbinvd();
1015 break;
1017 case 0x20: /* MOV CR?,<reg> */
1018 opcode = insn_fetch(u8, 1, eip);
1019 modrm_reg |= (opcode >> 3) & 7;
1020 modrm_rm |= (opcode >> 0) & 7;
1021 reg = decode_register(modrm_rm, regs, 0);
1022 switch ( modrm_reg )
1023 {
1024 case 0: /* Read CR0 */
1025 *reg = (read_cr0() & ~X86_CR0_TS) |
1026 v->arch.guest_context.ctrlreg[0];
1027 break;
1029 case 2: /* Read CR2 */
1030 *reg = v->arch.guest_context.ctrlreg[2];
1031 break;
1033 case 3: /* Read CR3 */
1034 *reg = pfn_to_paddr(mfn_to_gmfn(v->domain,
1035 pagetable_get_pfn(v->arch.guest_table)));
1036 break;
1038 case 4: /* Read CR4 */
1039 /*
1040 * Guests can read CR4 to see what features Xen has enabled. We
1041 * therefore lie about PGE & PSE as they are unavailable to guests.
1042 */
1043 *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
1044 break;
1046 default:
1047 goto fail;
1048 }
1049 break;
1051 case 0x21: /* MOV DR?,<reg> */
1052 opcode = insn_fetch(u8, 1, eip);
1053 modrm_reg |= (opcode >> 3) & 7;
1054 modrm_rm |= (opcode >> 0) & 7;
1055 reg = decode_register(modrm_rm, regs, 0);
1056 if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
1057 goto fail;
1058 *reg = res;
1059 break;
1061 case 0x22: /* MOV <reg>,CR? */
1062 opcode = insn_fetch(u8, 1, eip);
1063 modrm_reg |= (opcode >> 3) & 7;
1064 modrm_rm |= (opcode >> 0) & 7;
1065 reg = decode_register(modrm_rm, regs, 0);
1066 switch ( modrm_reg )
1067 {
1068 case 0: /* Write CR0 */
1069 if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
1070 {
1071 DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
1072 goto fail;
1073 }
1074 (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
1075 break;
1077 case 2: /* Write CR2 */
1078 v->arch.guest_context.ctrlreg[2] = *reg;
1079 v->vcpu_info->arch.cr2 = *reg;
1080 break;
1082 case 3: /* Write CR3 */
1083 LOCK_BIGLOCK(v->domain);
1084 cleanup_writable_pagetable(v->domain);
1085 (void)new_guest_cr3(gmfn_to_mfn(v->domain, paddr_to_pfn(*reg)));
1086 UNLOCK_BIGLOCK(v->domain);
1087 break;
1089 case 4:
1090 if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
1091 {
1092 DPRINTK("Attempt to change CR4 flags.\n");
1093 goto fail;
1094 }
1095 break;
1097 default:
1098 goto fail;
1099 }
1100 break;
1102 case 0x23: /* MOV <reg>,DR? */
1103 opcode = insn_fetch(u8, 1, eip);
1104 modrm_reg |= (opcode >> 3) & 7;
1105 modrm_rm |= (opcode >> 0) & 7;
1106 reg = decode_register(modrm_rm, regs, 0);
1107 if ( do_set_debugreg(modrm_reg, *reg) != 0 )
1108 goto fail;
1109 break;
1111 case 0x30: /* WRMSR */
1112 switch ( regs->ecx )
1113 {
1114 #ifdef CONFIG_X86_64
1115 case MSR_FS_BASE:
1116 if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
1117 goto fail;
1118 v->arch.guest_context.fs_base =
1119 ((u64)regs->edx << 32) | regs->eax;
1120 break;
1121 case MSR_GS_BASE:
1122 if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
1123 goto fail;
1124 v->arch.guest_context.gs_base_kernel =
1125 ((u64)regs->edx << 32) | regs->eax;
1126 break;
1127 case MSR_SHADOW_GS_BASE:
1128 if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
1129 goto fail;
1130 v->arch.guest_context.gs_base_user =
1131 ((u64)regs->edx << 32) | regs->eax;
1132 break;
1133 #endif
1134 default:
1135 if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
1136 (regs->eax != l) || (regs->edx != h) )
1137 DPRINTK("Domain attempted WRMSR %p from "
1138 "%08x:%08x to %08lx:%08lx.\n",
1139 _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
1140 break;
1141 }
1142 break;
1144 case 0x32: /* RDMSR */
1145 switch ( regs->ecx )
1146 {
1147 #ifdef CONFIG_X86_64
1148 case MSR_FS_BASE:
1149 regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL;
1150 regs->edx = v->arch.guest_context.fs_base >> 32;
1151 break;
1152 case MSR_GS_BASE:
1153 regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL;
1154 regs->edx = v->arch.guest_context.gs_base_kernel >> 32;
1155 break;
1156 case MSR_SHADOW_GS_BASE:
1157 regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
1158 regs->edx = v->arch.guest_context.gs_base_user >> 32;
1159 break;
1160 #endif
1161 case MSR_EFER:
1162 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
1163 goto fail;
1164 break;
1165 default:
1166 /* Everyone can read the MSR space. */
1167 /*DPRINTK("Domain attempted RDMSR %p.\n", _p(regs->ecx));*/
1168 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
1169 goto fail;
1170 break;
1171 }
1172 break;
1174 default:
1175 goto fail;
1176 }
1178 done:
1179 regs->eip = eip;
1180 return EXCRET_fault_fixed;
1182 fail:
1183 return 0;
1184 }
1186 asmlinkage int do_general_protection(struct cpu_user_regs *regs)
1187 {
1188 struct vcpu *v = current;
1189 struct trap_bounce *tb = &v->arch.trap_bounce;
1190 struct trap_info *ti;
1191 unsigned long fixup;
1193 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
1195 if ( regs->error_code & 1 )
1196 goto hardware_gp;
1198 if ( !guest_mode(regs) )
1199 goto gp_in_kernel;
1201 /*
1202 * Cunning trick to allow arbitrary "INT n" handling.
1204 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
1205 * instruction from trapping to the appropriate vector, when that might not
1206 * be expected by Xen or the guest OS. For example, that entry might be for
1207 * a fault handler (unlike traps, faults don't increment EIP), or might
1208 * expect an error code on the stack (which a software trap never
1209 * provides), or might be a hardware interrupt handler that doesn't like
1210 * being called spuriously.
1212 * Instead, a GPF occurs with the faulting IDT vector in the error code.
1213 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
1214 * clear to indicate that it's a software fault, not hardware.
1216 * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
1217 * okay because they can only be triggered by an explicit DPL-checked
1218 * instruction. The DPL specified by the guest OS for these vectors is NOT
1219 * CHECKED!!
1220 */
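/* For example, a guest "int $0x80" hitting a DPL-0 IDT entry faults with
 * error_code 0x402 (vector 0x80 << 3, with the IDT bit set), so the
 * error_code>>3 below recovers the vector. */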
1221 if ( (regs->error_code & 3) == 2 )
1222 {
1223 /* This fault must be due to <INT n> instruction. */
1224 ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
1225 if ( permit_softint(TI_GET_DPL(ti), v, regs) )
1226 {
1227 tb->flags = TBF_EXCEPTION;
1228 regs->eip += 2;
1229 goto finish_propagation;
1230 }
1231 }
1233 /* Emulate some simple privileged and I/O instructions. */
1234 if ( (regs->error_code == 0) &&
1235 emulate_privileged_op(regs) )
1236 return 0;
1238 #if defined(__i386__)
1239 if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
1240 (regs->error_code == 0) &&
1241 gpf_emulate_4gb(regs) )
1242 return 0;
1243 #endif
1245 /* Pass on GPF as is. */
1246 ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
1247 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
1248 tb->error_code = regs->error_code;
1249 finish_propagation:
1250 tb->cs = ti->cs;
1251 tb->eip = ti->address;
1252 if ( TI_GET_IF(ti) )
1253 tb->flags |= TBF_INTERRUPT;
1254 return 0;
1256 gp_in_kernel:
1258 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
1259 {
1260 DPRINTK("GPF (%04x): %p -> %p\n",
1261 regs->error_code, _p(regs->eip), _p(fixup));
1262 regs->eip = fixup;
1263 return 0;
1264 }
1266 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
1268 hardware_gp:
1269 show_registers(regs);
1270 panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
1271 smp_processor_id(), regs->error_code);
1272 return 0;
1273 }
1275 static void nmi_softirq(void)
1276 {
1277 /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
1278 evtchn_notify(dom0->vcpu[0]);
1279 }
1281 static void nmi_dom0_report(unsigned int reason_idx)
1282 {
1283 struct domain *d;
1285 if ( (d = dom0) == NULL )
1286 return;
1288 set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
1290 if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
1291 raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
1292 }
1294 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
1295 {
1296 switch ( opt_nmi[0] )
1297 {
1298 case 'd': /* 'dom0' */
1299 nmi_dom0_report(_XEN_NMIREASON_parity_error);
1300 case 'i': /* 'ignore' */
1301 break;
1302 default: /* 'fatal' */
1303 console_force_unlock();
1304 printk("\n\nNMI - MEMORY ERROR\n");
1305 fatal_trap(TRAP_nmi, regs);
1306 }
1308 outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable parity check */
1309 mdelay(1);
1310 outb((inb(0x61) & 0x0b) | 0x00, 0x61); /* enable parity check */
1311 }
1313 asmlinkage void io_check_error(struct cpu_user_regs *regs)
1314 {
1315 switch ( opt_nmi[0] )
1316 {
1317 case 'd': /* 'dom0' */
1318 nmi_dom0_report(_XEN_NMIREASON_io_error);
1319 case 'i': /* 'ignore' */
1320 break;
1321 default: /* 'fatal' */
1322 console_force_unlock();
1323 printk("\n\nNMI - I/O ERROR\n");
1324 fatal_trap(TRAP_nmi, regs);
1325 }
1327 outb((inb(0x61) & 0x0f) | 0x08, 0x61); /* clear-and-disable IOCK */
1328 mdelay(1);
1329 outb((inb(0x61) & 0x07) | 0x00, 0x61); /* enable IOCK */
1330 }
1332 static void unknown_nmi_error(unsigned char reason)
1333 {
1334 switch ( opt_nmi[0] )
1335 {
1336 case 'd': /* 'dom0' */
1337 nmi_dom0_report(_XEN_NMIREASON_unknown);
1338 case 'i': /* 'ignore' */
1339 break;
1340 default: /* 'fatal' */
1341 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
1342 printk("Dazed and confused, but trying to continue\n");
1343 printk("Do you have a strange power saving mode enabled?\n");
1347 static int dummy_nmi_callback(struct cpu_user_regs *regs, int cpu)
1349 return 0;
1352 static nmi_callback_t nmi_callback = dummy_nmi_callback;
1354 asmlinkage void do_nmi(struct cpu_user_regs *regs)
1355 {
1356 unsigned int cpu = smp_processor_id();
1357 unsigned char reason;
1359 ++nmi_count(cpu);
1361 if ( nmi_callback(regs, cpu) )
1362 return;
1364 if ( nmi_watchdog )
1365 nmi_watchdog_tick(regs);
1367 /* Only the BSP gets external NMIs from the system. */
1368 if ( cpu == 0 )
1369 {
1370 reason = inb(0x61);
1371 if ( reason & 0x80 )
1372 mem_parity_error(regs);
1373 else if ( reason & 0x40 )
1374 io_check_error(regs);
1375 else if ( !nmi_watchdog )
1376 unknown_nmi_error((unsigned char)(reason&0xff));
1377 }
1378 }
1380 void set_nmi_callback(nmi_callback_t callback)
1381 {
1382 nmi_callback = callback;
1383 }
1385 void unset_nmi_callback(void)
1386 {
1387 nmi_callback = dummy_nmi_callback;
1388 }
1390 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
1391 {
1392 struct trap_bounce *tb;
1393 struct trap_info *ti;
1395 setup_fpu(current);
1397 if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
1398 {
1399 tb = &current->arch.trap_bounce;
1400 ti = &current->arch.guest_context.trap_ctxt[TRAP_no_device];
1402 tb->flags = TBF_EXCEPTION;
1403 tb->cs = ti->cs;
1404 tb->eip = ti->address;
1405 if ( TI_GET_IF(ti) )
1406 tb->flags |= TBF_INTERRUPT;
1408 current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
1409 }
1411 return EXCRET_fault_fixed;
1412 }
1414 asmlinkage int do_debug(struct cpu_user_regs *regs)
1415 {
1416 unsigned long condition;
1417 struct vcpu *v = current;
1418 struct trap_bounce *tb = &v->arch.trap_bounce;
1419 struct trap_info *ti;
1421 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
1423 /* Mask out spurious debug traps due to lazy DR7 setting */
1424 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
1425 (v->arch.guest_context.debugreg[7] == 0) )
1426 {
1427 __asm__("mov %0,%%db7" : : "r" (0UL));
1428 goto out;
1429 }
1431 DEBUGGER_trap_entry(TRAP_debug, regs);
1433 if ( !guest_mode(regs) )
1434 {
1435 /* Clear TF just for absolute sanity. */
1436 regs->eflags &= ~EF_TF;
1437 /*
1438 * We ignore watchpoints when they trigger within Xen. This may happen
1439 * when a buffer is passed to us which previously had a watchpoint set
1440 * on it. No need to bump EIP; the only faulting trap is an instruction
1441 * breakpoint, which can't happen to us.
1442 */
1443 goto out;
1444 }
1446 /* Save debug status register where guest OS can peek at it */
1447 v->arch.guest_context.debugreg[6] = condition;
1449 ti = &v->arch.guest_context.trap_ctxt[TRAP_debug];
1450 tb->flags = TBF_EXCEPTION;
1451 tb->cs = ti->cs;
1452 tb->eip = ti->address;
1453 if ( TI_GET_IF(ti) )
1454 tb->flags |= TBF_INTERRUPT;
1456 out:
1457 return EXCRET_not_a_fault;
1458 }
1460 asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
1461 {
1462 return EXCRET_not_a_fault;
1463 }
1465 void set_intr_gate(unsigned int n, void *addr)
1466 {
1467 #ifdef __i386__
1468 int i;
1469 /* Keep secondary tables in sync with IRQ updates. */
1470 for ( i = 1; i < NR_CPUS; i++ )
1471 if ( idt_tables[i] != NULL )
1472 _set_gate(&idt_tables[i][n], 14, 0, addr);
1473 #endif
1474 _set_gate(&idt_table[n], 14, 0, addr);
1475 }
1477 void set_system_gate(unsigned int n, void *addr)
1478 {
1479 _set_gate(idt_table+n,14,3,addr);
1480 }
1482 void set_task_gate(unsigned int n, unsigned int sel)
1483 {
1484 idt_table[n].a = sel << 16;
1485 idt_table[n].b = 0x8500;
1486 }
1488 void set_tss_desc(unsigned int n, void *addr)
1489 {
1490 _set_tssldt_desc(
1491 gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
1492 (unsigned long)addr,
1493 offsetof(struct tss_struct, __cacheline_filler) - 1,
1494 9);
1495 }
1497 void __init trap_init(void)
1498 {
1499 extern void percpu_traps_init(void);
1501 /*
1502 * Note that interrupt gates are always used, rather than trap gates. We
1503 * must have interrupts disabled until DS/ES/FS/GS are saved because the
1504 * first activation must have the "bad" value(s) for these registers and
1505 * we may lose them if another activation is installed before they are
1506 * saved. The page-fault handler also needs interrupts disabled until %cr2
1507 * has been read and saved on the stack.
1508 */
1509 set_intr_gate(TRAP_divide_error,&divide_error);
1510 set_intr_gate(TRAP_debug,&debug);
1511 set_intr_gate(TRAP_nmi,&nmi);
1512 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
1513 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
1514 set_intr_gate(TRAP_bounds,&bounds);
1515 set_intr_gate(TRAP_invalid_op,&invalid_op);
1516 set_intr_gate(TRAP_no_device,&device_not_available);
1517 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
1518 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
1519 set_intr_gate(TRAP_no_segment,&segment_not_present);
1520 set_intr_gate(TRAP_stack_error,&stack_segment);
1521 set_intr_gate(TRAP_gp_fault,&general_protection);
1522 set_intr_gate(TRAP_page_fault,&page_fault);
1523 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
1524 set_intr_gate(TRAP_copro_error,&coprocessor_error);
1525 set_intr_gate(TRAP_alignment_check,&alignment_check);
1526 set_intr_gate(TRAP_machine_check,&machine_check);
1527 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
1529 percpu_traps_init();
1531 cpu_init();
1533 open_softirq(NMI_SOFTIRQ, nmi_softirq);
1534 }
1537 long do_set_trap_table(GUEST_HANDLE(trap_info_t) traps)
1538 {
1539 struct trap_info cur;
1540 struct trap_info *dst = current->arch.guest_context.trap_ctxt;
1541 long rc = 0;
1543 /* If no table is presented then clear the entire virtual IDT. */
1544 if ( guest_handle_is_null(traps) )
1545 {
1546 memset(dst, 0, 256 * sizeof(*dst));
1547 init_int80_direct_trap(current);
1548 return 0;
1549 }
1551 for ( ; ; )
1552 {
1553 if ( hypercall_preempt_check() )
1554 {
1555 rc = hypercall_create_continuation(
1556 __HYPERVISOR_set_trap_table, "h", traps);
1557 break;
1558 }
1560 if ( copy_from_guest(&cur, traps, 1) )
1561 {
1562 rc = -EFAULT;
1563 break;
1564 }
1566 if ( cur.address == 0 )
1567 break;
1569 fixup_guest_code_selector(cur.cs);
1571 memcpy(&dst[cur.vector], &cur, sizeof(cur));
1573 if ( cur.vector == 0x80 )
1574 init_int80_direct_trap(current);
1576 guest_handle_add_offset(traps, 1);
1577 }
1579 return rc;
1580 }
1583 long set_debugreg(struct vcpu *p, int reg, unsigned long value)
1584 {
1585 int i;
1587 switch ( reg )
1588 {
1589 case 0:
1590 if ( !access_ok(value, sizeof(long)) )
1591 return -EPERM;
1592 if ( p == current )
1593 __asm__ ( "mov %0, %%db0" : : "r" (value) );
1594 break;
1595 case 1:
1596 if ( !access_ok(value, sizeof(long)) )
1597 return -EPERM;
1598 if ( p == current )
1599 __asm__ ( "mov %0, %%db1" : : "r" (value) );
1600 break;
1601 case 2:
1602 if ( !access_ok(value, sizeof(long)) )
1603 return -EPERM;
1604 if ( p == current )
1605 __asm__ ( "mov %0, %%db2" : : "r" (value) );
1606 break;
1607 case 3:
1608 if ( !access_ok(value, sizeof(long)) )
1609 return -EPERM;
1610 if ( p == current )
1611 __asm__ ( "mov %0, %%db3" : : "r" (value) );
1612 break;
1613 case 6:
1614 /*
1615 * DR6: Bits 4-11,16-31 reserved (set to 1).
1616 * Bit 12 reserved (set to 0).
1617 */
1618 value &= 0xffffefff; /* reserved bits => 0 */
1619 value |= 0xffff0ff0; /* reserved bits => 1 */
1620 if ( p == current )
1621 __asm__ ( "mov %0, %%db6" : : "r" (value) );
1622 break;
1623 case 7:
1624 /*
1625 * DR7: Bit 10 reserved (set to 1).
1626 * Bits 11-12,14-15 reserved (set to 0).
1627 * Privileged bits:
1628 * GD (bit 13): must be 0.
1629 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
1630 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
1631 */
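/* For example, a guest writing 0x1 (enable breakpoint 0) gets 0x401 stored:
 * reserved bit 10 is forced to 1, and R/W0 = LEN0 = 00, i.e. a 1-byte
 * instruction breakpoint. */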
1632 /* DR7 == 0 => debugging disabled for this domain. */
1633 if ( value != 0 )
1634 {
1635 value &= 0xffff27ff; /* reserved bits => 0 */
1636 value |= 0x00000400; /* reserved bits => 1 */
1637 if ( (value & (1<<13)) != 0 ) return -EPERM;
1638 for ( i = 0; i < 16; i += 2 )
1639 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
1640 }
1641 if ( p == current )
1642 __asm__ ( "mov %0, %%db7" : : "r" (value) );
1643 break;
1644 default:
1645 return -EINVAL;
1646 }
1648 p->arch.guest_context.debugreg[reg] = value;
1649 return 0;
1650 }
1652 long do_set_debugreg(int reg, unsigned long value)
1653 {
1654 return set_debugreg(current, reg, value);
1655 }
1657 unsigned long do_get_debugreg(int reg)
1658 {
1659 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
1660 return current->arch.guest_context.debugreg[reg];
1661 }
1663 /*
1664 * Local variables:
1665 * mode: C
1666 * c-set-style: "BSD"
1667 * c-basic-offset: 4
1668 * tab-width: 4
1669 * indent-tabs-mode: nil
1670 * End:
1671 */