ia64/xen-unstable

view xen/arch/x86/traps.c @ 5424:02d442d39367

bitkeeper revision 1.1159.258.165 (42a9a534b-NRTORtgH1Qjzz6EjTo6Q)

Ensure we only handle writable pagetable faults taken in guest kernel
mode.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Jun 10 14:35:32 2005 +0000 (2005-06-10)
parents d1189200b017
children 4bcb6d1a8fc5
/******************************************************************************
 * arch/i386/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>

/*
 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
 *  fatal:  Xen prints diagnostic message and then hangs.
 *  dom0:   The NMI is virtualised to DOM0.
 *  ignore: The NMI error is cleared and ignored.
 */
#ifdef NDEBUG
char opt_nmi[10] = "dom0";
#else
char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);
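
/*
 * The option is set via the "nmi=" Xen boot parameter registered just
 * above; e.g. booting with "nmi=ignore" overrides the build-time default
 * (dom0 for release builds, fatal for debug builds).
 */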

#if defined(__i386__)

/* True if the fault was taken in guest context: either the CPU was in
 * VM86 mode, or it was executing outside ring 0 (Xen runs in ring 0). */
#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
struct desc_struct idt_table[256] = { {0, 0}, };
/* All other CPUs have their own copy. */
struct desc_struct *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

int kstack_depth_to_print = 8*20;

static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

void show_guest_stack(void)
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n       ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}
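
/*
 * Note that show_trace() below is purely heuristic: it scans raw words on
 * the stack and prints any value that lies inside Xen's text section (per
 * kernel_text_address() above) as a potential return address. With no
 * frame-pointer walk, stale return addresses and function pointers still
 * sitting on the stack will be printed too.
 */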

void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n   ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}

void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n       ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace( esp );
}

void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( GUEST_FAULT(regs) )
    {
        esp = regs->esp;
        ss  = regs->ss & 0xffff;
        ds  = regs->ds & 0xffff;
        es  = regs->es & 0xffff;
        fs  = regs->fs & 0xffff;
        gs  = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss  = __HYPERVISOR_DS;
        ds  = __HYPERVISOR_DS;
        es  = __HYPERVISOR_DS;
        fs  = __HYPERVISOR_DS;
        gs  = __HYPERVISOR_DS;
    }

    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}

/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
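
/*
 * Guest-visible exceptions are not delivered directly by do_trap() and the
 * handlers below. Instead they fill in the domain's trap_bounce structure
 * with the CS:EIP of the handler the guest registered (plus an error code
 * where applicable); the return-to-guest path then builds an exception
 * frame on the guest kernel stack as if the CPU had delivered the trap
 * there itself. The frame construction is not in this file -- it lives in
 * the low-level entry code.
 */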

static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}

#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
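
/*
 * For reference, DO_ERROR_NOCODE( 0, "divide error", divide_error) above
 * expands to:
 *
 *     asmlinkage int do_divide_error(struct xen_regs *regs)
 *     {
 *         return do_trap(0, "divide error", regs, 0);
 *     }
 *
 * DO_ERROR differs only in passing use_error_code=1, so that do_trap()
 * also propagates the hardware error code to the guest.
 */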

asmlinkage int do_int3(struct xen_regs *regs)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_FAULT(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;

    return 0;
}
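
/*
 * Double faults are taken through a hardware task gate (installed by
 * trap_init() below), so the CPU switches to doublefault_tss and its
 * private stack; this lets us produce diagnostics even when the fault was
 * caused by running off the end of a Xen stack. The back_link field of the
 * fresh TSS names the TSS we faulted out of, which is how the CPU number
 * is recovered below.
 */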

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}

void propagate_page_fault(unsigned long addr, u16 error_code)
{
    trap_info_t *ti;
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    ti = d->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
}

asmlinkage int do_page_fault(struct xen_regs *regs)
{
    unsigned long off, addr, fixup;
    struct domain *d = current;
    extern int map_ldt_shadow_page(unsigned int);
    int cpu = d->processor;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(PTWR_PT_ACTIVE);
            return EXCRET_fault_fixed;
        }
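
        /*
         * The write-protection fault below may be a guest write to one of
         * its own pagetables. Per the changeset description above, such
         * faults are handled only when taken in guest kernel mode:
         * (regs->cs & 3) == 1 restricts the check to ring 1, and
         * (regs->error_code & 3) == 3 selects writes to a present page.
         * VM86-mode and ring-3 faults fall through to the normal
         * propagation path.
         */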
        if ( (addr < PAGE_OFFSET) &&
             !VM86_MODE(regs) && ((regs->cs & 3) == 1) && /* ring 1 */
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(d->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            return EXCRET_fault_fixed;
        }
    }

    if ( unlikely(d->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START) &&
         (addr < (LDT_VIRT_START + (d->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        off  = addr - LDT_VIRT_START;
        addr = d->mm.ldt_base + off;
        if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    propagate_page_fault(addr, regs->error_code);
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !d->mm.shadow_mode )
            DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}

asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_FAULT(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to an <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs  = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08x -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}

asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs *regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff));
}

unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0, VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0, VIRQ_IO_ERR);
}
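
/*
 * Parity and I/O-check NMIs destined for DOM0 cannot be delivered
 * synchronously: an NMI can arrive at any moment, including while DOM0 is
 * descheduled. The bits of nmi_softirq_reason are set by the low-level NMI
 * handling outside this file (when opt_nmi is "dom0"), and the actual
 * virtual-IRQ delivery happens here from softirq context once it is safe.
 */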

asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(DF_USEDFPU, &current->flags) )
    {
        if ( test_bit(DF_DONEFPUINIT, &current->flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(DF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(DF_GUEST_STTS, &current->flags) )
    {
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->thread.traps[7].cs;
        tb->eip   = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}
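
/*
 * Lazy FPU context switching: CR0.TS is set when we switch away from a
 * domain that used the FPU, so the next FPU instruction faults with
 * device-not-available (vector 7) and arrives in math_state_restore()
 * above. We clear TS, restore or initialise the FPU state, and -- if the
 * guest itself had requested TS via do_fpu_taskswitch() below -- bounce
 * the trap onwards so the guest's own vector-7 handler also runs.
 */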

asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    DEBUGGER_trap_entry(TRAP_debug, regs);

    if ( !GUEST_FAULT(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may
         * happen when a buffer is passed to us which previously had a
         * watchpoint set on it. No need to bump EIP; the only faulting
         * trap is an instruction breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = d->thread.traps[1].cs;
    tb->eip   = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}

BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
asmlinkage void smp_deferred_nmi(struct xen_regs regs)
{
    ack_APIC_irq();
    do_nmi(&regs, 0);
}

#define _set_gate(gate_addr,type,dpl,addr) \
do { \
  int __d0, __d1; \
  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
                        "movw %4,%%dx\n\t" \
                        "movl %%eax,%0\n\t" \
                        "movl %%edx,%1" \
                        :"=m" (*((long *) (gate_addr))), \
                         "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
                        :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
                         "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)
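
/*
 * _set_gate() assembles a standard IA-32 gate descriptor: the low dword is
 * (selector << 16) | (offset 15:0) with selector __HYPERVISOR_CS; the high
 * dword carries offset 31:16 plus the access word
 * 0x8000 + (dpl << 13) + (type << 8), i.e. present bit, descriptor
 * privilege level and gate type (14 = 32-bit interrupt gate, 15 = 32-bit
 * trap gate). For example, _set_gate(idt_table+n, 14, 0, addr) yields the
 * access word 0x8e00: a present, DPL-0, 32-bit interrupt gate.
 */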

void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
                       (((base) & 0x00ff0000)>>16) | \
                       ((limit) & 0xf0000) | \
                       ((dpl)<<13) | \
                       (0x00408000) | \
                       ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
                   ((limit) & 0x0ffff); }

#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
                      "movw %%ax,2(%2)\n\t" \
                      "rorl $16,%%eax\n\t" \
                      "movb %%al,4(%2)\n\t" \
                      "movb %4,5(%2)\n\t" \
                      "movb $0,6(%2)\n\t" \
                      "movb %%ah,7(%2)\n\t" \
                      "rorl $16,%%eax" \
                      : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}

void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds     = __HYPERVISOR_DS;
    tss->es     = __HYPERVISOR_DS;
    tss->ss     = __HYPERVISOR_DS;
    tss->esp    = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3  = __pa(idle_pg_table);
    tss->cs     = __HYPERVISOR_CS;
    tss->eip    = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until
     * %cr2 has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
    set_intr_gate(TRAP_deferred_nmi,&deferred_nmi);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}

long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_set_trap_table, 1, traps);

        if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;

        if ( cur.address == 0 ) break;

        if ( !VALID_CODESEL(cur.cs) ) return -EPERM;

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    return 0;
}
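
/*
 * do_set_trap_table() is a preemptible hypercall: before consuming each
 * entry it calls hypercall_preempt_check(), and if an event or softirq is
 * pending it returns a continuation that re-invokes
 * __HYPERVISOR_set_trap_table with the advanced 'traps' pointer. A guest
 * installing a large trap table therefore cannot monopolise the CPU.
 */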

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector    = event_selector;
    d->thread.event_address     = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address  = failsafe_address;

    return 0;
}

long set_fast_trap(struct domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

    p->thread.fast_trap_idx    = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}
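
/*
 * A "fast trap" installs the guest's handler directly in the real IDT, so
 * the selected software interrupt (0x20-0x2f, or 0x80) reaches the guest
 * without any Xen involvement. fast_trap_desc.a/.b encode a present 32-bit
 * trap gate (the 0x8f00) at the DPL the guest configured. Interrupt-gate
 * semantics (TI_GET_IF) are refused above because, as the comment notes,
 * the CPU cannot be made to clear the guest's virtual event mask
 * automatically.
 */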

long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(DF_GUEST_STTS, &current->flags);
    stts();
    return 0;
}
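
/*
 * do_fpu_taskswitch() is the guest's substitute for setting CR0.TS
 * directly: the request is recorded in DF_GUEST_STTS and the real TS bit
 * is set, so the guest's next FPU instruction faults into
 * math_state_restore() above, which then reflects the trap to the guest's
 * own handler.
 */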

long set_debugreg(struct domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}

long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}

#else

asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
}

#endif /* __i386__ */