ia64/xen-unstable

xen/arch/x86/traps.c @ 3105:f7a9de9a462f

bitkeeper revision 1.1159.189.6 (41a4df56fjKgjR75gUVniMEBSnS-9Q)

Unlock biglock on hypercall preemption.
author cl349@arcadians.cl.cam.ac.uk
date Wed Nov 24 19:21:58 2004 +0000 (2004-11-24)
parents 2fae9947de6f
children 75f82adfcc90
/******************************************************************************
 * arch/i386/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
#if defined(__i386__)

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
struct desc_struct idt_table[256] = { {0, 0}, };
/* All other CPUs have their own copy. */
struct desc_struct *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

int kstack_depth_to_print = 8*20;
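
/*
 * The stack dumpers below have no frame-pointer information to work with:
 * they scan raw words on the stack and treat any value falling within
 * Xen's own text section (_stext.._etext) as a probable return address.
 * Expect occasional false positives from stale stack contents.
 */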
static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}
void show_guest_stack(void)
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n       ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}
void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n   ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}
void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n       ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace(esp);
}
void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( regs->cs & 3 )
    {
        esp = regs->esp;
        ss  = regs->ss & 0xffff;
        ds  = regs->ds & 0xffff;
        es  = regs->es & 0xffff;
        fs  = regs->fs & 0xffff;
        gs  = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss  = __HYPERVISOR_DS;
        ds  = __HYPERVISOR_DS;
        es  = __HYPERVISOR_DS;
        fs  = __HYPERVISOR_DS;
        gs  = __HYPERVISOR_DS;
    }

    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}
/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
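
/*
 * Exception propagation to a guest works by filling in the per-vcpu
 * trap_bounce structure: tb->cs/tb->eip name the guest's registered
 * handler, and tb->flags records whether an error code (and, for page
 * faults, %cr2) must be pushed as well. The exception frame itself is
 * built on the guest kernel stack by the exit path (presumably the
 * bounce-frame code in entry.S) when we next return to the guest.
 */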
static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !(regs->cs & 3) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}
#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}
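
/*
 * Each invocation below stamps out one trivial handler. For example,
 * DO_ERROR_NOCODE( 0, "divide error", divide_error) expands to:
 *
 *   asmlinkage int do_divide_error(struct xen_regs *regs)
 *   {
 *       return do_trap(0, "divide error", regs, 0);
 *   }
 *
 * The DO_ERROR variant differs only in passing use_error_code=1, for
 * the vectors on which the CPU pushes an error code.
 */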
DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
asmlinkage int do_int3(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( unlikely((regs->cs & 3) == 0) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;

    return 0;
}
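
/*
 * Double faults enter via a task gate, so the handler below runs on its
 * own TSS and stack even if the faulting context's stack is trashed. The
 * faulting CPU is recovered from the back_link selector saved in the
 * double-fault TSS: (back_link>>3) is the GDT index of the faulting
 * task's TSS, and the final ">>1" assumes per-CPU TSS entries occupy
 * every other GDT slot starting at __FIRST_TSS_ENTRY.
 */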
asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}
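
/*
 * Page-fault handling tries a series of fast paths before involving the
 * guest: (1) flush/emulate for the writable-pagetable machinery, (2) the
 * shadow-mode fault handler, (3) lazy mapping of guest LDT pages. Only
 * if none of these claims the fault is it bounced to the guest's #PF
 * handler, or matched against Xen's exception table for faults taken in
 * ring 0 (e.g. inside copy_from_user).
 */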
asmlinkage int do_page_fault(struct xen_regs *regs)
{
    trap_info_t *ti;
    unsigned long off, addr, fixup;
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    extern int map_ldt_shadow_page(unsigned int);
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    int cpu = ed->processor;
    int ret;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            LOCK_BIGLOCK(d);
            ptwr_flush(PTWR_PT_ACTIVE);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }

        if ( (addr < PAGE_OFFSET) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(ed->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            return EXCRET_fault_fixed;
        }
    }

    if ( unlikely(ed->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
         (addr < (LDT_VIRT_START(ed) + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        LOCK_BIGLOCK(d);
        off  = addr - LDT_VIRT_START(ed);
        addr = ed->mm.ldt_base + off;
        ret = map_ldt_shadow_page(off >> PAGE_SHIFT);
        UNLOCK_BIGLOCK(d);
        if ( likely(ret) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( unlikely(!(regs->cs & 3)) )
        goto xen_fault;

    ti = ed->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = regs->error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !ed->mm.shadow_mode )
            DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}
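
/*
 * A #GP error code has the layout of a segment-selector error code:
 * bit 0 (EXT) is set if the event originated externally, bit 1 (IDT) is
 * set if the offending reference was to an IDT entry, and bits 3-15 hold
 * the selector/vector index. Hence the tests on (error_code & 3) below,
 * and error_code>>3 to recover the software-interrupt vector.
 */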
asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    /* Badness if error in ring 0, or result of an interrupt. */
    if ( !(regs->cs & 3) || (regs->error_code & 1) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (regs->cs & 3) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08x -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}
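
/*
 * The two specific NMI sources below (memory parity and I/O check) are
 * presumably dispatched from the NMI entry stub according to the reason
 * bits read from system control port B (0x61); anything else lands in
 * unknown_nmi_error() via do_nmi().
 */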
asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason, struct xen_regs * regs)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff), regs);
}
unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
}
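
/*
 * Lazy FPU handling: CR0.TS is set on context switch, so the first FPU
 * instruction a vcpu executes traps here (#NM, vector 7). We clear TS,
 * then either restore the saved FPU state or initialise it on first use.
 * If the guest itself requested TS semantics via the fpu_taskswitch
 * hypercall (EDF_GUEST_STTS), the trap is additionally bounced to the
 * guest's own #NM handler.
 */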
asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
    {
        if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
    {
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->thread.traps[7].cs;
        tb->eip   = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}
asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct exec_domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    DEBUGGER_trap_entry(TRAP_debug, regs);

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    if ( (regs->cs & 3) == 0 )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may happen
         * when a buffer is passed to us which previously had a watchpoint set
         * on it. No need to bump EIP; the only faulting trap is an instruction
         * breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = d->thread.traps[1].cs;
    tb->eip   = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}
asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}
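
/*
 * _set_gate() builds a 64-bit IDT gate descriptor as two 32-bit words:
 * the low word is (selector<<16)|(offset&0xffff); the high word is
 * (offset&0xffff0000)|0x8000|(dpl<<13)|(type<<8). Worked example for an
 * interrupt gate (type 14) at DPL 0: 0x8000+(0<<13)+(14<<8) = 0x8e00,
 * i.e. present, ring 0, 32-bit interrupt gate.
 */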
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
  int __d0, __d1; \
  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
    "movw %4,%%dx\n\t" \
    "movl %%eax,%0\n\t" \
    "movl %%edx,%1" \
    :"=m" (*((long *) (gate_addr))), \
     "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
    :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
     "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)
void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}
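
/*
 * A task gate needs only a TSS selector: word a carries the selector in
 * its top half, and 0x8500 in word b encodes present, DPL 0, type 5
 * (task gate); the offset fields are unused.
 */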
#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
        (((base) & 0x00ff0000)>>16) | \
        ((limit) & 0xf0000) | \
        ((dpl)<<13) | \
        (0x00408000) | \
        ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
        ((limit) & 0x0ffff); }

#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    "movw %%ax,2(%2)\n\t" \
    "rorl $16,%%eax\n\t" \
    "movb %%al,4(%2)\n\t" \
    "movb %4,5(%2)\n\t" \
    "movb $0,6(%2)\n\t" \
    "movb %%ah,7(%2)\n\t" \
    "rorl $16,%%eax" \
    : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}
void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds     = __HYPERVISOR_DS;
    tss->es     = __HYPERVISOR_DS;
    tss->ss     = __HYPERVISOR_DS;
    tss->esp    = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3  = __pa(idle_pg_table);
    tss->cs     = __HYPERVISOR_CS;
    tss->eip    = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until %cr2
     * has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
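
/*
 * A preemptible hypercall: locked_hypercall_may_preempt() may abandon
 * this invocation (dropping the big lock on preemption, per this
 * changeset) and arrange for the guest to re-issue the hypercall with
 * the current 'traps' argument. That is why 'traps' is advanced only
 * after each entry has been committed, making the loop restartable at
 * any iteration boundary.
 */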
long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    LOCK_BIGLOCK(current->domain);

    for ( ; ; )
    {
        locked_hypercall_may_preempt(current->domain,
                                     __HYPERVISOR_set_trap_table, 1, traps);

        if ( copy_from_user(&cur, traps, sizeof(cur)) )
        {
            /* Don't leak the big lock on the error paths. */
            UNLOCK_BIGLOCK(current->domain);
            return -EFAULT;
        }

        if ( cur.address == 0 )
            break;

        if ( !VALID_CODESEL(cur.cs) )
        {
            UNLOCK_BIGLOCK(current->domain);
            return -EPERM;
        }

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    UNLOCK_BIGLOCK(current->domain);

    return 0;
}
long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct exec_domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector    = event_selector;
    d->thread.event_address     = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address  = failsafe_address;

    return 0;
}
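
/*
 * Fast traps: for the handful of vectors handled below, the guest's
 * handler is installed directly in the IDT as a 32-bit trap gate
 * (type 0xf, hence the 0x8f00 in the descriptor built here) at the
 * guest's chosen DPL, so e.g. 'int $0x80' dispatches straight to the
 * guest kernel without first trapping into Xen.
 */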
long set_fast_trap(struct exec_domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

    p->thread.fast_trap_idx    = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}
long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(EDF_GUEST_STTS, &current->ed_flags);
    stts();
    return 0;
}
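
/*
 * Guest access to the hardware debug registers is validated below:
 * breakpoint addresses (DR0-DR3) must lie below PAGE_OFFSET-4 so a
 * guest cannot plant a watchpoint on hypervisor addresses, and the
 * reserved/privileged bits of DR6/DR7 are forced to safe values.
 */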
long set_debugreg(struct exec_domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}
long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}
#endif /* __i386__ */