view xen/arch/x86/x86_32/traps.c @ 10445:8d75d4e0af1e

[XEN] Improve double-fault tracing -- print backtrace on stack overflow.

Signed-off-by: Keir Fraser <keir@xensource.com>
author   kfraser@dhcp93.uk.xensource.com
date     Mon Jun 19 11:21:40 2006 +0100
parents  e1ae7b3cb5b7
children f3561b1ee7a3

#include <xen/config.h>
#include <xen/compile.h>
#include <xen/domain_page.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/console.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/symbols.h>
#include <xen/reboot.h>
#include <xen/nmi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>

#include <public/callback.h>

/* All CPUs have their own IDT to allow int80 direct trap. */
idt_entry_t *idt_tables[NR_CPUS] = { 0 };

void show_registers(struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    char taint_str[TAINT_STRING_MAX_LEN];
    const char *context;

    if ( hvm_guest(current) && guest_mode(regs) )
    {
        context = "hvm";
        hvm_store_cpu_guest_regs(current, &fault_regs, fault_crs);
    }
    else
    {
        context = guest_mode(regs) ? "guest" : "hypervisor";

        if ( !guest_mode(regs) )
        {
            fault_regs.esp = (unsigned long)&regs->esp;
            fault_regs.ss = read_segment_register(ss);
            fault_regs.ds = read_segment_register(ds);
            fault_regs.es = read_segment_register(es);
            fault_regs.fs = read_segment_register(fs);
            fault_regs.gs = read_segment_register(gs);
        }

        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
    }

    printk("----[ Xen-%d.%d%s %s ]----\n",
           XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION,
           print_tainted(taint_str));
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           smp_processor_id(), fault_regs.cs, fault_regs.eip);
    if ( !guest_mode(regs) )
        print_symbol(" %s", fault_regs.eip);
    printk("\nEFLAGS: %08x CONTEXT: %s\n", fault_regs.eflags, context);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
    printk("cr0: %08lx cr3: %08lx\n", fault_crs[0], fault_crs[3]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           fault_regs.ds, fault_regs.es, fault_regs.fs,
           fault_regs.gs, fault_regs.ss, fault_regs.cs);

    show_stack(regs);
}

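/*
 * For reference, an x86_32 linear address decomposes as:
 *   PAE:      2-bit L3 index | 9-bit L2 index | 9-bit L1 index | 12-bit offset
 *   non-PAE:                  10-bit L2 index | 10-bit L1 index | 12-bit offset
 * The l?_table_offset() macros below extract these index fields; a set
 * _PAGE_PSE flag in an L2 entry denotes a superpage mapping with no L1 table.
 */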
void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
#ifdef CONFIG_X86_PAE
    l3_pgentry_t l3e, *l3t;
#endif
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %08lx:\n", addr);

#ifdef CONFIG_X86_PAE
    l3t = map_domain_page(mfn);
    l3e = l3t[l3_table_offset(addr)];
    mfn = l3e_get_pfn(l3e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
    unmap_domain_page(l3t);
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        return;
#endif

    l2t = map_domain_page(mfn);
    l2e = l2t[l2_table_offset(addr)];
    mfn = l2e_get_pfn(l2e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    unmap_domain_page(l2t);
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) )
        return;

    l1t = map_domain_page(mfn);
    l1e = l1t[l1_table_offset(addr)];
    mfn = l1e_get_pfn(l1e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
    unmap_domain_page(l1t);
}

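/*
 * Double faults are handled via a hardware task switch: the task gate
 * installed in percpu_traps_init() below makes the CPU load a complete
 * register context, including the fresh stack defined here, from
 * doublefault_tss. That is what allows diagnostics to be printed even
 * when the fault was caused by blowing the primary stack.
 */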
#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link >> 3) - __FIRST_TSS_ENTRY) >> 1;

    watchdog_disable();

    console_force_unlock();

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           cpu, tss->cs, tss->eip);
    print_symbol(" %s\n", tss->eip);
    printk("EFLAGS: %08x\n", tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    show_stack_overflow(tss->esp);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    machine_halt();
}

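/*
 * Layout of the guest frame consumed by do_iret(), at regs->esp (byte
 * offsets):
 *    0: EAX (clobbered by the hypercall; restored here)
 *    4: EIP
 *    8: CS
 *   12: EFLAGS
 *   16: ESP, SS                  (+8 bytes, returning to ring 2/3)
 *   16: ESP, SS, ES, DS, FS, GS  (+24 bytes, returning to VM86 mode)
 * Worst case is VM86: 16 + 24 = 40 bytes, which is exactly what the
 * access_ok(regs->esp, 40) check guards against.
 */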
unsigned long do_iret(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    u32 eflags;

    /* Check worst-case stack frame for overlap with Xen protected area. */
    if ( unlikely(!access_ok(regs->esp, 40)) )
        domain_crash_synchronous();

    /* Pop and restore EAX (clobbered by hypercall). */
    if ( unlikely(__copy_from_user(&regs->eax, (void __user *)regs->esp, 4)) )
        domain_crash_synchronous();
    regs->esp += 4;

    /* Pop and restore CS and EIP. */
    if ( unlikely(__copy_from_user(&regs->eip, (void __user *)regs->esp, 8)) )
        domain_crash_synchronous();
    regs->esp += 8;

    /*
     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
     */
    if ( unlikely(__copy_from_user(&eflags, (void __user *)regs->esp, 4)) )
        domain_crash_synchronous();
    regs->esp += 4;
    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;

    if ( vm86_mode(regs) )
    {
        /* Return to VM86 mode: pop and restore ESP, SS, ES, DS, FS and GS. */
        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 24) )
            domain_crash_synchronous();
    }
    else if ( unlikely(ring_0(regs)) )
    {
        domain_crash_synchronous();
    }
    else if ( !ring_1(regs) )
    {
        /* Return to ring 2/3: pop and restore ESP and SS. */
        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 8) )
            domain_crash_synchronous();
    }

    /* No longer in NMI context. */
    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);

    /* Restore upcall mask from supplied EFLAGS.IF. */
    current->vcpu_info->evtchn_upcall_mask = !(eflags & X86_EFLAGS_IF);

    /* The hypercall exit path will overwrite EAX with this return value. */
    return regs->eax;
}

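/*
 * A deferred NMI is replayed through the local APIC as the ordinary
 * vector TRAP_deferred_nmi: when that vector fires, the handler below
 * acknowledges the APIC and funnels into the real do_nmi() path.
 */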
#include <asm/asm_defns.h>
BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
{
    asmlinkage void do_nmi(struct cpu_user_regs *);
    ack_APIC_irq();
    do_nmi(regs);
}

void __init percpu_traps_init(void)
{
    struct tss_struct *tss = &doublefault_tss;
    asmlinkage int hypercall(void);

    if ( smp_processor_id() != 0 )
        return;

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /* The hypercall entry vector is only accessible from ring 1. */
    _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);

    set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);

    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    memset(tss, 0, sizeof(*tss));
    tss->ds = __HYPERVISOR_DS;
    tss->es = __HYPERVISOR_DS;
    tss->ss = __HYPERVISOR_DS;
    tss->esp = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3 = __pa(idle_pg_table);
    tss->cs = __HYPERVISOR_CS;
    tss->eip = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(
        gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
        (unsigned long)tss, 235, 9);

    set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY << 3);
}

void init_int80_direct_trap(struct vcpu *v)
{
    struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80];

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable. Also we
     * must ensure that the CS is safe to poke into an interrupt gate.
     *
     * When running with supervisor_mode_kernel enabled, a direct trap
     * to the guest OS cannot be used because the INT instruction will
     * switch to the Xen stack and we need to swap back to the guest
     * kernel stack before passing control to the system call entry point.
     */
    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
         supervisor_mode_kernel )
    {
        v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
        return;
    }

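    /*
     * Gate descriptor encoding: the low dword (.a) holds the code selector
     * in its top half and the low 16 bits of the handler address in its
     * bottom half. In the high dword (.b), 0x8f00 sets Present=1 and gate
     * type 0xF (32-bit trap gate), the guest-requested DPL goes in bits
     * 14:13, and the top half holds the high 16 bits of the handler address.
     */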
    v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    v->arch.int80_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);

    if ( v == current )
        set_int80_direct_trap(v);
}

#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
static void do_update_sysenter(void *info)
{
    xen_callback_t *address = info;

    wrmsr(MSR_IA32_SYSENTER_CS, address->cs, 0);
    wrmsr(MSR_IA32_SYSENTER_EIP, address->eip, 0);
}
#endif

static long register_guest_callback(struct callback_register *reg)
{
    long ret = 0;
    struct vcpu *v = current;

    fixup_guest_code_selector(reg->address.cs);

    switch ( reg->type )
    {
    case CALLBACKTYPE_event:
        v->arch.guest_context.event_callback_cs = reg->address.cs;
        v->arch.guest_context.event_callback_eip = reg->address.eip;
        break;

    case CALLBACKTYPE_failsafe:
        v->arch.guest_context.failsafe_callback_cs = reg->address.cs;
        v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
        if ( reg->flags & CALLBACKF_mask_events )
            set_bit(_VGCF_failsafe_disables_events,
                    &v->arch.guest_context.flags);
        else
            clear_bit(_VGCF_failsafe_disables_events,
                      &v->arch.guest_context.flags);
        break;

#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    case CALLBACKTYPE_sysenter:
        if ( !cpu_has_sep )
            ret = -EINVAL;
        else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
            ret = -EIO;
        break;
#endif

    case CALLBACKTYPE_nmi:
        ret = register_guest_nmi_callback(reg->address.eip);
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static long unregister_guest_callback(struct callback_unregister *unreg)
{
    long ret;

    switch ( unreg->type )
    {
    case CALLBACKTYPE_nmi:
        ret = unregister_guest_nmi_callback();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

long do_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    long ret;

    switch ( cmd )
    {
    case CALLBACKOP_register:
    {
        struct callback_register reg;

        ret = -EFAULT;
        if ( copy_from_guest(&reg, arg, 1) )
            break;

        ret = register_guest_callback(&reg);
    }
    break;

    case CALLBACKOP_unregister:
    {
        struct callback_unregister unreg;

        ret = -EFAULT;
        if ( copy_from_guest(&unreg, arg, 1) )
            break;

        ret = unregister_guest_callback(&unreg);
    }
    break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

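/*
 * do_set_callbacks() is the older, fixed-function interface for the same
 * job: it simply wraps its arguments in callback_register structures and
 * reuses the CALLBACKOP_register path for the event and failsafe callbacks.
 */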
long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct callback_register event = {
        .type = CALLBACKTYPE_event,
        .address = { event_selector, event_address },
    };
    struct callback_register failsafe = {
        .type = CALLBACKTYPE_failsafe,
        .address = { failsafe_selector, failsafe_address },
    };

    register_guest_callback(&event);
    register_guest_callback(&failsafe);

    return 0;
}

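/*
 * The hypercall transfer page is carved into 32-byte stubs, one per
 * hypercall: hypercall <i> lives at byte offset i*32, so a guest invokes
 * it via `call hypercall_page + i*32`. For a ring-0
 * (supervisor_mode_kernel) guest, each stub below assembles to:
 *
 *   9c                      pushf
 *   fa                      cli
 *   b8 <i:u32>              mov   $<i>, %eax
 *   9a <addr:u32> <cs:u16>  lcall $__HYPERVISOR_CS, $hypercall
 *   c3                      ret
 */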
static void hypercall_page_initialise_ring0_kernel(void *hypercall_page)
{
    extern asmlinkage int hypercall(void);
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));

        *(u8  *)(p+ 0) = 0x9c;    /* pushf */
        *(u8  *)(p+ 1) = 0xfa;    /* cli */
        *(u8  *)(p+ 2) = 0xb8;    /* mov $<i>,%eax */
        *(u32 *)(p+ 3) = i;
        *(u8  *)(p+ 7) = 0x9a;    /* lcall $__HYPERVISOR_CS,&hypercall */
        *(u32 *)(p+ 8) = (u32)&hypercall;
        *(u16 *)(p+12) = (u16)__HYPERVISOR_CS;
        *(u8  *)(p+14) = 0xc3;    /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump to this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
    *(u8  *)(p+ 1) = 0x9c;    /* pushf */
    *(u8  *)(p+ 2) = 0xfa;    /* cli */
    *(u8  *)(p+ 3) = 0xb8;    /* mov $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 4) = __HYPERVISOR_iret;
    *(u8  *)(p+ 8) = 0x9a;    /* lcall $__HYPERVISOR_CS,&hypercall */
    *(u32 *)(p+ 9) = (u32)&hypercall;
    *(u16 *)(p+13) = (u16)__HYPERVISOR_CS;
}

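/*
 * Ring-1 guests reach the hypervisor through a software interrupt
 * instead: int $0x82 is the vector wired to the hypercall gate in
 * percpu_traps_init() above (HYPERCALL_VECTOR). Each stub below is just:
 *
 *   b8 <i:u32>   mov $<i>, %eax
 *   cd 82        int $0x82
 *   c3           ret
 */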
static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
{
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8  *)(p+ 0) = 0xb8;    /* mov $<i>,%eax */
        *(u32 *)(p+ 1) = i;
        *(u16 *)(p+ 5) = 0x82cd;  /* int $0x82 */
        *(u8  *)(p+ 7) = 0xc3;    /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump to this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
    *(u8  *)(p+ 1) = 0xb8;    /* mov $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
    *(u16 *)(p+ 6) = 0x82cd;  /* int $0x82 */
}

void hypercall_page_initialise(void *hypercall_page)
{
    if ( supervisor_mode_kernel )
        hypercall_page_initialise_ring0_kernel(hypercall_page);
    else
        hypercall_page_initialise_ring1_kernel(hypercall_page);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */