direct-io.hg

view xen/arch/x86/x86_64/traps.c @ 10445:8d75d4e0af1e

[XEN] Improve double-fault tracing -- print backtrace
on stack overflow.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@dhcp93.uk.xensource.com
date Mon Jun 19 11:21:40 2006 +0100 (2006-06-19)
parents e1ae7b3cb5b7
children f3561b1ee7a3
line source
2 #include <xen/config.h>
3 #include <xen/compile.h>
4 #include <xen/init.h>
5 #include <xen/sched.h>
6 #include <xen/lib.h>
7 #include <xen/errno.h>
8 #include <xen/mm.h>
9 #include <xen/irq.h>
10 #include <xen/symbols.h>
11 #include <xen/console.h>
12 #include <xen/sched.h>
13 #include <xen/reboot.h>
14 #include <xen/nmi.h>
15 #include <asm/current.h>
16 #include <asm/flushtlb.h>
17 #include <asm/msr.h>
18 #include <asm/shadow.h>
19 #include <asm/hvm/hvm.h>
20 #include <asm/hvm/support.h>
22 #include <public/callback.h>
/*
 * Dump the register state in @regs to the console, prefixed with the Xen
 * version/taint banner and tagged with the context the state belongs to
 * ("hvm", "guest" or "hypervisor").
 *
 * For an HVM guest the register and control-register state is fetched
 * via hvm_store_cpu_guest_regs().  Otherwise CR0/CR3 and the data
 * segment selectors are read live from the CPU (they are not part of
 * the saved exception frame).
 */
static void __show_registers(struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];   /* only [0] (CR0) and [3] (CR3) are printed */
    char taint_str[TAINT_STRING_MAX_LEN];
    const char *context;

    if ( hvm_guest(current) && guest_mode(regs) )
    {
        context = "hvm";
        hvm_store_cpu_guest_regs(current, &fault_regs, fault_crs);
    }
    else
    {
        context = guest_mode(regs) ? "guest" : "hypervisor";
        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
        fault_regs.ds = read_segment_register(ds);
        fault_regs.es = read_segment_register(es);
        fault_regs.fs = read_segment_register(fs);
        fault_regs.gs = read_segment_register(gs);
    }

    printk("----[ Xen-%d.%d%s %s ]----\n",
           XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION,
           print_tainted(taint_str));
    printk("CPU: %d\nRIP: %04x:[<%016lx>]",
           smp_processor_id(), fault_regs.cs, fault_regs.rip);
    /* Symbol lookup only makes sense for hypervisor addresses. */
    if ( !guest_mode(regs) )
        print_symbol(" %s", fault_regs.rip);
    printk("\nRFLAGS: %016lx CONTEXT: %s\n", fault_regs.rflags, context);
    printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
           fault_regs.rax, fault_regs.rbx, fault_regs.rcx);
    printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
           fault_regs.rdx, fault_regs.rsi, fault_regs.rdi);
    printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
           fault_regs.rbp, fault_regs.rsp, fault_regs.r8);
    printk("r9: %016lx r10: %016lx r11: %016lx\n",
           fault_regs.r9, fault_regs.r10, fault_regs.r11);
    printk("r12: %016lx r13: %016lx r14: %016lx\n",
           fault_regs.r12, fault_regs.r13, fault_regs.r14);
    printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
           fault_regs.r15, fault_crs[0], fault_crs[3]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           fault_regs.ds, fault_regs.es, fault_regs.fs,
           fault_regs.gs, fault_regs.ss, fault_regs.cs);
}
/* Print the full register state for @regs, followed by a stack dump. */
void show_registers(struct cpu_user_regs *regs)
{
    __show_registers(regs);
    show_stack(regs);
}
/*
 * Print the 4-level page-table walk for virtual address @addr using the
 * current CR3.  Each level prints the raw entry and the guest pfn backing
 * it; the walk stops early at a non-present entry, or at L2 when the
 * entry maps a superpage (_PAGE_PSE), in which case there is no L1.
 *
 * NOTE(review): relies on mfn_to_virt() being valid for every
 * page-table frame encountered — confirm frames are in the directmap.
 */
void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    l4_pgentry_t l4e, *l4t;
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %016lx:\n", addr);

    l4t = mfn_to_virt(mfn);
    l4e = l4t[l4_table_offset(addr)];
    mfn = l4e_get_pfn(l4e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L4 = %"PRIpte" %016lx\n", l4e_get_intpte(l4e), pfn);
    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
        return;

    l3t = mfn_to_virt(mfn);
    l3e = l3t[l3_table_offset(addr)];
    mfn = l3e_get_pfn(l3e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L3 = %"PRIpte" %016lx\n", l3e_get_intpte(l3e), pfn);
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        return;

    l2t = mfn_to_virt(mfn);
    l2e = l2t[l2_table_offset(addr)];
    mfn = l2e_get_pfn(l2e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L2 = %"PRIpte" %016lx %s\n", l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) )
        return;

    l1t = mfn_to_virt(mfn);
    l1e = l1t[l1_table_offset(addr)];
    mfn = l1e_get_pfn(l1e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
}
asmlinkage void double_fault(void);
/*
 * Double-fault handler.  A double fault is treated as unrecoverable:
 * dump the saved register state and a stack-overflow backtrace, then
 * halt the machine and wait for a manual reset.
 */
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
{
    /* Stop the watchdog firing while we produce diagnostics. */
    watchdog_disable();

    /* The console lock may be held by the CPU that faulted. */
    console_force_unlock();

    /* Find information saved during fault and dump it to the console. */
    printk("************************************\n");
    __show_registers(regs);
    show_stack_overflow(regs->rsp);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id());
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    machine_halt();
}
/*
 * Flip @v between guest kernel mode and guest user mode: toggle the
 * TF_kernel_mode flag, swap the GS base with SWAPGS, and switch to the
 * page tables for the new mode.
 */
void toggle_guest_mode(struct vcpu *v)
{
    v->arch.flags ^= TF_kernel_mode;
    __asm__ __volatile__ ( "swapgs" );
    update_pagetables(v);
    write_ptbase(v);
}
153 unsigned long do_iret(void)
154 {
155 struct cpu_user_regs *regs = guest_cpu_user_regs();
156 struct iret_context iret_saved;
157 struct vcpu *v = current;
159 if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
160 sizeof(iret_saved))) )
161 {
162 DPRINTK("Fault while reading IRET context from guest stack\n");
163 domain_crash_synchronous();
164 }
166 /* Returning to user mode? */
167 if ( (iret_saved.cs & 3) == 3 )
168 {
169 if ( unlikely(pagetable_is_null(v->arch.guest_table_user)) )
170 {
171 DPRINTK("Guest switching to user mode with no user page tables\n");
172 domain_crash_synchronous();
173 }
174 toggle_guest_mode(v);
175 }
177 regs->rip = iret_saved.rip;
178 regs->cs = iret_saved.cs | 3; /* force guest privilege */
179 regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
180 regs->rsp = iret_saved.rsp;
181 regs->ss = iret_saved.ss | 3; /* force guest privilege */
183 if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
184 {
185 regs->entry_vector = 0;
186 regs->r11 = iret_saved.r11;
187 regs->rcx = iret_saved.rcx;
188 }
190 /* No longer in NMI context. */
191 clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
193 /* Restore upcall mask from supplied EFLAGS.IF. */
194 current->vcpu_info->evtchn_upcall_mask = !(iret_saved.rflags & EF_IE);
196 /* Saved %rax gets written back to regs->rax in entry.S. */
197 return iret_saved.rax;
198 }
200 asmlinkage void syscall_enter(void);
201 void __init percpu_traps_init(void)
202 {
203 char *stack_bottom, *stack;
204 int cpu = smp_processor_id();
206 if ( cpu == 0 )
207 {
208 /* Specify dedicated interrupt stacks for NMIs and double faults. */
209 set_intr_gate(TRAP_double_fault, &double_fault);
210 idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
211 idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
212 }
214 stack_bottom = (char *)get_stack_bottom();
215 stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
217 /* Double-fault handler has its own per-CPU 1kB stack. */
218 init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
220 /* NMI handler has its own per-CPU 1kB stack. */
221 init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
223 /*
224 * Trampoline for SYSCALL entry from long mode.
225 */
227 /* Skip the NMI and DF stacks. */
228 stack = &stack[2048];
229 wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
231 /* movq %rsp, saversp(%rip) */
232 stack[0] = 0x48;
233 stack[1] = 0x89;
234 stack[2] = 0x25;
235 *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
237 /* leaq saversp(%rip), %rsp */
238 stack[7] = 0x48;
239 stack[8] = 0x8d;
240 stack[9] = 0x25;
241 *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
243 /* pushq %r11 */
244 stack[14] = 0x41;
245 stack[15] = 0x53;
247 /* pushq $__GUEST_CS64 */
248 stack[16] = 0x68;
249 *(u32 *)&stack[17] = __GUEST_CS64;
251 /* jmp syscall_enter */
252 stack[21] = 0xe9;
253 *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
255 /*
256 * Trampoline for SYSCALL entry from compatibility mode.
257 */
259 /* Skip the long-mode entry trampoline. */
260 stack = &stack[26];
261 wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
263 /* movq %rsp, saversp(%rip) */
264 stack[0] = 0x48;
265 stack[1] = 0x89;
266 stack[2] = 0x25;
267 *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
269 /* leaq saversp(%rip), %rsp */
270 stack[7] = 0x48;
271 stack[8] = 0x8d;
272 stack[9] = 0x25;
273 *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
275 /* pushq %r11 */
276 stack[14] = 0x41;
277 stack[15] = 0x53;
279 /* pushq $__GUEST_CS32 */
280 stack[16] = 0x68;
281 *(u32 *)&stack[17] = __GUEST_CS32;
283 /* jmp syscall_enter */
284 stack[21] = 0xe9;
285 *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
287 /*
288 * Common SYSCALL parameters.
289 */
291 wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
292 wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
293 }
295 static long register_guest_callback(struct callback_register *reg)
296 {
297 long ret = 0;
298 struct vcpu *v = current;
300 switch ( reg->type )
301 {
302 case CALLBACKTYPE_event:
303 v->arch.guest_context.event_callback_eip = reg->address;
304 break;
306 case CALLBACKTYPE_failsafe:
307 v->arch.guest_context.failsafe_callback_eip = reg->address;
308 if ( reg->flags & CALLBACKF_mask_events )
309 set_bit(_VGCF_failsafe_disables_events,
310 &v->arch.guest_context.flags);
311 else
312 clear_bit(_VGCF_failsafe_disables_events,
313 &v->arch.guest_context.flags);
314 break;
316 case CALLBACKTYPE_syscall:
317 v->arch.guest_context.syscall_callback_eip = reg->address;
318 if ( reg->flags & CALLBACKF_mask_events )
319 set_bit(_VGCF_syscall_disables_events,
320 &v->arch.guest_context.flags);
321 else
322 clear_bit(_VGCF_syscall_disables_events,
323 &v->arch.guest_context.flags);
324 break;
326 case CALLBACKTYPE_nmi:
327 ret = register_guest_nmi_callback(reg->address);
328 break;
330 default:
331 ret = -EINVAL;
332 break;
333 }
335 return ret;
336 }
338 static long unregister_guest_callback(struct callback_unregister *unreg)
339 {
340 long ret;
342 switch ( unreg->type )
343 {
344 case CALLBACKTYPE_nmi:
345 ret = unregister_guest_nmi_callback();
346 break;
348 default:
349 ret = -EINVAL;
350 break;
351 }
353 return ret;
354 }
357 long do_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg)
358 {
359 long ret;
361 switch ( cmd )
362 {
363 case CALLBACKOP_register:
364 {
365 struct callback_register reg;
367 ret = -EFAULT;
368 if ( copy_from_guest(&reg, arg, 1) )
369 break;
371 ret = register_guest_callback(&reg);
372 }
373 break;
375 case CALLBACKOP_unregister:
376 {
377 struct callback_unregister unreg;
379 ret = -EFAULT;
380 if ( copy_from_guest(&unreg, arg, 1) )
381 break;
383 ret = unregister_guest_callback(&unreg);
384 }
385 break;
387 default:
388 ret = -EINVAL;
389 break;
390 }
392 return ret;
393 }
395 long do_set_callbacks(unsigned long event_address,
396 unsigned long failsafe_address,
397 unsigned long syscall_address)
398 {
399 struct callback_register event = {
400 .type = CALLBACKTYPE_event,
401 .address = event_address,
402 };
403 struct callback_register failsafe = {
404 .type = CALLBACKTYPE_failsafe,
405 .address = failsafe_address,
406 };
407 struct callback_register syscall = {
408 .type = CALLBACKTYPE_syscall,
409 .address = syscall_address,
410 };
412 register_guest_callback(&event);
413 register_guest_callback(&failsafe);
414 register_guest_callback(&syscall);
416 return 0;
417 }
419 void hypercall_page_initialise(void *hypercall_page)
420 {
421 char *p;
422 int i;
424 /* Fill in all the transfer points with template machine code. */
425 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
426 {
427 p = (char *)(hypercall_page + (i * 32));
428 *(u8 *)(p+ 0) = 0x51; /* push %rcx */
429 *(u16 *)(p+ 1) = 0x5341; /* push %r11 */
430 *(u8 *)(p+ 3) = 0xb8; /* mov $<i>,%eax */
431 *(u32 *)(p+ 4) = i;
432 *(u16 *)(p+ 8) = 0x050f; /* syscall */
433 *(u16 *)(p+10) = 0x5b41; /* pop %r11 */
434 *(u8 *)(p+12) = 0x59; /* pop %rcx */
435 *(u8 *)(p+13) = 0xc3; /* ret */
436 }
438 /*
439 * HYPERVISOR_iret is special because it doesn't return and expects a
440 * special stack frame. Guests jump at this transfer point instead of
441 * calling it.
442 */
443 p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
444 *(u8 *)(p+ 0) = 0x51; /* push %rcx */
445 *(u16 *)(p+ 1) = 0x5341; /* push %r11 */
446 *(u8 *)(p+ 3) = 0x50; /* push %rax */
447 *(u8 *)(p+ 4) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */
448 *(u32 *)(p+ 5) = __HYPERVISOR_iret;
449 *(u16 *)(p+ 9) = 0x050f; /* syscall */
450 }
452 /*
453 * Local variables:
454 * mode: C
455 * c-set-style: "BSD"
456 * c-basic-offset: 4
457 * tab-width: 4
458 * indent-tabs-mode: nil
459 * End:
460 */