xen/arch/x86/x86_32/traps.c @ 19835:edfdeb150f27

Fix build system to detect udev > version 124

udev removed the udevinfo symlink in versions higher than 123, and
Xen's build system could not detect whether udev was in place with the
required version.

Signed-off-by: Marc-A. Dahlhaus <mad@wol.de>
author   Keir Fraser <keir.fraser@citrix.com>
date     Thu Jun 25 13:02:37 2009 +0100
parents  7dfc0a20fa59

#include <xen/config.h>
#include <xen/version.h>
#include <xen/domain_page.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/console.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/symbols.h>
#include <xen/shutdown.h>
#include <xen/nmi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>

#include <public/callback.h>
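
/*
 * Annotation: this file gathers the x86_32-specific pieces of Xen's trap
 * handling: register and page-walk dumps for crash diagnostics, the
 * double-fault task, the guest iret hypercall, int $0x80 direct-trap setup,
 * guest callback (un)registration, and hypercall transfer-page population.
 */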

static void print_xen_info(void)
{
    char taint_str[TAINT_STRING_MAX_LEN];
    char debug = 'n', *arch = "x86_32p";

#ifndef NDEBUG
    debug = 'y';
#endif

    printk("----[ Xen-%d.%d%s %s debug=%c %s ]----\n",
           xen_major_version(), xen_minor_version(), xen_extra_version(),
           arch, debug, print_tainted(taint_str));
}

enum context { CTXT_hypervisor, CTXT_pv_guest, CTXT_hvm_guest };

static void _show_registers(
    const struct cpu_user_regs *regs, unsigned long crs[8],
    enum context context, const struct vcpu *v)
{
    const static char *context_names[] = {
        [CTXT_hypervisor] = "hypervisor",
        [CTXT_pv_guest]   = "pv guest",
        [CTXT_hvm_guest]  = "hvm guest"
    };

    printk("EIP: %04x:[<%08x>]", regs->cs, regs->eip);
    if ( context == CTXT_hypervisor )
        print_symbol(" %s", regs->eip);
    printk("\nEFLAGS: %08x ", regs->eflags);
    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
        printk("EM: %d ", !!v->vcpu_info->evtchn_upcall_mask);
    printk("CONTEXT: %s\n", context_names[context]);

    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           regs->esi, regs->edi, regs->ebp, regs->esp);
    printk("cr0: %08lx cr4: %08lx cr3: %08lx cr2: %08lx\n",
           crs[0], crs[4], crs[3], crs[2]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           regs->ds, regs->es, regs->fs,
           regs->gs, regs->ss, regs->cs);
}
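
/*
 * Annotation: _show_registers() is the common printer; show_registers()
 * below gathers the state first. For an HVM guest the selectors and control
 * registers cannot be read from the hardware registers (those belong to Xen
 * at this point), so they are fetched from the cached guest_cr[] values and
 * via hvm_get_segment_register(), which consults the VMCS/VMCB.
 */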

void show_registers(struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    enum context context;
    struct vcpu *v = current;

    if ( is_hvm_vcpu(v) && guest_mode(regs) )
    {
        struct segment_register sreg;
        context = CTXT_hvm_guest;
        fault_crs[0] = v->arch.hvm_vcpu.guest_cr[0];
        fault_crs[2] = v->arch.hvm_vcpu.guest_cr[2];
        fault_crs[3] = v->arch.hvm_vcpu.guest_cr[3];
        fault_crs[4] = v->arch.hvm_vcpu.guest_cr[4];
        hvm_get_segment_register(v, x86_seg_cs, &sreg);
        fault_regs.cs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_ds, &sreg);
        fault_regs.ds = sreg.sel;
        hvm_get_segment_register(v, x86_seg_es, &sreg);
        fault_regs.es = sreg.sel;
        hvm_get_segment_register(v, x86_seg_fs, &sreg);
        fault_regs.fs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_gs, &sreg);
        fault_regs.gs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_ss, &sreg);
        fault_regs.ss = sreg.sel;
    }
    else
    {
        if ( !guest_mode(regs) )
        {
            context = CTXT_hypervisor;
            fault_regs.esp = (unsigned long)&regs->esp;
            fault_regs.ss = read_segment_register(ss);
            fault_regs.ds = read_segment_register(ds);
            fault_regs.es = read_segment_register(es);
            fault_regs.fs = read_segment_register(fs);
            fault_regs.gs = read_segment_register(gs);
            fault_crs[2] = read_cr2();
        }
        else
        {
            context = CTXT_pv_guest;
            fault_crs[2] = v->vcpu_info->arch.cr2;
        }

        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
        fault_crs[4] = read_cr4();
    }

    print_xen_info();
    printk("CPU: %d\n", smp_processor_id());
    _show_registers(&fault_regs, fault_crs, context, v);

    if ( this_cpu(ler_msr) && !guest_mode(regs) )
    {
        u32 from, to, hi;
        rdmsr(this_cpu(ler_msr), from, hi);
        rdmsr(this_cpu(ler_msr) + 1, to, hi);
        printk("ler: %08x -> %08x\n", from, to);
    }
}

void vcpu_show_registers(const struct vcpu *v)
{
    unsigned long crs[8];

    /* No need to handle HVM for now. */
    if ( is_hvm_vcpu(v) )
        return;

    crs[0] = v->arch.guest_context.ctrlreg[0];
    crs[2] = v->vcpu_info->arch.cr2;
    crs[3] = pagetable_get_paddr(v->arch.guest_table);
    crs[4] = v->arch.guest_context.ctrlreg[4];

    _show_registers(&v->arch.guest_context.user_regs, crs, CTXT_pv_guest, v);
}
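
/*
 * Annotation: the walk below assumes the 3-level PAE layout used by x86_32p
 * Xen. CR3 holds the address of the page-directory-pointer table, which is
 * only 32 bytes long and need not be page-aligned: bits 5..11 of CR3 locate
 * it within its page, hence the "(cr3 & 0xFE0) >> 3" entry-index adjustment
 * before the L3 lookup.
 */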

void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn, cr3 = read_cr3();
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %08lx:\n", addr);

    mfn = cr3 >> PAGE_SHIFT;

    l3t = map_domain_page(mfn);
    l3t += (cr3 & 0xFE0UL) >> 3;
    l3e = l3t[l3_table_offset(addr)];
    mfn = l3e_get_pfn(l3e);
    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L3[0x%03lx] = %"PRIpte" %08lx\n",
           l3_table_offset(addr), l3e_get_intpte(l3e), pfn);
    unmap_domain_page(l3t);
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        return;

    l2t = map_domain_page(mfn);
    l2e = l2t[l2_table_offset(addr)];
    mfn = l2e_get_pfn(l2e);
    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L2[0x%03lx] = %"PRIpte" %08lx %s\n",
           l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    unmap_domain_page(l2t);
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) )
        return;

    l1t = map_domain_page(mfn);
    l1e = l1t[l1_table_offset(addr)];
    mfn = l1e_get_pfn(l1e);
    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L1[0x%03lx] = %"PRIpte" %08lx\n",
           l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
    unmap_domain_page(l1t);
}

DEFINE_PER_CPU(struct tss_struct *, doublefault_tss);
static unsigned char __attribute__ ((__section__ (".bss.page_aligned")))
    boot_cpu_doublefault_space[PAGE_SIZE];
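
/*
 * Annotation: double faults arrive via a hardware task switch (see
 * set_task_gate() below), so the faulting context is saved in the outgoing
 * task's TSS rather than on a stack; do_double_fault() therefore reads it
 * back from init_tss[cpu]. The "lsll" recovers the CPU number from the
 * segment limit of the per-CPU GDT entry, as %fs/%gs cannot be trusted here.
 */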

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss;
    unsigned int cpu;

    watchdog_disable();

    console_force_unlock();

    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("*** DOUBLE FAULT ***\n");
    print_xen_info();
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           cpu, tss->cs, tss->eip);
    print_symbol(" %s\n", tss->eip);
    printk("EFLAGS: %08x\n", tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    show_stack_overflow(cpu, tss->esp);

    panic("DOUBLE FAULT -- system shutdown\n");
}
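
/*
 * Annotation: sketch of the guest stack frame HYPERVISOR_iret expects,
 * reconstructed from the pops in do_iret() below (offsets are relative to
 * the guest ESP at hypercall entry):
 *
 *    0: EAX     (restored; the hypercall itself clobbers it)
 *    4: EIP
 *    8: CS
 *   12: EFLAGS  (IOPL bits are discarded; EFLAGS.IF feeds the upcall mask)
 *   16: ESP, SS                  -- only when returning to ring 2/3
 *   16: ESP, SS, ES, DS, FS, GS  -- only when returning to VM86 mode
 *
 * Worst case is 40 bytes, matching the access_ok(regs->esp, 40) check.
 */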

unsigned long do_iret(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    struct vcpu *v = current;
    u32 eflags;

    /* Check worst-case stack frame for overlap with Xen protected area. */
    if ( unlikely(!access_ok(regs->esp, 40)) )
        goto exit_and_crash;

    /* Pop and restore EAX (clobbered by hypercall). */
    if ( unlikely(__copy_from_user(&regs->eax, (void *)regs->esp, 4)) )
        goto exit_and_crash;
    regs->esp += 4;

    /* Pop and restore CS and EIP. */
    if ( unlikely(__copy_from_user(&regs->eip, (void *)regs->esp, 8)) )
        goto exit_and_crash;
    regs->esp += 8;

    /*
     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
     * to avoid firing the BUG_ON(IOPL) check in arch_get_info_guest.
     */
    if ( unlikely(__copy_from_user(&eflags, (void *)regs->esp, 4)) )
        goto exit_and_crash;
    regs->esp += 4;
    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;

    if ( vm86_mode(regs) )
    {
        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
        if ( __copy_from_user(&regs->esp, (void *)regs->esp, 24) )
            goto exit_and_crash;
    }
    else if ( unlikely(ring_0(regs)) )
    {
        goto exit_and_crash;
    }
    else if ( !ring_1(regs) )
    {
        /* Return to ring 2/3: pop and restore ESP and SS. */
        if ( __copy_from_user(&regs->esp, (void *)regs->esp, 8) )
            goto exit_and_crash;
    }

    /* Restore affinity. */
    if ( (v->trap_priority >= VCPU_TRAP_NMI) &&
         !cpus_equal(v->cpu_affinity_tmp, v->cpu_affinity) )
        vcpu_set_affinity(v, &v->cpu_affinity_tmp);

    /* Restore previous trap priority. */
    v->trap_priority = v->old_trap_priority;

    /* Restore upcall mask from supplied EFLAGS.IF. */
    vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);

    /*
     * The hypercall exit path will overwrite EAX with this return
     * value.
     */
    return regs->eax;

 exit_and_crash:
    gdprintk(XENLOG_ERR, "Fatal error\n");
    domain_crash(v->domain);
    return 0;
}
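
/*
 * Annotation: idt_table[n].a/.b are the two words of an 8-byte IDT
 * descriptor. For a task gate the TSS selector sits in bits 16..31 of the
 * low word, and 0x8500 in the high word encodes present (P=1), DPL=0,
 * type 0101b (32-bit task gate). The gate is zapped before being rewritten
 * so no CPU can ever see a half-updated descriptor.
 */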

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].b = 0;
    wmb(); /* disable gate /then/ rewrite */
    idt_table[n].a = sel << 16;
    wmb(); /* rewrite /then/ enable gate */
    idt_table[n].b = 0x8500;
}
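
/*
 * Annotation: in the _set_tssldt_desc() call below, 235 is the descriptor
 * limit (a TSS must cover at least 0x67 bytes; the remainder presumably
 * accounts for the trailing I/O-bitmap fields) and 9 is descriptor type
 * 1001b, an available 32-bit TSS. tss->eflags = 2 is the architectural
 * reset value of EFLAGS (only reserved bit 1 set), so the double-fault
 * task runs with interrupts disabled.
 */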

void __devinit subarch_percpu_traps_init(void)
{
    struct tss_struct *tss = this_cpu(doublefault_tss);
    asmlinkage int hypercall(void);

    if ( !tss )
    {
        /* The hypercall entry vector is only accessible from ring 1. */
        _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);

        tss = (void *)boot_cpu_doublefault_space;
        this_cpu(doublefault_tss) = tss;
    }

    /*
     * Make a separate task for double faults. This will get us debug output if
     * we blow the kernel stack.
     */
    tss->ds = __HYPERVISOR_DS;
    tss->es = __HYPERVISOR_DS;
    tss->ss = __HYPERVISOR_DS;
    tss->esp = (unsigned long)tss + PAGE_SIZE;
    tss->__cr3 = __pa(idle_pg_table);
    tss->cs = __HYPERVISOR_CS;
    tss->eip = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(
        this_cpu(gdt_table) + DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
        (unsigned long)tss, 235, 9);

    set_task_gate(TRAP_double_fault, DOUBLEFAULT_TSS_ENTRY << 3);
}
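
/*
 * Annotation: int80_desc is a ready-made IDT entry for vector 0x80. The two
 * words assembled below encode a 32-bit trap gate: 0x8f00 sets P=1 and type
 * 1111b (a trap gate, which unlike an interrupt gate leaves EFLAGS.IF
 * alone), the guest's DPL occupies bits 13..14, and the target CS:EIP is
 * split across the two halves as the gate format requires.
 */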

void init_int80_direct_trap(struct vcpu *v)
{
    struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80];

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable. Also we
     * must ensure that the CS is safe to poke into an interrupt gate.
     *
     * When running with supervisor_mode_kernel enabled a direct trap
     * to the guest OS cannot be used because the INT instruction will
     * switch to the Xen stack and we need to swap back to the guest
     * kernel stack before passing control to the system call entry point.
     */
    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(v->domain, ti->cs) ||
         supervisor_mode_kernel )
    {
        v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
        return;
    }

    v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    v->arch.int80_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);

    if ( v == current )
        set_int80_direct_trap(v);
}
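
/*
 * Annotation: for supervisor-mode-kernel builds the SYSENTER MSRs can point
 * straight at the guest kernel's entry point, since such a guest already
 * runs in ring 0. do_update_sysenter() programs MSR_IA32_SYSENTER_CS/EIP,
 * and is broadcast to every CPU via on_each_cpu() when the deprecated
 * callback type is registered.
 */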

#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
static void do_update_sysenter(void *info)
{
    xen_callback_t *address = info;

    wrmsr(MSR_IA32_SYSENTER_CS, address->cs, 0);
    wrmsr(MSR_IA32_SYSENTER_EIP, address->eip, 0);
}
#endif

static long register_guest_callback(struct callback_register *reg)
{
    long ret = 0;
    struct vcpu *v = current;

    fixup_guest_code_selector(v->domain, reg->address.cs);

    switch ( reg->type )
    {
    case CALLBACKTYPE_event:
        v->arch.guest_context.event_callback_cs = reg->address.cs;
        v->arch.guest_context.event_callback_eip = reg->address.eip;
        break;

    case CALLBACKTYPE_failsafe:
        v->arch.guest_context.failsafe_callback_cs = reg->address.cs;
        v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
        if ( reg->flags & CALLBACKF_mask_events )
            set_bit(_VGCF_failsafe_disables_events,
                    &v->arch.guest_context.flags);
        else
            clear_bit(_VGCF_failsafe_disables_events,
                      &v->arch.guest_context.flags);
        break;

#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    case CALLBACKTYPE_sysenter_deprecated:
        if ( !cpu_has_sep )
            ret = -EINVAL;
        else if ( on_each_cpu(do_update_sysenter, &reg->address, 1) != 0 )
            ret = -EIO;
        break;

    case CALLBACKTYPE_sysenter:
        if ( !cpu_has_sep )
            ret = -EINVAL;
        else
            do_update_sysenter(&reg->address);
        break;
#endif

    case CALLBACKTYPE_nmi:
        ret = register_guest_nmi_callback(reg->address.eip);
        break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

static long unregister_guest_callback(struct callback_unregister *unreg)
{
    long ret;

    switch ( unreg->type )
    {
    case CALLBACKTYPE_event:
    case CALLBACKTYPE_failsafe:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    case CALLBACKTYPE_sysenter_deprecated:
    case CALLBACKTYPE_sysenter:
#endif
        ret = -EINVAL;
        break;

    case CALLBACKTYPE_nmi:
        ret = unregister_guest_nmi_callback();
        break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

long do_callback_op(int cmd, XEN_GUEST_HANDLE(const_void) arg)
{
    long ret;

    switch ( cmd )
    {
    case CALLBACKOP_register:
    {
        struct callback_register reg;

        ret = -EFAULT;
        if ( copy_from_guest(&reg, arg, 1) )
            break;

        ret = register_guest_callback(&reg);
    }
    break;

    case CALLBACKOP_unregister:
    {
        struct callback_unregister unreg;

        ret = -EFAULT;
        if ( copy_from_guest(&unreg, arg, 1) )
            break;

        ret = unregister_guest_callback(&unreg);
    }
    break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct callback_register event = {
        .type = CALLBACKTYPE_event,
        .address = { event_selector, event_address },
    };
    struct callback_register failsafe = {
        .type = CALLBACKTYPE_failsafe,
        .address = { failsafe_selector, failsafe_address },
    };

    register_guest_callback(&event);
    register_guest_callback(&failsafe);

    return 0;
}
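
/*
 * Annotation: the hypercall page is carved into 32-byte stubs, one per
 * hypercall number. A supervisor-mode (ring 0) guest cannot reach Xen with
 * int $0x82, so each ring-0 stub instead does pushf; cli; mov $<nr>,%eax
 * and a far lcall through __HYPERVISOR_CS into the hypercall entry point.
 */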

static void hypercall_page_initialise_ring0_kernel(void *hypercall_page)
{
    extern asmlinkage int hypercall(void);
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */

    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));

        *(u8 *)(p+ 0) = 0x9c;   /* pushf */
        *(u8 *)(p+ 1) = 0xfa;   /* cli */
        *(u8 *)(p+ 2) = 0xb8;   /* mov $<i>,%eax */
        *(u32 *)(p+ 3) = i;
        *(u8 *)(p+ 7) = 0x9a;   /* lcall $__HYPERVISOR_CS,&hypercall */
        *(u32 *)(p+ 8) = (u32)&hypercall;
        *(u16 *)(p+12) = (u16)__HYPERVISOR_CS;
        *(u8 *)(p+14) = 0xc3;   /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8 *)(p+ 0) = 0x50;       /* push %eax */
    *(u8 *)(p+ 1) = 0x9c;       /* pushf */
    *(u8 *)(p+ 2) = 0xfa;       /* cli */
    *(u8 *)(p+ 3) = 0xb8;       /* mov $<i>,%eax */
    *(u32 *)(p+ 4) = __HYPERVISOR_iret;
    *(u8 *)(p+ 8) = 0x9a;       /* lcall $__HYPERVISOR_CS,&hypercall */
    *(u32 *)(p+ 9) = (u32)&hypercall;
    *(u16 *)(p+13) = (u16)__HYPERVISOR_CS;
}
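
/*
 * Annotation: the ring-1 (ordinary PV) stubs are simpler: load the
 * hypercall number into %eax and execute int $0x82. The bytes 0xcd 0x82
 * are emitted as the little-endian u16 0x82cd.
 */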

static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
{
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */

    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8 *)(p+ 0) = 0xb8;   /* mov $<i>,%eax */
        *(u32 *)(p+ 1) = i;
        *(u16 *)(p+ 5) = 0x82cd; /* int $0x82 */
        *(u8 *)(p+ 7) = 0xc3;   /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8 *)(p+ 0) = 0x50;       /* push %eax */
    *(u8 *)(p+ 1) = 0xb8;       /* mov $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
    *(u16 *)(p+ 6) = 0x82cd;    /* int $0x82 */
}

void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
    memset(hypercall_page, 0xCC, PAGE_SIZE);
    if ( is_hvm_domain(d) )
        hvm_hypercall_page_initialise(d, hypercall_page);
    else if ( supervisor_mode_kernel )
        hypercall_page_initialise_ring0_kernel(hypercall_page);
    else
        hypercall_page_initialise_ring1_kernel(hypercall_page);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */