ia64/xen-unstable

view xen/arch/x86/x86_32/traps.c @ 9618:db44b783fe09

Cleanups to do_callback_op() new hypercall changeset.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 06 17:25:24 2006 +0100 (2006-04-06)
parents be0a1f376223
children faad84c126e2
line source
2 #include <xen/config.h>
3 #include <xen/compile.h>
4 #include <xen/domain_page.h>
5 #include <xen/init.h>
6 #include <xen/sched.h>
7 #include <xen/lib.h>
8 #include <xen/console.h>
9 #include <xen/mm.h>
10 #include <xen/irq.h>
11 #include <xen/symbols.h>
12 #include <xen/reboot.h>
13 #include <asm/current.h>
14 #include <asm/flushtlb.h>
15 #include <asm/hvm/hvm.h>
16 #include <asm/hvm/support.h>
18 #include <public/callback.h>
/*
 * All CPUs have their own IDT to allow int80 direct trap.
 * idt_tables[0] is pointed at the master idt_table by percpu_traps_init();
 * secondary CPUs' entries are filled in elsewhere (not in this file).
 */
idt_entry_t *idt_tables[NR_CPUS] = { 0 };
/*
 * Dump the register state in @regs to the console, together with CR0/CR3
 * and the segment registers, for crash/debug reports.
 *
 * Three contexts are distinguished:
 *  - "hvm":        an HVM guest was interrupted; register and control-
 *                  register state is fetched via hvm_store_cpu_guest_regs().
 *  - "guest":      a PV guest was interrupted; @regs already holds the
 *                  complete frame.
 *  - "hypervisor": Xen itself faulted; ESP/SS/DS/ES/FS/GS are not valid
 *                  in the frame and are reconstructed from the live CPU.
 */
void show_registers(struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    char taint_str[TAINT_STRING_MAX_LEN];
    const char *context;

    if ( hvm_guest(current) && guest_mode(regs) )
    {
        context = "hvm";
        hvm_store_cpu_guest_regs(current, &fault_regs, fault_crs);
    }
    else
    {
        context = guest_mode(regs) ? "guest" : "hypervisor";

        if ( !guest_mode(regs) )
        {
            /*
             * Fault in Xen itself: the frame's ESP/SS slots were not
             * pushed (presumably no privilege transition — NOTE(review):
             * standard x86 behaviour, confirm against entry code), so the
             * interrupted stack pointer is the address of the frame's esp
             * slot itself, and the data segments are read from the CPU.
             */
            fault_regs.esp = (unsigned long)&regs->esp;
            fault_regs.ss = read_segment_register(ss);
            fault_regs.ds = read_segment_register(ds);
            fault_regs.es = read_segment_register(es);
            fault_regs.fs = read_segment_register(fs);
            fault_regs.gs = read_segment_register(gs);
        }

        /* Control registers of the current (PV or Xen) context. */
        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
    }

    printk("----[ Xen-%d.%d%s %s ]----\n",
           XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION,
           print_tainted(taint_str));
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           smp_processor_id(), fault_regs.cs, fault_regs.eip);
    /* Symbol lookup only makes sense for Xen addresses. */
    if ( !guest_mode(regs) )
        print_symbol(" %s", fault_regs.eip);
    printk("\nEFLAGS: %08x CONTEXT: %s\n", fault_regs.eflags, context);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
    printk("cr0: %08lx cr3: %08lx\n", fault_crs[0], fault_crs[3]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           fault_regs.ds, fault_regs.es, fault_regs.fs,
           fault_regs.gs, fault_regs.ss, fault_regs.cs);

    show_stack(regs);
}
74 void show_page_walk(unsigned long addr)
75 {
76 unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
77 #ifdef CONFIG_X86_PAE
78 l3_pgentry_t l3e, *l3t;
79 #endif
80 l2_pgentry_t l2e, *l2t;
81 l1_pgentry_t l1e, *l1t;
83 printk("Pagetable walk from %08lx:\n", addr);
85 #ifdef CONFIG_X86_PAE
86 l3t = map_domain_page(mfn);
87 l3e = l3t[l3_table_offset(addr)];
88 mfn = l3e_get_pfn(l3e);
89 pfn = get_gpfn_from_mfn(mfn);
90 printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
91 unmap_domain_page(l3t);
92 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
93 return;
94 #endif
96 l2t = map_domain_page(mfn);
97 l2e = l2t[l2_table_offset(addr)];
98 mfn = l2e_get_pfn(l2e);
99 pfn = get_gpfn_from_mfn(mfn);
100 printk(" L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn,
101 (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
102 unmap_domain_page(l2t);
103 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
104 (l2e_get_flags(l2e) & _PAGE_PSE) )
105 return;
107 l1t = map_domain_page(mfn);
108 l1e = l1t[l1_table_offset(addr)];
109 mfn = l1e_get_pfn(l1e);
110 pfn = get_gpfn_from_mfn(mfn);
111 printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
112 unmap_domain_page(l1t);
113 }
115 int __spurious_page_fault(unsigned long addr)
116 {
117 unsigned long mfn = read_cr3() >> PAGE_SHIFT;
118 #ifdef CONFIG_X86_PAE
119 l3_pgentry_t l3e, *l3t;
120 #endif
121 l2_pgentry_t l2e, *l2t;
122 l1_pgentry_t l1e, *l1t;
124 #ifdef CONFIG_X86_PAE
125 l3t = map_domain_page(mfn);
126 l3e = l3t[l3_table_offset(addr)];
127 mfn = l3e_get_pfn(l3e);
128 unmap_domain_page(l3t);
129 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
130 return 0;
131 #endif
133 l2t = map_domain_page(mfn);
134 l2e = l2t[l2_table_offset(addr)];
135 mfn = l2e_get_pfn(l2e);
136 unmap_domain_page(l2t);
137 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
138 return 0;
139 if ( l2e_get_flags(l2e) & _PAGE_PSE )
140 return 1;
142 l1t = map_domain_page(mfn);
143 l1e = l1t[l1_table_offset(addr)];
144 mfn = l1e_get_pfn(l1e);
145 unmap_domain_page(l1t);
146 return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
147 }
/* Size of the private stack used by the double-fault handler task. */
#define DOUBLEFAULT_STACK_SIZE 1024
/* Hardware task (TSS) and stack that #DF switches to; initialised in
 * percpu_traps_init(). */
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
/*
 * Double-fault (#DF) handler, entered via the dedicated hardware task
 * installed by percpu_traps_init(). Dumps the faulting CPU's saved state
 * from its main TSS and halts the machine; there is no recovery.
 */
asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    /*
     * back_link holds the TSS selector of the interrupted task; convert
     * selector -> GDT index -> CPU number (assumes TSS entries are two
     * GDT slots apart starting at __FIRST_TSS_ENTRY — TODO confirm).
     */
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Stop the watchdog: we are never returning to normal operation. */
    watchdog_disable();

    /* Console lock may be held by the CPU that just died — break it. */
    console_force_unlock();

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           cpu, tss->cs, tss->eip);
    print_symbol(" %s\n", tss->eip);
    printk("EFLAGS: %08x\n", tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    machine_halt();
}
/*
 * HYPERVISOR_iret: emulate the IRET a PV guest uses to return from an
 * event/exception frame. Pops EAX, CS:EIP, EFLAGS and (mode-dependent)
 * stack/segment state from the guest stack at regs->esp, writing them
 * into the hypercall frame so the exit path restores them.
 *
 * Returns the restored EAX value (the hypercall exit path overwrites
 * EAX with the hypercall return value, so EAX must round-trip this way).
 * Crashes the domain on any inaccessible stack or a return to ring 0.
 */
unsigned long do_iret(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    u32 eflags;

    /* Check worst-case stack frame for overlap with Xen protected area. */
    if ( unlikely(!access_ok(regs->esp, 40)) )
        domain_crash_synchronous();

    /* Pop and restore EAX (clobbered by hypercall). */
    if ( unlikely(__copy_from_user(&regs->eax, (void __user *)regs->esp, 4)) )
        domain_crash_synchronous();
    regs->esp += 4;

    /* Pop and restore CS and EIP. */
    if ( unlikely(__copy_from_user(&regs->eip, (void __user *)regs->esp, 8)) )
        domain_crash_synchronous();
    regs->esp += 8;

    /*
     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
     */
    if ( unlikely(__copy_from_user(&eflags, (void __user *)regs->esp, 4)) )
        domain_crash_synchronous();
    regs->esp += 4;
    /* Guest may never set IOPL; IF is forced on in the real frame. */
    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;

    if ( vm86_mode(regs) )
    {
        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 24) )
            domain_crash_synchronous();
    }
    else if ( unlikely(ring_0(regs)) )
    {
        /* A guest may never return to ring 0. */
        domain_crash_synchronous();
    }
    else if ( !ring_1(regs) )
    {
        /* Return to ring 2/3: pop and restore ESP and SS. */
        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 8) )
            domain_crash_synchronous();
    }
    /* else: same-ring (ring 1) return — ESP/SS are left as they are. */

    /* No longer in NMI context. */
    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);

    /* Restore upcall mask from supplied EFLAGS.IF. */
    current->vcpu_info->evtchn_upcall_mask = !(eflags & X86_EFLAGS_IF);

    /*
     * The hypercall exit path will overwrite EAX with this return
     * value.
     */
    return regs->eax;
}
/* Low-level entry stub for the TRAP_deferred_nmi vector (installed in
 * percpu_traps_init()). */
BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
/*
 * Handler for NMIs redelivered through the local APIC as vector
 * TRAP_deferred_nmi: acknowledge the APIC, then run the normal NMI
 * handler with the interrupted register frame.
 */
fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
{
    asmlinkage void do_nmi(struct cpu_user_regs *);
    ack_APIC_irq();
    do_nmi(regs);
}
/*
 * One-time trap initialisation done on the boot CPU: install the
 * hypercall gate, the deferred-NMI vector and the double-fault task
 * gate. Secondary CPUs return immediately.
 */
void __init percpu_traps_init(void)
{
    struct tss_struct *tss = &doublefault_tss;
    asmlinkage int hypercall(void);

    if ( smp_processor_id() != 0 )
        return;

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /* The hypercall entry vector is only accessible from ring 1. */
    /* Gate type 14, DPL 1. */
    _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);

    set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);

    /*
     * Make a separate task for double faults. This will get us debug output if
     * we blow the kernel stack.
     */
    memset(tss, 0, sizeof(*tss));
    tss->ds = __HYPERVISOR_DS;
    tss->es = __HYPERVISOR_DS;
    tss->ss = __HYPERVISOR_DS;
    /* Stack grows down: start at the top of the private stack. */
    tss->esp = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3 = __pa(idle_pg_table);
    tss->cs = __HYPERVISOR_CS;
    tss->eip = (unsigned long)do_double_fault;
    tss->eflags = 2; /* just the always-set reserved bit 1 */
    tss->bitmap = IOBMP_INVALID_OFFSET;
    /* Limit 235 covers the TSS (byte granularity — TODO confirm value);
     * descriptor type 9 = available 32-bit TSS. */
    _set_tssldt_desc(
        gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
        (unsigned long)tss, 235, 9);

    /* #DF switches to the double-fault task via this task gate. */
    set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
}
/*
 * (Re)build @v's int80 direct-trap descriptor from the guest's registered
 * 0x80 trap handler, so `int $0x80` system calls can be delivered to the
 * guest without bouncing through Xen. Falls back to a zeroed
 * (not-present) descriptor when a direct trap is unsafe or unsupported.
 */
void init_int80_direct_trap(struct vcpu *v)
{
    struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80];

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable. Also we
     * must ensure that the CS is safe to poke into an interrupt gate.
     *
     * When running with supervisor_mode_kernel enabled a direct trap
     * to the guest OS cannot be used because the INT instruction will
     * switch to the Xen stack and we need to swap back to the guest
     * kernel stack before passing control to the system call entry point.
     */
    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
         supervisor_mode_kernel )
    {
        v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
        return;
    }

    /* Encode a present 32-bit trap gate (0x8f00) with the guest's
     * selector:offset and DPL. */
    v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    v->arch.int80_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);

    /* If the VCPU is running here and now, activate the gate immediately. */
    if ( v == current )
        set_int80_direct_trap(v);
}
320 static long register_guest_callback(struct callback_register *reg)
321 {
322 long ret = 0;
323 struct vcpu *v = current;
325 fixup_guest_code_selector(reg->address.cs);
327 switch ( reg->type )
328 {
329 case CALLBACKTYPE_event:
330 v->arch.guest_context.event_callback_cs = reg->address.cs;
331 v->arch.guest_context.event_callback_eip = reg->address.eip;
332 break;
334 case CALLBACKTYPE_failsafe:
335 v->arch.guest_context.failsafe_callback_cs = reg->address.cs;
336 v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
337 break;
339 default:
340 ret = -EINVAL;
341 break;
342 }
344 return ret;
345 }
347 static long unregister_guest_callback(struct callback_unregister *unreg)
348 {
349 long ret;
351 switch ( unreg->type )
352 {
353 default:
354 ret = -EINVAL;
355 break;
356 }
358 return ret;
359 }
362 long do_callback_op(int cmd, GUEST_HANDLE(void) arg)
363 {
364 long ret;
366 switch ( cmd )
367 {
368 case CALLBACKOP_register:
369 {
370 struct callback_register reg;
372 ret = -EFAULT;
373 if ( copy_from_guest(&reg, arg, 1) )
374 break;
376 ret = register_guest_callback(&reg);
377 }
378 break;
380 case CALLBACKOP_unregister:
381 {
382 struct callback_unregister unreg;
384 ret = -EFAULT;
385 if ( copy_from_guest(&unreg, arg, 1) )
386 break;
388 ret = unregister_guest_callback(&unreg);
389 }
390 break;
392 default:
393 ret = -EINVAL;
394 break;
395 }
397 return ret;
398 }
400 long do_set_callbacks(unsigned long event_selector,
401 unsigned long event_address,
402 unsigned long failsafe_selector,
403 unsigned long failsafe_address)
404 {
405 struct callback_register event = {
406 .type = CALLBACKTYPE_event,
407 .address = { event_selector, event_address },
408 };
409 struct callback_register failsafe = {
410 .type = CALLBACKTYPE_failsafe,
411 .address = { failsafe_selector, failsafe_address },
412 };
414 register_guest_callback(&event);
415 register_guest_callback(&failsafe);
417 return 0;
418 }
/*
 * Populate the guest-visible hypercall transfer page for guests whose
 * kernel runs in ring 0 (supervisor_mode_kernel). Each 32-byte slot i
 * saves flags, disables interrupts, loads the hypercall number into EAX
 * and far-calls Xen's hypercall entry point directly.
 */
static void hypercall_page_initialise_ring0_kernel(void *hypercall_page)
{
    extern asmlinkage int hypercall(void);
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */

    for ( i = 0; i < NR_hypercalls; i++ )
    {
        p = (char *)(hypercall_page + (i * 32));

        *(u8 *)(p+ 0) = 0x9c; /* pushf */
        *(u8 *)(p+ 1) = 0xfa; /* cli */
        *(u8 *)(p+ 2) = 0xb8; /* mov $<i>,%eax */
        *(u32 *)(p+ 3) = i;
        *(u8 *)(p+ 7) = 0x9a; /* lcall $__HYPERVISOR_CS,&hypercall */
        *(u32 *)(p+ 8) = (u32)&hypercall;
        *(u16 *)(p+12) = (u16)__HYPERVISOR_CS;
        *(u8 *)(p+14) = 0xc3; /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8 *)(p+ 0) = 0x50; /* push %eax */
    *(u8 *)(p+ 1) = 0x9c; /* pushf */
    *(u8 *)(p+ 2) = 0xfa; /* cli */
    *(u8 *)(p+ 3) = 0xb8; /* mov $<i>,%eax */
    *(u32 *)(p+ 4) = __HYPERVISOR_iret;
    *(u8 *)(p+ 8) = 0x9a; /* lcall $__HYPERVISOR_CS,&hypercall */
    *(u32 *)(p+ 9) = (u32)&hypercall;
    *(u16 *)(p+13) = (u16)__HYPERVISOR_CS;
    /* No trailing ret: HYPERVISOR_iret never returns to the caller. */
}
/*
 * Populate the guest-visible hypercall transfer page for normal PV
 * guests (kernel in ring 1). Each 32-byte slot i loads the hypercall
 * number into EAX and enters Xen via `int $0x82`. Note the loop fills
 * every slot in the page (PAGE_SIZE/32), not just NR_hypercalls.
 */
static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
{
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */

    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8 *)(p+ 0) = 0xb8; /* mov $<i>,%eax */
        *(u32 *)(p+ 1) = i;
        *(u16 *)(p+ 5) = 0x82cd; /* int $0x82 */
        *(u8 *)(p+ 7) = 0xc3; /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8 *)(p+ 0) = 0x50; /* push %eax */
    *(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
    *(u16 *)(p+ 6) = 0x82cd; /* int $0x82 */
    /* No trailing ret: HYPERVISOR_iret never returns to the caller. */
}
486 void hypercall_page_initialise(void *hypercall_page)
487 {
488 if ( supervisor_mode_kernel )
489 hypercall_page_initialise_ring0_kernel(hypercall_page);
490 else
491 hypercall_page_initialise_ring1_kernel(hypercall_page);
492 }
494 /*
495 * Local variables:
496 * mode: C
497 * c-set-style: "BSD"
498 * c-basic-offset: 4
499 * tab-width: 4
500 * indent-tabs-mode: nil
501 * End:
502 */