
view xen/arch/x86/x86_32/traps.c @ 10892:0d2ba35c0cf2

[XEN] Add hypercall support for HVM guests. This is
fairly useless at the moment, since all of the hypercalls
fail: copy_from_user doesn't work correctly in HVM
domains.

Signed-off-by: Steven Smith <ssmith@xensource.com>

Add a CPUID hypervisor platform interface at leaf
0x40000000. Allow hypercall transfer page to be filled
in via MSR 0x40000000.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Tue Aug 01 17:18:05 2006 +0100 (2006-08-01)
parents b786bfb058eb
children 16aa4b417c6b
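
As a rough guest-side sketch of the interface described in the second message above: the guest looks for the hypervisor leaf at CPUID 0x40000000 and then writes the physical address of a page to MSR 0x40000000 so that Xen can fill it in with hypercall stubs. The helper functions, the hypercall_page symbol and the "XenVMMXenVMM" signature check below are illustrative assumptions, not part of this changeset.

    #include <stdint.h>
    #include <string.h>

    /* Assumed helpers: raw CPUID and WRMSR wrappers for the sketch. */
    static inline void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
                             uint32_t *c, uint32_t *d)
    {
        asm volatile ( "cpuid"
                       : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                       : "0" (leaf) );
    }

    static inline void wrmsr(uint32_t msr, uint64_t val)
    {
        asm volatile ( "wrmsr" : : "c" (msr),
                       "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32)) );
    }

    /* One page for Xen to fill with hypercall stubs (assumes it is mapped
     * 1:1, so its virtual address is also its guest physical address). */
    static char hypercall_page[4096] __attribute__((__aligned__(4096)));

    static int setup_xen_hypercall_page(void)
    {
        uint32_t eax, ebx, ecx, edx;
        char sig[13];

        /* Leaf 0x40000000: max leaf in EAX, vendor signature in EBX:ECX:EDX.
         * "XenVMMXenVMM" is the usual Xen signature (an assumption here). */
        cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
        memcpy(sig + 0, &ebx, 4);
        memcpy(sig + 4, &ecx, 4);
        memcpy(sig + 8, &edx, 4);
        sig[12] = '\0';
        if ( strcmp(sig, "XenVMMXenVMM") != 0 )
            return -1;

        /* MSR 0x40000000: tell Xen where to place the hypercall page. */
        wrmsr(0x40000000, (uint64_t)(unsigned long)hypercall_page);
        return 0;
    }

Calls then go through fixed 32-byte slots in the page, one per hypercall number, as set up by hypercall_page_initialise() in the file below.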
line source
#include <xen/config.h>
#include <xen/version.h>
#include <xen/domain_page.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/console.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/symbols.h>
#include <xen/reboot.h>
#include <xen/nmi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>

#include <public/callback.h>

/* All CPUs have their own IDT to allow int80 direct trap. */
idt_entry_t *idt_tables[NR_CPUS] = { 0 };
void show_registers(struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    char taint_str[TAINT_STRING_MAX_LEN];
    const char *context;

    if ( hvm_guest(current) && guest_mode(regs) )
    {
        context = "hvm";
        hvm_store_cpu_guest_regs(current, &fault_regs, fault_crs);
    }
    else
    {
        context = guest_mode(regs) ? "guest" : "hypervisor";

        if ( !guest_mode(regs) )
        {
            fault_regs.esp = (unsigned long)&regs->esp;
            fault_regs.ss = read_segment_register(ss);
            fault_regs.ds = read_segment_register(ds);
            fault_regs.es = read_segment_register(es);
            fault_regs.fs = read_segment_register(fs);
            fault_regs.gs = read_segment_register(gs);
        }

        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
    }

    printk("----[ Xen-%d.%d%s %s ]----\n",
           xen_major_version(), xen_minor_version(), xen_extra_version(),
           print_tainted(taint_str));
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           smp_processor_id(), fault_regs.cs, fault_regs.eip);
    if ( !guest_mode(regs) )
        print_symbol(" %s", fault_regs.eip);
    printk("\nEFLAGS: %08x CONTEXT: %s\n", fault_regs.eflags, context);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
    printk("cr0: %08lx cr3: %08lx\n", fault_crs[0], fault_crs[3]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           fault_regs.ds, fault_regs.es, fault_regs.fs,
           fault_regs.gs, fault_regs.ss, fault_regs.cs);
}

void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn, cr3 = read_cr3();
#ifdef CONFIG_X86_PAE
    l3_pgentry_t l3e, *l3t;
#endif
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %08lx:\n", addr);

    mfn = cr3 >> PAGE_SHIFT;

#ifdef CONFIG_X86_PAE
    l3t = map_domain_page(mfn);
    l3t += (cr3 & 0xFE0UL) >> 3;
    l3e = l3t[l3_table_offset(addr)];
    mfn = l3e_get_pfn(l3e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
    unmap_domain_page(l3t);
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        return;
#endif

    l2t = map_domain_page(mfn);
    l2e = l2t[l2_table_offset(addr)];
    mfn = l2e_get_pfn(l2e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    unmap_domain_page(l2t);
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) )
        return;

    l1t = map_domain_page(mfn);
    l1e = l1t[l1_table_offset(addr)];
    mfn = l1e_get_pfn(l1e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
    unmap_domain_page(l1t);
}
#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
    char taint_str[TAINT_STRING_MAX_LEN];

    watchdog_disable();

    console_force_unlock();

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n",
           xen_major_version(), xen_minor_version(), xen_extra_version(),
           print_tainted(taint_str));
    printk("CPU: %d\nEIP: %04x:[<%08x>]",
           cpu, tss->cs, tss->eip);
    print_symbol(" %s\n", tss->eip);
    printk("EFLAGS: %08x\n", tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    show_stack_overflow(tss->esp);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    machine_halt();
}

unsigned long do_iret(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    u32 eflags;

    /* Check worst-case stack frame for overlap with Xen protected area. */
    if ( unlikely(!access_ok(regs->esp, 40)) )
        domain_crash_synchronous();

    /* Pop and restore EAX (clobbered by hypercall). */
    if ( unlikely(__copy_from_user(&regs->eax, (void __user *)regs->esp, 4)) )
        domain_crash_synchronous();
    regs->esp += 4;

    /* Pop and restore CS and EIP. */
    if ( unlikely(__copy_from_user(&regs->eip, (void __user *)regs->esp, 8)) )
        domain_crash_synchronous();
    regs->esp += 8;

    /*
     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
     */
    if ( unlikely(__copy_from_user(&eflags, (void __user *)regs->esp, 4)) )
        domain_crash_synchronous();
    regs->esp += 4;
    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;

    if ( vm86_mode(regs) )
    {
        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 24) )
            domain_crash_synchronous();
    }
    else if ( unlikely(ring_0(regs)) )
    {
        domain_crash_synchronous();
    }
    else if ( !ring_1(regs) )
    {
        /* Return to ring 2/3: pop and restore ESP and SS. */
        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 8) )
            domain_crash_synchronous();
    }

    /* No longer in NMI context. */
    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);

    /* Restore upcall mask from supplied EFLAGS.IF. */
    current->vcpu_info->evtchn_upcall_mask = !(eflags & X86_EFLAGS_IF);

    /*
     * The hypercall exit path will overwrite EAX with this return
     * value.
     */
    return regs->eax;
}
#include <asm/asm_defns.h>
BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
{
    asmlinkage void do_nmi(struct cpu_user_regs *);
    ack_APIC_irq();
    do_nmi(regs);
}

void __init percpu_traps_init(void)
{
    struct tss_struct *tss = &doublefault_tss;
    asmlinkage int hypercall(void);

    if ( smp_processor_id() != 0 )
        return;

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /* The hypercall entry vector is only accessible from ring 1. */
    _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);

    set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);

    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    memset(tss, 0, sizeof(*tss));
    tss->ds = __HYPERVISOR_DS;
    tss->es = __HYPERVISOR_DS;
    tss->ss = __HYPERVISOR_DS;
    tss->esp = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3 = __pa(idle_pg_table);
    tss->cs = __HYPERVISOR_CS;
    tss->eip = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(
        gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
        (unsigned long)tss, 235, 9);

    set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
}
void init_int80_direct_trap(struct vcpu *v)
{
    struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80];

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable. Also we
     * must ensure that the CS is safe to poke into an interrupt gate.
     *
     * When running with supervisor_mode_kernel enabled a direct trap
     * to the guest OS cannot be used because the INT instruction will
     * switch to the Xen stack and we need to swap back to the guest
     * kernel stack before passing control to the system call entry point.
     */
    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
         supervisor_mode_kernel )
    {
        v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
        return;
    }

    v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    v->arch.int80_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);

    if ( v == current )
        set_int80_direct_trap(v);
}

#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
static void do_update_sysenter(void *info)
{
    xen_callback_t *address = info;

    wrmsr(MSR_IA32_SYSENTER_CS, address->cs, 0);
    wrmsr(MSR_IA32_SYSENTER_EIP, address->eip, 0);
}
#endif

static long register_guest_callback(struct callback_register *reg)
{
    long ret = 0;
    struct vcpu *v = current;

    fixup_guest_code_selector(reg->address.cs);

    switch ( reg->type )
    {
    case CALLBACKTYPE_event:
        v->arch.guest_context.event_callback_cs = reg->address.cs;
        v->arch.guest_context.event_callback_eip = reg->address.eip;
        break;

    case CALLBACKTYPE_failsafe:
        v->arch.guest_context.failsafe_callback_cs = reg->address.cs;
        v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
        if ( reg->flags & CALLBACKF_mask_events )
            set_bit(_VGCF_failsafe_disables_events,
                    &v->arch.guest_context.flags);
        else
            clear_bit(_VGCF_failsafe_disables_events,
                      &v->arch.guest_context.flags);
        break;

#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    case CALLBACKTYPE_sysenter:
        if ( !cpu_has_sep )
            ret = -EINVAL;
        else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
            ret = -EIO;
        break;
#endif

    case CALLBACKTYPE_nmi:
        ret = register_guest_nmi_callback(reg->address.eip);
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static long unregister_guest_callback(struct callback_unregister *unreg)
{
    long ret;

    switch ( unreg->type )
    {
    case CALLBACKTYPE_nmi:
        ret = unregister_guest_nmi_callback();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
long do_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    long ret;

    switch ( cmd )
    {
    case CALLBACKOP_register:
    {
        struct callback_register reg;

        ret = -EFAULT;
        if ( copy_from_guest(&reg, arg, 1) )
            break;

        ret = register_guest_callback(&reg);
    }
    break;

    case CALLBACKOP_unregister:
    {
        struct callback_unregister unreg;

        ret = -EFAULT;
        if ( copy_from_guest(&unreg, arg, 1) )
            break;

        ret = unregister_guest_callback(&unreg);
    }
    break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct callback_register event = {
        .type = CALLBACKTYPE_event,
        .address = { event_selector, event_address },
    };
    struct callback_register failsafe = {
        .type = CALLBACKTYPE_failsafe,
        .address = { failsafe_selector, failsafe_address },
    };

    register_guest_callback(&event);
    register_guest_callback(&failsafe);

    return 0;
}
static void hypercall_page_initialise_ring0_kernel(void *hypercall_page)
{
    extern asmlinkage int hypercall(void);
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));

        *(u8  *)(p+ 0) = 0x9c;       /* pushf */
        *(u8  *)(p+ 1) = 0xfa;       /* cli */
        *(u8  *)(p+ 2) = 0xb8;       /* mov $<i>,%eax */
        *(u32 *)(p+ 3) = i;
        *(u8  *)(p+ 7) = 0x9a;       /* lcall $__HYPERVISOR_CS,&hypercall */
        *(u32 *)(p+ 8) = (u32)&hypercall;
        *(u16 *)(p+12) = (u16)__HYPERVISOR_CS;
        *(u8  *)(p+14) = 0xc3;       /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8  *)(p+ 0) = 0x50;       /* push %eax */
    *(u8  *)(p+ 1) = 0x9c;       /* pushf */
    *(u8  *)(p+ 2) = 0xfa;       /* cli */
    *(u8  *)(p+ 3) = 0xb8;       /* mov $<i>,%eax */
    *(u32 *)(p+ 4) = __HYPERVISOR_iret;
    *(u8  *)(p+ 8) = 0x9a;       /* lcall $__HYPERVISOR_CS,&hypercall */
    *(u32 *)(p+ 9) = (u32)&hypercall;
    *(u16 *)(p+13) = (u16)__HYPERVISOR_CS;
}

static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
{
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8  *)(p+ 0) = 0xb8;       /* mov $<i>,%eax */
        *(u32 *)(p+ 1) = i;
        *(u16 *)(p+ 5) = 0x82cd;     /* int $0x82 */
        *(u8  *)(p+ 7) = 0xc3;       /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8  *)(p+ 0) = 0x50;       /* push %eax */
    *(u8  *)(p+ 1) = 0xb8;       /* mov $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
    *(u16 *)(p+ 6) = 0x82cd;     /* int $0x82 */
}

void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
    if ( hvm_guest(d->vcpu[0]) )
        hvm_hypercall_page_initialise(d, hypercall_page);
    else if ( supervisor_mode_kernel )
        hypercall_page_initialise_ring0_kernel(hypercall_page);
    else
        hypercall_page_initialise_ring1_kernel(hypercall_page);
}
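
/*
 * Guest-side usage, as an illustrative sketch only (the symbol names here
 * are assumptions, not definitions from this file): once the hypercall
 * page is mapped, a ring-1 PV kernel reaches hypercall <n> through its
 * fixed 32-byte slot, e.g.
 *
 *     call hypercall_page + __HYPERVISOR_xen_version * 32
 *
 * with arguments in %ebx, %ecx, %edx, %esi, %edi, exactly as for a direct
 * int $0x82. HYPERVISOR_iret is the exception noted above: its stub never
 * returns, so guests jmp to it with the iret frame already on the stack.
 */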

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */