ia64/xen-unstable: xen/arch/x86/x86_64/traps.c @ 10892:0d2ba35c0cf2

[XEN] Add hypercall support for HVM guests. This is of limited use at
the moment: every hypercall currently fails, because copy_from_user
does not yet work correctly in HVM domains.

Signed-off-by: Steven Smith <ssmith@xensource.com>

Add a CPUID hypervisor platform interface at leaf 0x40000000. Allow
the hypercall transfer page to be filled in via MSR 0x40000000.

Signed-off-by: Keir Fraser <keir@xensource.com>
author   kfraser@localhost.localdomain
date     Tue Aug 01 17:18:05 2006 +0100
parents  b786bfb058eb
children 16aa4b417c6b
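
For context, a guest might probe this interface roughly as follows. This is
a minimal, illustrative sketch based only on the leaf and MSR numbers quoted
in the changeset description above; the helper names, the max-leaf check,
and init_hypercall_page itself are assumptions, not part of this changeset.

#include <stdint.h>

static inline void cpuid(uint32_t leaf,
                         uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
    asm volatile ( "cpuid"
                   : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                   : "0" (leaf) );
}

static inline void wrmsrl(uint32_t msr, uint64_t val)
{
    asm volatile ( "wrmsr" : : "c" (msr),
                   "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32)) );
}

/*
 * Probe CPUID leaf 0x40000000 and, if a hypervisor answers, ask it to
 * fill the page at 'page_addr' with hypercall stubs via MSR 0x40000000.
 */
static int init_hypercall_page(uint64_t page_addr)
{
    uint32_t eax, ebx, ecx, edx;

    cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
    if ( eax < 0x40000000 )  /* no hypervisor leaves reported */
        return -1;

    wrmsrl(0x40000000, page_addr);  /* hypervisor fills in the page */
    return 0;
}
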
#include <xen/config.h>
#include <xen/version.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/symbols.h>
#include <xen/console.h>
#include <xen/sched.h>
#include <xen/reboot.h>
#include <xen/nmi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/msr.h>
#include <asm/shadow.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>

#include <public/callback.h>
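
/*
 * Dump register state at the current fault point. For an HVM guest the
 * segment and control registers cannot be taken from the exception frame,
 * so they are fetched via hvm_store_cpu_guest_regs() instead.
 */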
void show_registers(struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    char taint_str[TAINT_STRING_MAX_LEN];
    const char *context;

    if ( hvm_guest(current) && guest_mode(regs) )
    {
        context = "hvm";
        hvm_store_cpu_guest_regs(current, &fault_regs, fault_crs);
    }
    else
    {
        context = guest_mode(regs) ? "guest" : "hypervisor";
        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
        fault_regs.ds = read_segment_register(ds);
        fault_regs.es = read_segment_register(es);
        fault_regs.fs = read_segment_register(fs);
        fault_regs.gs = read_segment_register(gs);
    }

    printk("----[ Xen-%d.%d%s %s ]----\n",
           xen_major_version(), xen_minor_version(), xen_extra_version(),
           print_tainted(taint_str));
    printk("CPU: %d\nRIP: %04x:[<%016lx>]",
           smp_processor_id(), fault_regs.cs, fault_regs.rip);
    if ( !guest_mode(regs) )
        print_symbol(" %s", fault_regs.rip);
    printk("\nRFLAGS: %016lx CONTEXT: %s\n", fault_regs.rflags, context);
    printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
           fault_regs.rax, fault_regs.rbx, fault_regs.rcx);
    printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
           fault_regs.rdx, fault_regs.rsi, fault_regs.rdi);
    printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
           fault_regs.rbp, fault_regs.rsp, fault_regs.r8);
    printk("r9: %016lx r10: %016lx r11: %016lx\n",
           fault_regs.r9, fault_regs.r10, fault_regs.r11);
    printk("r12: %016lx r13: %016lx r14: %016lx\n",
           fault_regs.r12, fault_regs.r13, fault_regs.r14);
    printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
           fault_regs.r15, fault_crs[0], fault_crs[3]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           fault_regs.ds, fault_regs.es, fault_regs.fs,
           fault_regs.gs, fault_regs.ss, fault_regs.cs);
}

void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    l4_pgentry_t l4e, *l4t;
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %016lx:\n", addr);

    l4t = mfn_to_virt(mfn);
    l4e = l4t[l4_table_offset(addr)];
    mfn = l4e_get_pfn(l4e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L4 = %"PRIpte" %016lx\n", l4e_get_intpte(l4e), pfn);
    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
        return;

    l3t = mfn_to_virt(mfn);
    l3e = l3t[l3_table_offset(addr)];
    mfn = l3e_get_pfn(l3e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L3 = %"PRIpte" %016lx\n", l3e_get_intpte(l3e), pfn);
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        return;

    l2t = mfn_to_virt(mfn);
    l2e = l2t[l2_table_offset(addr)];
    mfn = l2e_get_pfn(l2e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L2 = %"PRIpte" %016lx %s\n", l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) )
        return;

    l1t = mfn_to_virt(mfn);
    l1e = l1t[l1_table_offset(addr)];
    mfn = l1e_get_pfn(l1e);
    pfn = get_gpfn_from_mfn(mfn);
    printk(" L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
}
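
/*
 * A double fault arrives on its own IST stack. Dump as much state as we
 * can to the console, then halt; no recovery is attempted.
 */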
asmlinkage void double_fault(void);
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
{
    unsigned int cpu, tr;
    char taint_str[TAINT_STRING_MAX_LEN];

    asm ( "str %0" : "=r" (tr) );
    cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;

    watchdog_disable();

    console_force_unlock();

    /* Find information saved during fault and dump it to the console. */
    printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n",
           xen_major_version(), xen_minor_version(), xen_extra_version(),
           print_tainted(taint_str));
    printk("CPU: %d\nRIP: %04x:[<%016lx>]",
           cpu, regs->cs, regs->rip);
    print_symbol(" %s", regs->rip);
    printk("\nRFLAGS: %016lx\n", regs->rflags);
    printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
           regs->rax, regs->rbx, regs->rcx);
    printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
           regs->rdx, regs->rsi, regs->rdi);
    printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
           regs->rbp, regs->rsp, regs->r8);
    printk("r9: %016lx r10: %016lx r11: %016lx\n",
           regs->r9, regs->r10, regs->r11);
    printk("r12: %016lx r13: %016lx r14: %016lx\n",
           regs->r12, regs->r13, regs->r14);
    printk("r15: %016lx\n", regs->r15);
    show_stack_overflow(regs->rsp);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    machine_halt();
}
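
/*
 * Flip the current vcpu between guest kernel and guest user mode. The two
 * modes use different GS bases and different page-table bases, so the flag
 * flip must be paired with SWAPGS and a page-table base reload.
 */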
void toggle_guest_mode(struct vcpu *v)
{
    v->arch.flags ^= TF_kernel_mode;
    __asm__ __volatile__ ( "swapgs" );
    update_pagetables(v);
    write_ptbase(v);
}
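
/*
 * HYPERVISOR_iret: reload the IRET context (rip/cs/rflags/rsp/ss, plus
 * flags and optionally rcx/r11) from the guest kernel stack. CS and SS are
 * forced to RPL 3 and IOPL/VM are cleared, so the guest cannot use this
 * path to raise its privilege.
 */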
unsigned long do_iret(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    struct iret_context iret_saved;
    struct vcpu *v = current;

    if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
                                 sizeof(iret_saved))) )
    {
        DPRINTK("Fault while reading IRET context from guest stack\n");
        domain_crash_synchronous();
    }

    /* Returning to user mode? */
    if ( (iret_saved.cs & 3) == 3 )
    {
        if ( unlikely(pagetable_is_null(v->arch.guest_table_user)) )
        {
            DPRINTK("Guest switching to user mode with no user page tables\n");
            domain_crash_synchronous();
        }
        toggle_guest_mode(v);
    }

    regs->rip    = iret_saved.rip;
    regs->cs     = iret_saved.cs | 3; /* force guest privilege */
    regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
    regs->rsp    = iret_saved.rsp;
    regs->ss     = iret_saved.ss | 3; /* force guest privilege */

    if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
    {
        regs->entry_vector = 0;
        regs->r11 = iret_saved.r11;
        regs->rcx = iret_saved.rcx;
    }

    /* No longer in NMI context. */
    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);

    /* Restore upcall mask from supplied EFLAGS.IF. */
    current->vcpu_info->evtchn_upcall_mask = !(iret_saved.rflags & EF_IE);

    /* Saved %rax gets written back to regs->rax in entry.S. */
    return iret_saved.rax;
}
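
/*
 * Set up per-CPU trap handling state. Besides the IST stacks for NMIs and
 * double faults, each CPU gets two small machine-code trampolines, pointed
 * at by MSR_LSTAR and MSR_CSTAR, which SYSCALL enters. Each trampoline
 * saves the guest %rsp, switches onto the per-CPU stack, and pushes %r11
 * and a guest code selector so the frame resembles a normal exception
 * frame before jumping to syscall_enter().
 */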
asmlinkage void syscall_enter(void);
void __init percpu_traps_init(void)
{
    char *stack_bottom, *stack;
    int cpu = smp_processor_id();

    if ( cpu == 0 )
    {
        /* Specify dedicated interrupt stacks for NMIs and double faults. */
        set_intr_gate(TRAP_double_fault, &double_fault);
        idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
        idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
    }

    stack_bottom = (char *)get_stack_bottom();
    stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));

    /* Double-fault handler has its own per-CPU 1kB stack. */
    init_tss[cpu].ist[0] = (unsigned long)&stack[1024];

    /* NMI handler has its own per-CPU 1kB stack. */
    init_tss[cpu].ist[1] = (unsigned long)&stack[2048];

    /*
     * Trampoline for SYSCALL entry from long mode.
     */

    /* Skip the NMI and DF stacks. */
    stack = &stack[2048];
    wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));

    /* movq %rsp, saversp(%rip) */
    stack[0] = 0x48;
    stack[1] = 0x89;
    stack[2] = 0x25;
    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;

    /* leaq saversp(%rip), %rsp */
    stack[7] = 0x48;
    stack[8] = 0x8d;
    stack[9] = 0x25;
    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;

    /* pushq %r11 */
    stack[14] = 0x41;
    stack[15] = 0x53;

    /* pushq $__GUEST_CS64 */
    stack[16] = 0x68;
    *(u32 *)&stack[17] = __GUEST_CS64;

    /* jmp syscall_enter */
    stack[21] = 0xe9;
    *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];

    /*
     * Trampoline for SYSCALL entry from compatibility mode.
     */

    /* Skip the long-mode entry trampoline. */
    stack = &stack[26];
    wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));

    /* movq %rsp, saversp(%rip) */
    stack[0] = 0x48;
    stack[1] = 0x89;
    stack[2] = 0x25;
    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;

    /* leaq saversp(%rip), %rsp */
    stack[7] = 0x48;
    stack[8] = 0x8d;
    stack[9] = 0x25;
    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;

    /* pushq %r11 */
    stack[14] = 0x41;
    stack[15] = 0x53;

    /* pushq $__GUEST_CS32 */
    stack[16] = 0x68;
    *(u32 *)&stack[17] = __GUEST_CS32;

    /* jmp syscall_enter */
    stack[21] = 0xe9;
    *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];

    /*
     * Common SYSCALL parameters.
     */

    wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
    wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
}
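
/*
 * Update a vcpu's callback entry points. For the failsafe and syscall
 * callbacks, CALLBACKF_mask_events selects whether event delivery is
 * masked while the callback runs.
 */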
static long register_guest_callback(struct callback_register *reg)
{
    long ret = 0;
    struct vcpu *v = current;

    switch ( reg->type )
    {
    case CALLBACKTYPE_event:
        v->arch.guest_context.event_callback_eip = reg->address;
        break;

    case CALLBACKTYPE_failsafe:
        v->arch.guest_context.failsafe_callback_eip = reg->address;
        if ( reg->flags & CALLBACKF_mask_events )
            set_bit(_VGCF_failsafe_disables_events,
                    &v->arch.guest_context.flags);
        else
            clear_bit(_VGCF_failsafe_disables_events,
                      &v->arch.guest_context.flags);
        break;

    case CALLBACKTYPE_syscall:
        v->arch.guest_context.syscall_callback_eip = reg->address;
        if ( reg->flags & CALLBACKF_mask_events )
            set_bit(_VGCF_syscall_disables_events,
                    &v->arch.guest_context.flags);
        else
            clear_bit(_VGCF_syscall_disables_events,
                      &v->arch.guest_context.flags);
        break;

    case CALLBACKTYPE_nmi:
        ret = register_guest_nmi_callback(reg->address);
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static long unregister_guest_callback(struct callback_unregister *unreg)
{
    long ret;

    switch ( unreg->type )
    {
    case CALLBACKTYPE_nmi:
        ret = unregister_guest_nmi_callback();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

long do_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    long ret;

    switch ( cmd )
    {
    case CALLBACKOP_register:
    {
        struct callback_register reg;

        ret = -EFAULT;
        if ( copy_from_guest(&reg, arg, 1) )
            break;

        ret = register_guest_callback(&reg);
    }
    break;

    case CALLBACKOP_unregister:
    {
        struct callback_unregister unreg;

        ret = -EFAULT;
        if ( copy_from_guest(&unreg, arg, 1) )
            break;

        ret = unregister_guest_callback(&unreg);
    }
    break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
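
/*
 * A guest registers a callback with, for example (illustrative guest-side
 * code, not part of this file):
 *
 *     struct callback_register cb = {
 *         .type    = CALLBACKTYPE_event,
 *         .address = (unsigned long)event_entry_point,
 *     };
 *     HYPERVISOR_callback_op(CALLBACKOP_register, &cb);
 */

/* Legacy hypercall: register the three standard callbacks in one call. */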
long do_set_callbacks(unsigned long event_address,
                      unsigned long failsafe_address,
                      unsigned long syscall_address)
{
    struct callback_register event = {
        .type = CALLBACKTYPE_event,
        .address = event_address,
    };
    struct callback_register failsafe = {
        .type = CALLBACKTYPE_failsafe,
        .address = failsafe_address,
    };
    struct callback_register syscall = {
        .type = CALLBACKTYPE_syscall,
        .address = syscall_address,
    };

    register_guest_callback(&event);
    register_guest_callback(&failsafe);
    register_guest_callback(&syscall);

    return 0;
}
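
/*
 * The hypercall page is a transfer page of 32-byte stubs, one per
 * hypercall: the stub for hypercall <i> lives at byte offset i*32 and
 * places <i> in %eax before executing SYSCALL.
 */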
static void hypercall_page_initialise_ring3_kernel(void *hypercall_page)
{
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8  *)(p+ 0) = 0x51;    /* push %rcx */
        *(u16 *)(p+ 1) = 0x5341;  /* push %r11 */
        *(u8  *)(p+ 3) = 0xb8;    /* mov  $<i>,%eax */
        *(u32 *)(p+ 4) = i;
        *(u16 *)(p+ 8) = 0x050f;  /* syscall */
        *(u16 *)(p+10) = 0x5b41;  /* pop  %r11 */
        *(u8  *)(p+12) = 0x59;    /* pop  %rcx */
        *(u8  *)(p+13) = 0xc3;    /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8  *)(p+ 0) = 0x51;    /* push %rcx */
    *(u16 *)(p+ 1) = 0x5341;  /* push %r11 */
    *(u8  *)(p+ 3) = 0x50;    /* push %rax */
    *(u8  *)(p+ 4) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 5) = __HYPERVISOR_iret;
    *(u16 *)(p+ 9) = 0x050f;  /* syscall */
}
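
/*
 * A 64-bit guest then makes hypercall <i> by calling into the page, for
 * example (illustrative):
 *
 *     call hypercall_page + (<i> * 32)
 *
 * with arguments in %rdi, %rsi, %rdx, %r10 and %r8. SYSCALL itself
 * clobbers %rcx and %r11, which is why the stubs preserve them.
 */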
void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
    if ( hvm_guest(d->vcpu[0]) )
        hvm_hypercall_page_initialise(d, hypercall_page);
    else
        hypercall_page_initialise_ring3_kernel(hypercall_page);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */