direct-io.hg

view xen/arch/x86/x86_64/compat/traps.c @ 14445:522a1cd17b6d

[XEN] Implement faster int 0x80 handling for compat mode guests.

Using the GPF handler to spot the software interrupt and pass it back
to the guest increases the base syscall time by a factor of 2.7
compared with 32on32 using direct trap to ring 1. (0.3270->0.8680
microseconds, measured with lmbench lat_syscall).

Since the 64 bit IDT can only contain 64 bit segment selectors we
cannot trap directly to compat mode ring 1. However implementing a
dedicated 64 bit ring 0 trap handler allows us to avoid much of the
GPF handler overhead and reduces the overhead to 1.7 times
(0.3270->0.5497 microseconds).

Signed-off-by: Ian Campbell <ian.campbell@xensource.com>
author Ian Campbell <ian.campbell@xensource.com>
date Tue Mar 20 14:33:15 2007 +0000 (2007-03-20)
parents 5442b2458e1b
children f830c5719e74
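The slowdown factors quoted in the description follow directly from the
lmbench lat_syscall numbers; a quick check (stand-alone snippet, not part
of the changeset):

    /* Verify the 2.7x / 1.7x figures from the commit message. */
    #include <stdio.h>

    int main(void)
    {
        double base = 0.3270;  /* 32on32, direct trap to ring 1 (us)       */
        double gpf  = 0.8680;  /* compat guest, int 0x80 via #GP path (us) */
        double fast = 0.5497;  /* compat guest, dedicated ring-0 stub (us) */

        printf("#GP path:  %.1fx\n", gpf  / base);  /* prints 2.7x */
        printf("fast path: %.1fx\n", fast / base);  /* prints 1.7x */
        return 0;
    }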
line source
#ifdef CONFIG_COMPAT

#include <xen/event.h>
#include <asm/regs.h>
#include <compat/callback.h>
#include <compat/arch-x86_32.h>
void compat_show_guest_stack(struct cpu_user_regs *regs, int debug_stack_lines)
{
    unsigned int i, *stack, addr;

    stack = (unsigned int *)(unsigned long)regs->_esp;
    printk("Guest stack trace from esp=%08lx:\n ", (unsigned long)stack);

    for ( i = 0; i < debug_stack_lines * 8; i++ )
    {
        if ( (((long)stack + 3) & (STACK_SIZE - 4)) == 0 )
            break;
        if ( get_user(addr, stack) )
        {
            if ( i != 0 )
                printk("\n ");
            printk("Fault while accessing guest memory.");
            i = 1;
            break;
        }
        if ( (i != 0) && ((i % 8) == 0) )
            printk("\n ");
        printk(" %08x", addr);
        stack++;
    }
    if ( i == 0 )
        printk("Stack empty.");
    printk("\n");
}
unsigned int compat_iret(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    u32 eflags;

    /* Restore EAX (clobbered by hypercall). */
    if ( unlikely(__get_user(regs->_eax, (u32 __user *)regs->rsp)) )
        goto exit_and_crash;

    /* Restore CS and EIP. */
    if ( unlikely(__get_user(regs->_eip, (u32 __user *)regs->rsp + 1)) ||
         unlikely(__get_user(regs->cs, (u32 __user *)regs->rsp + 2)) )
        goto exit_and_crash;

    /*
     * Fix up and restore EFLAGS. We fix up in a local staging area
     * to avoid firing the BUG_ON(IOPL) check in arch_get_info_guest.
     */
    if ( unlikely(__get_user(eflags, (u32 __user *)regs->rsp + 3)) )
        goto exit_and_crash;
    regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;

    if ( unlikely(eflags & X86_EFLAGS_VM) )
    {
        /*
         * Cannot return to VM86 mode: inject a GP fault instead. Note that
         * the GP fault is reported on the first VM86 mode instruction, not on
         * the IRET (which is why we can simply leave the stack frame as-is
         * (except for perhaps having to copy it), which in turn seems better
         * than teaching create_bounce_frame() to needlessly deal with vm86
         * mode frames).
         */
        const struct trap_info *ti;
        u32 x, ksp = current->arch.guest_context.kernel_sp - 40;
        unsigned int i;
        int rc = 0;

        gdprintk(XENLOG_ERR, "VM86 mode unavailable (ksp:%08X->%08X)\n",
                 regs->_esp, ksp);
        if ( ksp < regs->_esp )
        {
            /* Destination below source: copy lowest dword first. */
            for (i = 1; i < 10; ++i)
            {
                rc |= __get_user(x, (u32 __user *)regs->rsp + i);
                rc |= __put_user(x, (u32 __user *)(unsigned long)ksp + i);
            }
        }
        else if ( ksp > regs->_esp )
        {
            /* Destination above source: copy highest dword first. */
            for (i = 9; i > 0; --i)
            {
                rc |= __get_user(x, (u32 __user *)regs->rsp + i);
                rc |= __put_user(x, (u32 __user *)(unsigned long)ksp + i);
            }
        }
        if ( rc )
            goto exit_and_crash;
        regs->_esp = ksp;
        regs->ss = current->arch.guest_context.kernel_ss;

        ti = &current->arch.guest_context.trap_ctxt[13];
        if ( TI_GET_IF(ti) )
            eflags &= ~X86_EFLAGS_IF;
        regs->_eflags = eflags & ~(X86_EFLAGS_VM|X86_EFLAGS_RF|
                                   X86_EFLAGS_NT|X86_EFLAGS_TF);

        if ( unlikely(__put_user(0, (u32 __user *)regs->rsp)) )
            goto exit_and_crash;
        regs->_eip = ti->address;
        regs->cs = ti->cs;
    }
    else if ( unlikely(ring_0(regs)) )
        goto exit_and_crash;
    else if ( !ring_1(regs) )
    {
        /* Return to ring 2/3: restore ESP and SS. */
        if ( __get_user(regs->ss, (u32 __user *)regs->rsp + 5) ||
             __get_user(regs->_esp, (u32 __user *)regs->rsp + 4) )
            goto exit_and_crash;
    }
    else
        regs->_esp += 16;

    /* No longer in NMI context. */
    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);

    /* Restore upcall mask from supplied EFLAGS.IF. */
    vcpu_info(current, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);

    /*
     * The hypercall exit path will overwrite EAX with this return
     * value.
     */
    return regs->_eax;

 exit_and_crash:
    gdprintk(XENLOG_ERR, "Fatal error\n");
    domain_crash(current->domain);
    return 0;
}
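/*
 * For reference (not part of the original file): the offsets read above
 * imply that a compat guest reaches HYPERVISOR_iret with a 32-bit frame
 * of the following shape at the top of its stack (EAX is pushed by the
 * hypercall-page stub, see hypercall_page_initialise_ring1_kernel further
 * down).  The struct name is illustrative only.
 */
struct compat_iret_frame_sketch {
    u32 eax;     /* (u32 *)rsp + 0: clobbered by the hypercall, restored above */
    u32 eip;     /* (u32 *)rsp + 1 */
    u32 cs;      /* (u32 *)rsp + 2 */
    u32 eflags;  /* (u32 *)rsp + 3: IOPL stripped, IF forced on by compat_iret() */
    u32 esp;     /* (u32 *)rsp + 4: consumed only when returning to ring 2/3 */
    u32 ss;      /* (u32 *)rsp + 5: consumed only when returning to ring 2/3 */
};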
static long compat_register_guest_callback(struct compat_callback_register *reg)
{
    long ret = 0;
    struct vcpu *v = current;

    fixup_guest_code_selector(v->domain, reg->address.cs);

    switch ( reg->type )
    {
    case CALLBACKTYPE_event:
        v->arch.guest_context.event_callback_cs  = reg->address.cs;
        v->arch.guest_context.event_callback_eip = reg->address.eip;
        break;

    case CALLBACKTYPE_failsafe:
        v->arch.guest_context.failsafe_callback_cs  = reg->address.cs;
        v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
        if ( reg->flags & CALLBACKF_mask_events )
            set_bit(_VGCF_failsafe_disables_events,
                    &v->arch.guest_context.flags);
        else
            clear_bit(_VGCF_failsafe_disables_events,
                      &v->arch.guest_context.flags);
        break;

    case CALLBACKTYPE_nmi:
        ret = register_guest_nmi_callback(reg->address.eip);
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
static long compat_unregister_guest_callback(struct compat_callback_unregister *unreg)
{
    long ret;

    switch ( unreg->type )
    {
    case CALLBACKTYPE_nmi:
        ret = unregister_guest_nmi_callback();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
long compat_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    long ret;

    switch ( cmd )
    {
    case CALLBACKOP_register:
    {
        struct compat_callback_register reg;

        ret = -EFAULT;
        if ( copy_from_guest(&reg, arg, 1) )
            break;

        ret = compat_register_guest_callback(&reg);
    }
    break;

    case CALLBACKOP_unregister:
    {
        struct compat_callback_unregister unreg;

        ret = -EFAULT;
        if ( copy_from_guest(&unreg, arg, 1) )
            break;

        ret = compat_unregister_guest_callback(&unreg);
    }
    break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
long compat_set_callbacks(unsigned long event_selector,
                          unsigned long event_address,
                          unsigned long failsafe_selector,
                          unsigned long failsafe_address)
{
    struct compat_callback_register event = {
        .type = CALLBACKTYPE_event,
        .address = {
            .cs = event_selector,
            .eip = event_address
        }
    };
    struct compat_callback_register failsafe = {
        .type = CALLBACKTYPE_failsafe,
        .address = {
            .cs = failsafe_selector,
            .eip = failsafe_address
        }
    };

    compat_register_guest_callback(&event);
    compat_register_guest_callback(&failsafe);

    return 0;
}
DEFINE_XEN_GUEST_HANDLE(trap_info_compat_t);

int compat_set_trap_table(XEN_GUEST_HANDLE(trap_info_compat_t) traps)
{
    struct compat_trap_info cur;
    struct trap_info *dst = current->arch.guest_context.trap_ctxt;
    long rc = 0;

    /* If no table is presented then clear the entire virtual IDT. */
    if ( guest_handle_is_null(traps) )
    {
        memset(dst, 0, 256 * sizeof(*dst));
        return 0;
    }

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
        {
            rc = hypercall_create_continuation(
                __HYPERVISOR_set_trap_table, "h", traps);
            break;
        }

        if ( copy_from_guest(&cur, traps, 1) )
        {
            rc = -EFAULT;
            break;
        }

        if ( cur.address == 0 )
            break;

        fixup_guest_code_selector(current->domain, cur.cs);

        XLAT_trap_info(dst + cur.vector, &cur);

        if ( cur.vector == 0x80 )
            init_int80_direct_trap(current);

        guest_handle_add_offset(traps, 1);
    }

    return rc;
}
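/*
 * Guest-side illustration (not part of the original file): a compat
 * (32-bit PV) guest enables the fast int 0x80 path simply by including
 * vector 0x80 in the table it passes to the set_trap_table hypercall;
 * compat_set_trap_table() above then calls init_int80_direct_trap() for
 * it.  The type below mirrors the public 32-bit trap_info layout as a
 * sketch; GUEST_KERNEL_CS and int80_entry are placeholder guest symbols.
 */
#if 0  /* illustrative guest-side code, not compiled with this file */
typedef struct {
    uint8_t       vector;   /* interrupt/exception vector                 */
    uint8_t       flags;    /* bits 0-1: DPL; bit 2: mask events on entry */
    uint16_t      cs;       /* guest code selector for the handler        */
    unsigned long address;  /* handler entry point                        */
} example_trap_info_t;

static const example_trap_info_t example_traps[] = {
    /* DPL 3 so that guest user space may issue int $0x80 itself. */
    { 0x80, 3, GUEST_KERNEL_CS, (unsigned long)int80_entry },
    { 0, 0, 0, 0 }  /* a zero 'address' terminates the table */
};

/* HYPERVISOR_set_trap_table(example_traps) installs the table. */
#endif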
#endif /* CONFIG_COMPAT */

static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
{
    char *p;
    int i;

    /* Fill in all the transfer points with template machine code. */

    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8  *)(p+ 0) = 0xb8;    /* mov  $<i>,%eax */
        *(u32 *)(p+ 1) = i;
        *(u16 *)(p+ 5) = 0x82cd;  /* int  $0x82 */
        *(u8  *)(p+ 7) = 0xc3;    /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
    *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
    *(u16 *)(p+ 6) = 0x82cd;  /* int  $0x82 */
}
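/*
 * Guest-side illustration (not part of the original file): with the
 * hypercall page mapped at 'hypercall_page', a ring-1 kernel reaches
 * hypercall <nr> by calling the 32-byte stub at hypercall_page + nr*32;
 * the stub loads %eax and executes int $0x82.  HYPERVISOR_iret is the
 * exception: its stub is jumped to with the EIP/CS/EFLAGS/ESP/SS frame
 * already on the stack and it never returns.  The argument registers
 * (%ebx, %ecx, ...) follow the 32-bit PV convention as assumed here.
 */
#if 0  /* illustrative guest-side code, not compiled with this file */
extern char hypercall_page[];   /* guest mapping of the hypercall page */

static inline long example_hypercall2(unsigned int nr, long a1, long a2)
{
    long ret;
    asm volatile ( "call *%1"
                   : "=a" (ret)
                   : "r" (hypercall_page + nr * 32), "b" (a1), "c" (a2)
                   : "memory" );
    return ret;
}
#endif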
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */