ia64/xen-unstable

changeset 16207:aeebd173c3fa

x86-64: syscall/sysenter support for 32-bit apps, covering both 32-bit apps
in 64-bit pv guests and 32on64 guests.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author Keir Fraser <keir@xensource.com>
date Wed Oct 24 15:22:57 2007 +0100 (2007-10-24)
parents 7b5b65fbaf61
children 185a13c03255
files xen/arch/x86/cpu/amd.c xen/arch/x86/domain.c xen/arch/x86/traps.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/compat/entry.S xen/arch/x86/x86_64/compat/traps.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/traps.c xen/include/asm-x86/cpufeature.h xen/include/asm-x86/domain.h xen/include/public/callback.h xen/include/public/xen-compat.h
line diff
     1.1 --- a/xen/arch/x86/cpu/amd.c	Wed Oct 24 14:36:18 2007 +0100
     1.2 +++ b/xen/arch/x86/cpu/amd.c	Wed Oct 24 15:22:57 2007 +0100
     1.3 @@ -369,6 +369,11 @@ static void __init init_amd(struct cpuin
     1.4  	if (c->x86 < 6)
     1.5  		clear_bit(X86_FEATURE_MCE, c->x86_capability);
     1.6  
     1.7 +#ifdef __x86_64__
     1.8 +	/* AMD CPUs do not support SYSENTER outside of legacy mode. */
     1.9 +	clear_bit(X86_FEATURE_SEP, c->x86_capability);
    1.10 +#endif
    1.11 +
    1.12  	/* Prevent TSC drift in non single-processor, single-core platforms. */
    1.13  	if ((smp_processor_id() == 1) && c1_ramping_may_cause_clock_drift(c))
    1.14  		disable_c1_ramping();
     2.1 --- a/xen/arch/x86/domain.c	Wed Oct 24 14:36:18 2007 +0100
     2.2 +++ b/xen/arch/x86/domain.c	Wed Oct 24 15:22:57 2007 +0100
     2.3 @@ -418,6 +418,10 @@ int vcpu_initialise(struct vcpu *v)
     2.4      v->arch.perdomain_ptes =
     2.5          d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
     2.6  
     2.7 +#ifdef __x86_64__
     2.8 +    v->arch.sysexit_cs = 3;
     2.9 +#endif
    2.10 +
    2.11      return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
    2.12  }
    2.13  
    2.14 @@ -1298,12 +1302,10 @@ void context_switch(struct vcpu *prev, s
    2.15                is_pv_32on64_vcpu(prev) != is_pv_32on64_vcpu(next)) )
    2.16          {
    2.17              uint64_t efer = read_efer();
    2.18 -
    2.19 +            if ( !(efer & EFER_SCE) )
    2.20 +                write_efer(efer | EFER_SCE);
    2.21              flush_tlb_one_local(GDT_VIRT_START(next) +
    2.22                                  FIRST_RESERVED_GDT_BYTE);
    2.23 -
    2.24 -            if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
    2.25 -                write_efer(efer ^ EFER_SCE);
    2.26          }
    2.27  #endif
    2.28  
     3.1 --- a/xen/arch/x86/traps.c	Wed Oct 24 14:36:18 2007 +0100
     3.2 +++ b/xen/arch/x86/traps.c	Wed Oct 24 15:22:57 2007 +0100
     3.3 @@ -617,16 +617,21 @@ static int emulate_forced_invalid_op(str
     3.4          clear_bit(X86_FEATURE_DE,  &d);
     3.5          clear_bit(X86_FEATURE_PSE, &d);
     3.6          clear_bit(X86_FEATURE_PGE, &d);
     3.7 +        if ( !cpu_has_sep )
     3.8 +            clear_bit(X86_FEATURE_SEP, &d);
     3.9 +#ifdef __i386__
    3.10          if ( !supervisor_mode_kernel )
    3.11              clear_bit(X86_FEATURE_SEP, &d);
    3.12 +#endif
    3.13          if ( !IS_PRIV(current->domain) )
    3.14              clear_bit(X86_FEATURE_MTRR, &d);
    3.15      }
    3.16      else if ( regs->eax == 0x80000001 )
    3.17      {
    3.18          /* Modify Feature Information. */
    3.19 -        if ( is_pv_32bit_vcpu(current) )
    3.20 -            clear_bit(X86_FEATURE_SYSCALL % 32, &d);
    3.21 +#ifdef __i386__
    3.22 +        clear_bit(X86_FEATURE_SYSCALL % 32, &d);
    3.23 +#endif
    3.24          clear_bit(X86_FEATURE_RDTSCP % 32, &d);
    3.25      }
    3.26      else
    3.27 @@ -2095,6 +2100,17 @@ asmlinkage int do_debug(struct cpu_user_
    3.28  
    3.29      if ( !guest_mode(regs) )
    3.30      {
    3.31 +#ifdef __x86_64__
    3.32 +        void sysenter_entry(void);
    3.33 +        void sysenter_eflags_saved(void);
    3.34 +        /* In SYSENTER entry path we cannot zap TF until EFLAGS is saved. */
    3.35 +        if ( (regs->rip >= (unsigned long)sysenter_entry) &&
    3.36 +             (regs->rip < (unsigned long)sysenter_eflags_saved) )
    3.37 +            goto out;
    3.38 +        WARN_ON(regs->rip != (unsigned long)sysenter_eflags_saved);
    3.39 +#else
    3.40 +        WARN_ON(1);
    3.41 +#endif
    3.42          /* Clear TF just for absolute sanity. */
    3.43          regs->eflags &= ~EF_TF;
    3.44          /*
     4.1 --- a/xen/arch/x86/x86_32/traps.c	Wed Oct 24 14:36:18 2007 +0100
     4.2 +++ b/xen/arch/x86/x86_32/traps.c	Wed Oct 24 15:22:57 2007 +0100
     4.3 @@ -355,12 +355,19 @@ static long register_guest_callback(stru
     4.4          break;
     4.5  
     4.6  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
     4.7 -    case CALLBACKTYPE_sysenter:
     4.8 -        if ( ! cpu_has_sep )
     4.9 +    case CALLBACKTYPE_sysenter_deprecated:
    4.10 +        if ( !cpu_has_sep )
    4.11              ret = -EINVAL;
    4.12          else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
    4.13              ret = -EIO;
    4.14          break;
    4.15 +
    4.16 +    case CALLBACKTYPE_sysenter:
    4.17 +        if ( !cpu_has_sep )
    4.18 +            ret = -EINVAL;
    4.19 +        else
    4.20 +            do_update_sysenter(&reg->address);
    4.21 +        break;
    4.22  #endif
    4.23  
    4.24      case CALLBACKTYPE_nmi:
    4.25 @@ -384,6 +391,7 @@ static long unregister_guest_callback(st
    4.26      case CALLBACKTYPE_event:
    4.27      case CALLBACKTYPE_failsafe:
    4.28  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    4.29 +    case CALLBACKTYPE_sysenter_deprecated:
    4.30      case CALLBACKTYPE_sysenter:
    4.31  #endif
    4.32          ret = -EINVAL;
     5.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Wed Oct 24 14:36:18 2007 +0100
     5.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Wed Oct 24 15:22:57 2007 +0100
     5.3 @@ -71,6 +71,20 @@ void __dummy__(void)
     5.4             arch.guest_context.failsafe_callback_cs);
     5.5      OFFSET(VCPU_syscall_addr, struct vcpu,
     5.6             arch.guest_context.syscall_callback_eip);
     5.7 +    OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip);
     5.8 +    OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs);
     5.9 +    OFFSET(VCPU_syscall32_disables_events, struct vcpu,
    5.10 +           arch.syscall32_disables_events);
    5.11 +    OFFSET(VCPU_sysenter_addr, struct vcpu, arch.sysenter_callback_eip);
    5.12 +    OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs);
    5.13 +    OFFSET(VCPU_sysenter_disables_events, struct vcpu,
    5.14 +           arch.sysenter_disables_events);
    5.15 +    OFFSET(VCPU_sysexit_addr, struct vcpu, arch.sysexit_eip);
    5.16 +    OFFSET(VCPU_sysexit_sel, struct vcpu, arch.sysexit_cs);
    5.17 +    OFFSET(VCPU_gp_fault_addr, struct vcpu,
    5.18 +           arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
    5.19 +    OFFSET(VCPU_gp_fault_sel, struct vcpu,
    5.20 +           arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
    5.21      OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
    5.22      OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
    5.23      OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
     6.1 --- a/xen/arch/x86/x86_64/compat/entry.S	Wed Oct 24 14:36:18 2007 +0100
     6.2 +++ b/xen/arch/x86/x86_64/compat/entry.S	Wed Oct 24 15:22:57 2007 +0100
     6.3 @@ -204,6 +204,39 @@ ENTRY(compat_post_handle_exception)
     6.4          movb  $0,TRAPBOUNCE_flags(%rdx)
     6.5          jmp   compat_test_all_events
     6.6  
     6.7 +ENTRY(compat_syscall)
     6.8 +        cmpb  $0,VCPU_syscall32_disables_events(%rbx)
     6.9 +        movzwl VCPU_syscall32_sel(%rbx),%esi
    6.10 +        movq  VCPU_syscall32_addr(%rbx),%rax
    6.11 +        setne %cl
    6.12 +        leaq  VCPU_trap_bounce(%rbx),%rdx
    6.13 +        testl $~3,%esi
    6.14 +        leal  (,%rcx,TBF_INTERRUPT),%ecx
    6.15 +        jz    2f
    6.16 +1:      movq  %rax,TRAPBOUNCE_eip(%rdx)
    6.17 +        movw  %si,TRAPBOUNCE_cs(%rdx)
    6.18 +        movb  %cl,TRAPBOUNCE_flags(%rdx)
    6.19 +        call  compat_create_bounce_frame
    6.20 +        jmp   compat_test_all_events
    6.21 +2:      movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
    6.22 +        movq  VCPU_gp_fault_addr(%rbx),%rax
    6.23 +        movzwl VCPU_gp_fault_sel(%rbx),%esi
    6.24 +        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
    6.25 +        movl  $0,TRAPBOUNCE_error_code(%rdx)
    6.26 +        jmp   1b
    6.27 +
    6.28 +ENTRY(compat_sysenter)
    6.29 +        cmpl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
    6.30 +        movzwl VCPU_sysenter_sel(%rbx),%eax
    6.31 +        movzwl VCPU_gp_fault_sel(%rbx),%ecx
    6.32 +        cmovel %ecx,%eax
    6.33 +        testl $~3,%eax
    6.34 +        movl  $FLAT_COMPAT_USER_SS,UREGS_ss(%rsp)
    6.35 +        cmovzl %ecx,%eax
    6.36 +        movw  %ax,TRAPBOUNCE_cs(%rdx)
    6.37 +        call  compat_create_bounce_frame
    6.38 +        jmp   compat_test_all_events
    6.39 +
    6.40  ENTRY(compat_int80_direct_trap)
    6.41          call  compat_create_bounce_frame
    6.42          jmp   compat_test_all_events
    6.43 @@ -246,7 +279,7 @@ 2:
    6.44          setz  %ch                       # %ch == !saved_upcall_mask
    6.45          movl  UREGS_eflags+8(%rsp),%eax
    6.46          andl  $~X86_EFLAGS_IF,%eax
    6.47 -        shlb  $1,%ch                    # Bit 9 (EFLAGS.IF)
    6.48 +        addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
    6.49          orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
    6.50  .Lft6:  movl  %eax,%fs:2*4(%rsi)        # EFLAGS
    6.51          movl  UREGS_rip+8(%rsp),%eax
     7.1 --- a/xen/arch/x86/x86_64/compat/traps.c	Wed Oct 24 14:36:18 2007 +0100
     7.2 +++ b/xen/arch/x86/x86_64/compat/traps.c	Wed Oct 24 15:22:57 2007 +0100
     7.3 @@ -165,12 +165,31 @@ static long compat_register_guest_callba
     7.4                        &v->arch.guest_context.flags);
     7.5          break;
     7.6  
     7.7 +    case CALLBACKTYPE_syscall32:
     7.8 +        v->arch.syscall32_callback_cs     = reg->address.cs;
     7.9 +        v->arch.syscall32_callback_eip    = reg->address.eip;
    7.10 +        v->arch.syscall32_disables_events =
    7.11 +            (reg->flags & CALLBACKF_mask_events) != 0;
    7.12 +        break;
    7.13 +
    7.14 +    case CALLBACKTYPE_sysenter:
    7.15 +        v->arch.sysenter_callback_cs     = reg->address.cs;
    7.16 +        v->arch.sysenter_callback_eip    = reg->address.eip;
    7.17 +        v->arch.sysenter_disables_events =
    7.18 +            (reg->flags & CALLBACKF_mask_events) != 0;
    7.19 +        break;
    7.20 +
    7.21 +    case CALLBACKTYPE_sysexit:
    7.22 +        v->arch.sysexit_cs  = reg->address.cs | 3;
    7.23 +        v->arch.sysexit_eip = reg->address.eip;
    7.24 +        break;
    7.25 +
    7.26      case CALLBACKTYPE_nmi:
    7.27          ret = register_guest_nmi_callback(reg->address.eip);
    7.28          break;
    7.29  
    7.30      default:
    7.31 -        ret = -EINVAL;
    7.32 +        ret = -ENOSYS;
    7.33          break;
    7.34      }
    7.35  
    7.36 @@ -184,12 +203,20 @@ static long compat_unregister_guest_call
    7.37  
    7.38      switch ( unreg->type )
    7.39      {
    7.40 +    case CALLBACKTYPE_event:
    7.41 +    case CALLBACKTYPE_failsafe:
    7.42 +    case CALLBACKTYPE_syscall32:
    7.43 +    case CALLBACKTYPE_sysenter:
    7.44 +    case CALLBACKTYPE_sysexit:
    7.45 +        ret = -EINVAL;
    7.46 +        break;
    7.47 +
    7.48      case CALLBACKTYPE_nmi:
    7.49          ret = unregister_guest_nmi_callback();
    7.50          break;
    7.51  
    7.52      default:
    7.53 -        ret = -EINVAL;
    7.54 +        ret = -ENOSYS;
    7.55          break;
    7.56      }
    7.57  
     8.1 --- a/xen/arch/x86/x86_64/entry.S	Wed Oct 24 14:36:18 2007 +0100
     8.2 +++ b/xen/arch/x86/x86_64/entry.S	Wed Oct 24 15:22:57 2007 +0100
     8.3 @@ -27,13 +27,20 @@
     8.4  /* %rbx: struct vcpu */
     8.5  switch_to_kernel:
     8.6          leaq  VCPU_trap_bounce(%rbx),%rdx
     8.7 -        movq  VCPU_syscall_addr(%rbx),%rax
     8.8 +        /* TB_eip = (32-bit syscall && syscall32_addr) ?
     8.9 +         *          syscall32_addr : syscall_addr */
    8.10 +        xor   %eax,%eax
    8.11 +        cmpw  $FLAT_USER_CS32,UREGS_cs(%rsp)
    8.12 +        cmoveq VCPU_syscall32_addr(%rbx),%rax
    8.13 +        testq %rax,%rax
    8.14 +        cmovzq VCPU_syscall_addr(%rbx),%rax
    8.15          movq  %rax,TRAPBOUNCE_eip(%rdx)
    8.16 -        movb  $0,TRAPBOUNCE_flags(%rdx)
    8.17 -        bt    $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
    8.18 -        jnc   1f
    8.19 -        movb  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
    8.20 -1:      call  create_bounce_frame
    8.21 +        /* TB_flags = VGCF_syscall_disables_events ? TBF_INTERRUPT : 0 */
    8.22 +        btl   $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
    8.23 +        setc  %cl
    8.24 +        leal  (,%rcx,TBF_INTERRUPT),%ecx
    8.25 +        movb  %cl,TRAPBOUNCE_flags(%rdx)
    8.26 +        call  create_bounce_frame
    8.27          andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
    8.28          jmp   test_all_events
    8.29  
    8.30 @@ -47,7 +54,7 @@ restore_all_guest:
    8.31          addq  $8,%rsp
    8.32          popq  %rcx                    # RIP
    8.33          popq  %r11                    # CS
    8.34 -        cmpw  $FLAT_KERNEL_CS32,%r11
    8.35 +        cmpw  $FLAT_USER_CS32,%r11
    8.36          popq  %r11                    # RFLAGS
    8.37          popq  %rsp                    # RSP
    8.38          je    1f
    8.39 @@ -128,6 +135,9 @@ ENTRY(syscall_enter)
    8.40          movq  24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
    8.41          SAVE_ALL
    8.42          GET_CURRENT(%rbx)
    8.43 +        movq  VCPU_domain(%rbx),%rcx
    8.44 +        testb $1,DOMAIN_is_32bit_pv(%rcx)
    8.45 +        jnz   compat_syscall
    8.46          testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
    8.47          jz    switch_to_kernel
    8.48  
    8.49 @@ -241,6 +251,43 @@ bad_hypercall:
    8.50          movq $-ENOSYS,UREGS_rax(%rsp)
    8.51          jmp  test_all_events
    8.52  
    8.53 +ENTRY(sysenter_entry)
    8.54 +        sti
    8.55 +        pushq $FLAT_USER_SS
    8.56 +        pushq $0
    8.57 +        pushfq
    8.58 +        .globl sysenter_eflags_saved
    8.59 +sysenter_eflags_saved:
    8.60 +        pushq $0
    8.61 +        pushq $0
    8.62 +        pushq $0
    8.63 +        movl  $TRAP_syscall,4(%rsp)
    8.64 +        SAVE_ALL
    8.65 +        GET_CURRENT(%rbx)
    8.66 +        movq  VCPU_sysexit_addr(%rbx),%rax
    8.67 +        movzwl VCPU_sysexit_sel(%rbx),%edx
    8.68 +        cmpb  $0,VCPU_sysenter_disables_events(%rbx)
    8.69 +        movq  %rax,UREGS_rip(%rsp)
    8.70 +        movl  %edx,UREGS_cs(%rsp)
    8.71 +        movq  VCPU_sysenter_addr(%rbx),%rax
    8.72 +        setne %cl
    8.73 +        leaq  VCPU_trap_bounce(%rbx),%rdx
    8.74 +        testq %rax,%rax
    8.75 +        leal  (,%rcx,TBF_INTERRUPT),%ecx
    8.76 +        jz    2f
    8.77 +1:      movq  VCPU_domain(%rbx),%rdi
    8.78 +        movq  %rax,TRAPBOUNCE_eip(%rdx)
    8.79 +        movb  %cl,TRAPBOUNCE_flags(%rdx)
    8.80 +        testb $1,DOMAIN_is_32bit_pv(%rdi)
    8.81 +        jnz   compat_sysenter
    8.82 +        call  create_bounce_frame
    8.83 +        jmp   test_all_events
    8.84 +2:      movl  %eax,TRAPBOUNCE_error_code(%rdx)
    8.85 +        movq  VCPU_gp_fault_addr(%rbx),%rax
    8.86 +        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
    8.87 +        movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
    8.88 +        jmp   1b
    8.89 +
    8.90  ENTRY(int80_direct_trap)
    8.91          pushq $0
    8.92          SAVE_ALL
    8.93 @@ -313,9 +360,9 @@ 1:      movb  TRAPBOUNCE_flags(%rdx),%cl
    8.94          shrq  $32,%rax
    8.95          testb $0xFF,%al                 # Bits 0-7: saved_upcall_mask
    8.96          setz  %ch                       # %ch == !saved_upcall_mask
    8.97 -        movq  UREGS_eflags+8(%rsp),%rax
    8.98 -        andq  $~X86_EFLAGS_IF,%rax
    8.99 -        shlb  $1,%ch                    # Bit 9 (EFLAGS.IF)
   8.100 +        movl  UREGS_eflags+8(%rsp),%eax
   8.101 +        andl  $~X86_EFLAGS_IF,%eax
   8.102 +        addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
   8.103          orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
   8.104  .Lft5:  movq  %rax,16(%rsi)             # RFLAGS
   8.105          movq  UREGS_rip+8(%rsp),%rax
     9.1 --- a/xen/arch/x86/x86_64/traps.c	Wed Oct 24 14:36:18 2007 +0100
     9.2 +++ b/xen/arch/x86/x86_64/traps.c	Wed Oct 24 15:22:57 2007 +0100
     9.3 @@ -22,6 +22,7 @@
     9.4  #include <public/callback.h>
     9.5  
     9.6  asmlinkage void syscall_enter(void);
     9.7 +asmlinkage void sysenter_entry(void);
     9.8  asmlinkage void compat_hypercall(void);
     9.9  asmlinkage void int80_direct_trap(void);
    9.10  
    9.11 @@ -350,12 +351,21 @@ void __devinit subarch_percpu_traps_init
    9.12  
    9.13      /* Trampoline for SYSCALL entry from long mode. */
    9.14      stack = &stack[IST_MAX * PAGE_SIZE]; /* Skip the IST stacks. */
    9.15 -    wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
    9.16 +    wrmsrl(MSR_LSTAR, (unsigned long)stack);
    9.17      stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
    9.18  
    9.19 +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    9.20 +    {
    9.21 +        /* SYSENTER entry. */
    9.22 +        wrmsrl(MSR_IA32_SYSENTER_ESP, (unsigned long)stack_bottom);
    9.23 +        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
    9.24 +        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
    9.25 +    }
    9.26 +
    9.27      /* Trampoline for SYSCALL entry from compatibility mode. */
    9.28 -    wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
    9.29 -    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
    9.30 +    stack = (char *)L1_CACHE_ALIGN((unsigned long)stack);
    9.31 +    wrmsrl(MSR_CSTAR, (unsigned long)stack);
    9.32 +    stack += write_stack_trampoline(stack, stack_bottom, FLAT_USER_CS32);
    9.33  
    9.34      /* Common SYSCALL parameters. */
    9.35      wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
    9.36 @@ -380,6 +390,9 @@ static long register_guest_callback(stru
    9.37      long ret = 0;
    9.38      struct vcpu *v = current;
    9.39  
    9.40 +    if ( !is_canonical_address(reg->address) )
    9.41 +        return -EINVAL;
    9.42 +
    9.43      switch ( reg->type )
    9.44      {
    9.45      case CALLBACKTYPE_event:
    9.46 @@ -406,6 +419,23 @@ static long register_guest_callback(stru
    9.47                        &v->arch.guest_context.flags);
    9.48          break;
    9.49  
    9.50 +    case CALLBACKTYPE_syscall32:
    9.51 +        v->arch.syscall32_callback_eip = reg->address;
    9.52 +        v->arch.syscall32_disables_events =
    9.53 +            !!(reg->flags & CALLBACKF_mask_events);
    9.54 +        break;
    9.55 +
    9.56 +    case CALLBACKTYPE_sysenter:
    9.57 +        v->arch.sysenter_callback_eip = reg->address;
    9.58 +        v->arch.sysenter_disables_events =
    9.59 +            !!(reg->flags & CALLBACKF_mask_events);
    9.60 +        break;
    9.61 +
    9.62 +    case CALLBACKTYPE_sysexit:
    9.63 +        v->arch.sysexit_eip = reg->address;
    9.64 +        v->arch.sysexit_cs = FLAT_USER_CS32;
    9.65 +        break;
    9.66 +
    9.67      case CALLBACKTYPE_nmi:
    9.68          ret = register_guest_nmi_callback(reg->address);
    9.69          break;
    9.70 @@ -427,6 +457,9 @@ static long unregister_guest_callback(st
    9.71      case CALLBACKTYPE_event:
    9.72      case CALLBACKTYPE_failsafe:
    9.73      case CALLBACKTYPE_syscall:
    9.74 +    case CALLBACKTYPE_syscall32:
    9.75 +    case CALLBACKTYPE_sysenter:
    9.76 +    case CALLBACKTYPE_sysexit:
    9.77          ret = -EINVAL;
    9.78          break;
    9.79  
    10.1 --- a/xen/include/asm-x86/cpufeature.h	Wed Oct 24 14:36:18 2007 +0100
    10.2 +++ b/xen/include/asm-x86/cpufeature.h	Wed Oct 24 15:22:57 2007 +0100
    10.3 @@ -155,7 +155,7 @@
    10.4  #define cpu_has_pge		1
    10.5  #define cpu_has_pat		1
    10.6  #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
    10.7 -#define cpu_has_sep		0
    10.8 +#define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
    10.9  #define cpu_has_mtrr		1
   10.10  #define cpu_has_mmx		1
   10.11  #define cpu_has_fxsr		1
    11.1 --- a/xen/include/asm-x86/domain.h	Wed Oct 24 14:36:18 2007 +0100
    11.2 +++ b/xen/include/asm-x86/domain.h	Wed Oct 24 15:22:57 2007 +0100
    11.3 @@ -301,6 +301,14 @@ struct arch_vcpu
    11.4  #endif
    11.5  #ifdef CONFIG_X86_64
    11.6      struct trap_bounce int80_bounce;
    11.7 +    unsigned long      syscall32_callback_eip;
    11.8 +    unsigned long      sysenter_callback_eip;
    11.9 +    unsigned long      sysexit_eip;
   11.10 +    unsigned short     syscall32_callback_cs;
   11.11 +    unsigned short     sysenter_callback_cs;
   11.12 +    unsigned short     sysexit_cs;
   11.13 +    bool_t             syscall32_disables_events;
   11.14 +    bool_t             sysenter_disables_events;
   11.15  #endif
   11.16  
   11.17      /* Virtual Machine Extensions */
    12.1 --- a/xen/include/public/callback.h	Wed Oct 24 14:36:18 2007 +0100
    12.2 +++ b/xen/include/public/callback.h	Wed Oct 24 15:22:57 2007 +0100
    12.3 @@ -36,17 +36,48 @@
    12.4   * @extra_args == Operation-specific extra arguments (NULL if none).
    12.5   */
    12.6  
    12.7 +/* ia64, x86: Callback for event delivery. */
    12.8  #define CALLBACKTYPE_event                 0
    12.9 +
   12.10 +/* x86: Failsafe callback when guest state cannot be restored by Xen. */
   12.11  #define CALLBACKTYPE_failsafe              1
   12.12 -#define CALLBACKTYPE_syscall               2 /* x86_64 only */
   12.13 +
   12.14 +/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
   12.15 +#define CALLBACKTYPE_syscall               2
   12.16 +
   12.17  /*
   12.18 - * sysenter is only available on x86_32 with the
   12.19 - * supervisor_mode_kernel option enabled.
   12.20 + * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
   12.21 + *     feature is enabled. Do not use this callback type in new code.
   12.22   */
   12.23 -#define CALLBACKTYPE_sysenter              3
   12.24 +#define CALLBACKTYPE_sysenter_deprecated   3
   12.25 +
   12.26 +/* x86: Callback for NMI delivery. */
   12.27  #define CALLBACKTYPE_nmi                   4
   12.28  
   12.29  /*
   12.30 + * x86: sysenter is only available as follows:
   12.31 + * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
   12.32 + * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
   12.33 + *                      ('32-on-32-on-64', '32-on-64-on-64')
   12.34 + *                      [nb. also 64-bit guest applications on Intel CPUs
   12.35 + *                           ('64-on-64-on-64'), but syscall is preferred]
   12.36 + */
   12.37 +#define CALLBACKTYPE_sysenter              5
   12.38 +
   12.39 +/*
   12.40 + * x86/64 hypervisor: used to fill a sysenter frame's return address, if the
   12.41 + * guest desires to have a non-NULL value there. If the guest kernel is
   12.42 + * 64-bit then the sysexit code selector is always set to FLAT_USER_CS32.
   12.43 + */
   12.44 +#define CALLBACKTYPE_sysexit               6
   12.45 +
   12.46 +/*
   12.47 + * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
   12.48 + *                    ('32-on-32-on-64', '32-on-64-on-64')
   12.49 + */
   12.50 +#define CALLBACKTYPE_syscall32             7
   12.51 +
   12.52 +/*
   12.53   * Disable event deliver during callback? This flag is ignored for event and
   12.54   * NMI callbacks: event delivery is unconditionally disabled.
   12.55   */
   12.56 @@ -79,6 +110,11 @@ struct callback_unregister {
   12.57  typedef struct callback_unregister callback_unregister_t;
   12.58  DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
   12.59  
   12.60 +#if __XEN_INTERFACE_VERSION__ < 0x00030207
   12.61 +#undef CALLBACKTYPE_sysenter
   12.62 +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
   12.63 +#endif
   12.64 +
   12.65  #endif /* __XEN_PUBLIC_CALLBACK_H__ */
   12.66  
   12.67  /*
    13.1 --- a/xen/include/public/xen-compat.h	Wed Oct 24 14:36:18 2007 +0100
    13.2 +++ b/xen/include/public/xen-compat.h	Wed Oct 24 15:22:57 2007 +0100
    13.3 @@ -27,7 +27,7 @@
    13.4  #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
    13.5  #define __XEN_PUBLIC_XEN_COMPAT_H__
    13.6  
    13.7 -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
    13.8 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030207
    13.9  
   13.10  #if defined(__XEN__) || defined(__XEN_TOOLS__)
   13.11  /* Xen is built with matching headers and implements the latest interface. */