ia64/xen-unstable

changeset 3761:118e0a3af9b0

bitkeeper revision 1.1159.1.564 (420b44edsb8XzPev-TiGW16GSsCW6g)

More x86_64 stuff. Added hypercalls to register a user-space pagetable,
modify the FS/GS base addresses, and switch to user mode. A guest running
in user mode switches back to kernel mode automatically when it executes
the SYSCALL instruction.
Still to do:
1. getdomaininfo needs to include pagetable_user
2. get writable and shadow pagetables working
3. testing
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Thu Feb 10 11:26:37 2005 +0000 (2005-02-10)
parents 736089c11af9
children 0a4b76b6b5a0 26ba37bce96e
files xen/arch/x86/domain.c xen/arch/x86/mm.c xen/arch/x86/setup.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_vmcs.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/include/asm-x86/domain.h xen/include/asm-x86/msr.h xen/include/asm-x86/processor.h xen/include/asm-x86/x86_32/current.h xen/include/asm-x86/x86_64/current.h xen/include/public/arch-x86_64.h xen/include/public/xen.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Thu Feb 10 02:27:48 2005 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Thu Feb 10 11:26:37 2005 +0000
     1.3 @@ -256,6 +256,8 @@ void arch_do_createdomain(struct exec_do
     1.4  
     1.5      SET_DEFAULT_FAST_TRAP(&ed->arch);
     1.6  
     1.7 +    ed->arch.flags = TF_kernel_mode;
     1.8 +
     1.9      if ( d->id == IDLE_DOMAIN_ID )
    1.10      {
    1.11          ed->arch.schedule_tail = continue_idle_task;
    1.12 @@ -287,8 +289,6 @@ void arch_do_createdomain(struct exec_do
    1.13          d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] = 
    1.14              mk_l3_pgentry(__pa(d->arch.mm_perdomain_l2) | __PAGE_HYPERVISOR);
    1.15  #endif
    1.16 -
    1.17 -        ed->arch.flags = TF_kernel_mode;
    1.18      }
    1.19  }
    1.20  
    1.21 @@ -550,6 +550,172 @@ void new_thread(struct exec_domain *d,
    1.22  }
    1.23  
    1.24  
    1.25 +#ifdef __x86_64__
    1.26 +
    1.27 +#define loadsegment(seg,value) ({               \
    1.28 +    int __r = 1;                                \
    1.29 +    __asm__ __volatile__ (                      \
    1.30 +        "1: movl %k1,%%" #seg "\n2:\n"          \
    1.31 +        ".section .fixup,\"ax\"\n"              \
    1.32 +        "3: xorl %k0,%k0\n"                     \
    1.33 +        "   movl %k0,%%" #seg "\n"              \
    1.34 +        "   jmp 2b\n"                           \
    1.35 +        ".previous\n"                           \
    1.36 +        ".section __ex_table,\"a\"\n"           \
    1.37 +        "   .align 8\n"                         \
    1.38 +        "   .quad 1b,3b\n"                      \
    1.39 +        ".previous"                             \
    1.40 +        : "=r" (__r) : "r" (value), "0" (__r) );\
    1.41 +    __r; })
    1.42 +
    1.43 +static void switch_segments(
    1.44 +    struct xen_regs *regs, struct exec_domain *p, struct exec_domain *n)
    1.45 +{
    1.46 +    int all_segs_okay = 1;
    1.47 +
    1.48 +    if ( !is_idle_task(p->domain) )
    1.49 +    {
    1.50 +        __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (p->arch.user_ctxt.ds) );
    1.51 +        __asm__ __volatile__ ( "movl %%es,%0" : "=m" (p->arch.user_ctxt.es) );
    1.52 +        __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (p->arch.user_ctxt.fs) );
    1.53 +        __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (p->arch.user_ctxt.gs) );
    1.54 +    }
    1.55 +
    1.56 +    /* Either selector != 0 ==> reload. */
    1.57 +    if ( unlikely(p->arch.user_ctxt.ds |
    1.58 +                  n->arch.user_ctxt.ds) )
    1.59 +        all_segs_okay &= loadsegment(ds, n->arch.user_ctxt.ds);
    1.60 +
    1.61 +    /* Either selector != 0 ==> reload. */
    1.62 +    if ( unlikely(p->arch.user_ctxt.es |
    1.63 +                  n->arch.user_ctxt.es) )
    1.64 +        all_segs_okay &= loadsegment(es, n->arch.user_ctxt.es);
    1.65 +
    1.66 +    /*
    1.67 +     * Either selector != 0 ==> reload.
    1.68 +     * Also reload to reset FS_BASE if it was non-zero.
    1.69 +     */
    1.70 +    if ( unlikely(p->arch.user_ctxt.fs |
    1.71 +                  p->arch.user_ctxt.fs_base |
    1.72 +                  n->arch.user_ctxt.fs) )
    1.73 +    {
    1.74 +        all_segs_okay &= loadsegment(fs, n->arch.user_ctxt.fs);
    1.75 +        if ( p->arch.user_ctxt.fs ) /* != 0 selector kills fs_base */
    1.76 +            p->arch.user_ctxt.fs_base = 0;
    1.77 +    }
    1.78 +
    1.79 +    /*
    1.80 +     * Either selector != 0 ==> reload.
    1.81 +     * Also reload to reset GS_BASE if it was non-zero.
    1.82 +     */
    1.83 +    if ( unlikely(p->arch.user_ctxt.gs |
    1.84 +                  p->arch.user_ctxt.gs_base_user |
    1.85 +                  n->arch.user_ctxt.gs) )
    1.86 +    {
    1.87 +        /* Reset GS_BASE with user %gs? */
    1.88 +        if ( p->arch.user_ctxt.gs || !n->arch.user_ctxt.gs_base_user )
    1.89 +            all_segs_okay &= loadsegment(gs, n->arch.user_ctxt.gs);
    1.90 +        if ( p->arch.user_ctxt.gs ) /* != 0 selector kills gs_base_user */
    1.91 +            p->arch.user_ctxt.gs_base_user = 0;
    1.92 +    }
    1.93 +
    1.94 +    /* This can only be non-zero if selector is NULL. */
    1.95 +    if ( n->arch.user_ctxt.fs_base )
    1.96 +        wrmsr(MSR_FS_BASE,
    1.97 +              n->arch.user_ctxt.fs_base,
    1.98 +              n->arch.user_ctxt.fs_base>>32);
    1.99 +
   1.100 +    /* This can only be non-zero if selector is NULL. */
   1.101 +    if ( n->arch.user_ctxt.gs_base_user )
   1.102 +        wrmsr(MSR_GS_BASE,
   1.103 +              n->arch.user_ctxt.gs_base_user,
   1.104 +              n->arch.user_ctxt.gs_base_user>>32);
   1.105 +
   1.106 +    /* This can only be non-zero if selector is NULL. */
   1.107 +    if ( p->arch.user_ctxt.gs_base_kernel |
   1.108 +         n->arch.user_ctxt.gs_base_kernel )
   1.109 +        wrmsr(MSR_SHADOW_GS_BASE,
   1.110 +              n->arch.user_ctxt.gs_base_kernel,
   1.111 +              n->arch.user_ctxt.gs_base_kernel>>32);
   1.112 +
   1.113 +    /* If in kernel mode then switch the GS bases around. */
   1.114 +    if ( n->arch.flags & TF_kernel_mode )
   1.115 +        __asm__ __volatile__ ( "swapgs" );
   1.116 +
   1.117 +    if ( unlikely(!all_segs_okay) )
   1.118 +    {
   1.119 +        unsigned long *rsp =
   1.120 +            (n->arch.flags & TF_kernel_mode) ?
   1.121 +            (unsigned long *)regs->rsp : 
   1.122 +            (unsigned long *)n->arch.kernel_sp;
   1.123 +
   1.124 +        if ( put_user(regs->ss,     rsp- 1) |
   1.125 +             put_user(regs->rsp,    rsp- 2) |
   1.126 +             put_user(regs->rflags, rsp- 3) |
   1.127 +             put_user(regs->cs,     rsp- 4) |
   1.128 +             put_user(regs->rip,    rsp- 5) |
   1.129 +             put_user(regs->gs,     rsp- 6) |
   1.130 +             put_user(regs->fs,     rsp- 7) |
   1.131 +             put_user(regs->es,     rsp- 8) |
   1.132 +             put_user(regs->ds,     rsp- 9) |
   1.133 +             put_user(regs->r11,    rsp-10) |
   1.134 +             put_user(regs->rcx,    rsp-11) )
   1.135 +        {
   1.136 +            DPRINTK("Error while creating failsafe callback frame.\n");
   1.137 +            domain_crash();
   1.138 +        }
   1.139 +
   1.140 +        if ( !(n->arch.flags & TF_kernel_mode) )
   1.141 +        {
   1.142 +            n->arch.flags |= TF_kernel_mode;
   1.143 +            __asm__ __volatile__ ( "swapgs" );
   1.144 +            write_ptbase(n);
   1.145 +        }
   1.146 +
   1.147 +        regs->entry_vector  = TRAP_syscall;
   1.148 +        regs->rflags       &= 0xFFFCBEFFUL;
   1.149 +        regs->ss            = __GUEST_SS;
   1.150 +        regs->rsp           = (unsigned long)(rsp-11);
   1.151 +        regs->cs            = __GUEST_CS;
   1.152 +        regs->rip           = n->arch.failsafe_address;
   1.153 +    }
   1.154 +}
   1.155 +
   1.156 +long do_switch_to_user(void)
   1.157 +{
   1.158 +    struct xen_regs       *regs = get_execution_context();
   1.159 +    struct switch_to_user  stu;
   1.160 +    struct exec_domain    *ed = current;
   1.161 +
   1.162 +    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) )
   1.163 +        return -EFAULT;
   1.164 +
   1.165 +    ed->arch.flags &= ~TF_kernel_mode;
   1.166 +    __asm__ __volatile__ ( "swapgs" );
   1.167 +    write_ptbase(ed);
   1.168 +
   1.169 +    regs->rip    = stu.rip;
   1.170 +    regs->cs     = stu.cs;
   1.171 +    regs->rflags = stu.rflags;
   1.172 +    regs->rsp    = stu.rsp;
   1.173 +    regs->ss     = stu.ss;
   1.174 +
   1.175 +    if ( !(stu.flags & ECF_IN_SYSCALL) )
   1.176 +    {
   1.177 +        regs->entry_vector = 0;
   1.178 +        regs->r11 = stu.r11;
   1.179 +        regs->rcx = stu.rcx;
   1.180 +    }
   1.181 +    
   1.182 +    return regs->rax;
   1.183 +}
   1.184 +
   1.185 +#elif defined(__i386__)
   1.186 +
   1.187 +#define switch_segments(_r, _p, _n) ((void)0)
   1.188 +
   1.189 +#endif
   1.190 +
   1.191  /*
   1.192   * This special macro can be used to load a debugging register
   1.193   */
   1.194 @@ -566,21 +732,12 @@ void switch_to(struct exec_domain *prev_
   1.195  #ifdef CONFIG_VMX
   1.196      unsigned long vmx_domain = next_p->arch.arch_vmx.flags; 
   1.197  #endif
   1.198 -#ifdef __x86_64__
   1.199 -    int all_segs_okay = 1;
   1.200 -#endif
   1.201  
   1.202      __cli();
   1.203  
   1.204      /* Switch guest general-register state. */
   1.205      if ( !is_idle_task(prev_p->domain) )
   1.206      {
   1.207 -#ifdef __x86_64__
   1.208 -        __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (stack_ec->ds) );
   1.209 -        __asm__ __volatile__ ( "movl %%es,%0" : "=m" (stack_ec->es) );
   1.210 -        __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (stack_ec->fs) );
   1.211 -        __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (stack_ec->gs) );
   1.212 -#endif
   1.213          memcpy(&prev_p->arch.user_ctxt,
   1.214                 stack_ec, 
   1.215                 sizeof(*stack_ec));
   1.216 @@ -624,7 +781,7 @@ void switch_to(struct exec_domain *prev_
   1.217          SET_FAST_TRAP(&next_p->arch);
   1.218  
   1.219  #ifdef __i386__
   1.220 -        /* Switch the guest OS ring-1 stack. */
   1.221 +        /* Switch the kernel ring-1 stack. */
   1.222          tss->esp1 = next_p->arch.kernel_sp;
   1.223          tss->ss1  = next_p->arch.kernel_ss;
   1.224  #endif
   1.225 @@ -660,126 +817,7 @@ void switch_to(struct exec_domain *prev_
   1.226  
   1.227      __sti();
   1.228  
   1.229 -#ifdef __x86_64__
   1.230 -
   1.231 -#define loadsegment(seg,value) ({               \
   1.232 -    int __r = 1;                                \
   1.233 -    __asm__ __volatile__ (                      \
   1.234 -        "1: movl %k1,%%" #seg "\n2:\n"          \
   1.235 -        ".section .fixup,\"ax\"\n"              \
   1.236 -        "3: xorl %k0,%k0\n"                     \
   1.237 -        "   movl %k0,%%" #seg "\n"              \
   1.238 -        "   jmp 2b\n"                           \
   1.239 -        ".previous\n"                           \
   1.240 -        ".section __ex_table,\"a\"\n"           \
   1.241 -        "   .align 8\n"                         \
   1.242 -        "   .quad 1b,3b\n"                      \
   1.243 -        ".previous"                             \
   1.244 -        : "=r" (__r) : "r" (value), "0" (__r) );\
   1.245 -    __r; })
   1.246 -
   1.247 -    /* Either selector != 0 ==> reload. */
   1.248 -    if ( unlikely(prev_p->arch.user_ctxt.ds) ||
   1.249 -         unlikely(next_p->arch.user_ctxt.ds) )
   1.250 -        all_segs_okay &= loadsegment(ds, next_p->arch.user_ctxt.ds);
   1.251 -
   1.252 -    /* Either selector != 0 ==> reload. */
   1.253 -    if ( unlikely(prev_p->arch.user_ctxt.es) ||
   1.254 -         unlikely(next_p->arch.user_ctxt.es) )
   1.255 -        all_segs_okay &= loadsegment(es, next_p->arch.user_ctxt.es);
   1.256 -
   1.257 -    /*
   1.258 -     * Either selector != 0 ==> reload.
   1.259 -     * Also reload to reset FS_BASE if it was non-zero.
   1.260 -     */
   1.261 -    if ( unlikely(prev_p->arch.user_ctxt.fs) ||
   1.262 -         unlikely(prev_p->arch.user_ctxt.fs_base) ||
   1.263 -         unlikely(next_p->arch.user_ctxt.fs) )
   1.264 -    {
   1.265 -        all_segs_okay &= loadsegment(fs, next_p->arch.user_ctxt.fs);
   1.266 -        if ( prev_p->arch.user_ctxt.fs ) /* != 0 selector kills fs_base */
   1.267 -            prev_p->arch.user_ctxt.fs_base = 0;
   1.268 -    }
   1.269 -
   1.270 -    /*
   1.271 -     * Either selector != 0 ==> reload.
   1.272 -     * Also reload to reset GS_BASE if it was non-zero.
   1.273 -     */
   1.274 -    if ( unlikely(prev_p->arch.user_ctxt.gs) ||
   1.275 -         unlikely(prev_p->arch.user_ctxt.gs_base_os) ||
   1.276 -         unlikely(prev_p->arch.user_ctxt.gs_base_app) ||
   1.277 -         unlikely(next_p->arch.user_ctxt.gs) )
   1.278 -    {
   1.279 -        /* Reset GS_BASE with user %gs. */
   1.280 -        all_segs_okay &= loadsegment(gs, next_p->arch.user_ctxt.gs);
   1.281 -        /* Reset KERNEL_GS_BASE if we won't be doing it later. */
   1.282 -        if ( !next_p->arch.user_ctxt.gs_base_os )
   1.283 -            wrmsr(MSR_KERNEL_GS_BASE, 0, 0);
   1.284 -        if ( prev_p->arch.user_ctxt.gs ) /* != 0 selector kills app gs_base */
   1.285 -            prev_p->arch.user_ctxt.gs_base_app = 0;
   1.286 -    }
   1.287 -
   1.288 -    /* This can only be non-zero if selector is NULL. */
   1.289 -    if ( next_p->arch.user_ctxt.fs_base )
   1.290 -        wrmsr(MSR_FS_BASE,
   1.291 -              next_p->arch.user_ctxt.fs_base,
   1.292 -              next_p->arch.user_ctxt.fs_base>>32);
   1.293 -
   1.294 -    /* This can only be non-zero if selector is NULL. */
   1.295 -    if ( next_p->arch.user_ctxt.gs_base_os )
   1.296 -        wrmsr(MSR_KERNEL_GS_BASE,
   1.297 -              next_p->arch.user_ctxt.gs_base_os,
   1.298 -              next_p->arch.user_ctxt.gs_base_os>>32);
   1.299 -
   1.300 -    /* This can only be non-zero if selector is NULL. */
   1.301 -    if ( next_p->arch.user_ctxt.gs_base_app )
   1.302 -        wrmsr(MSR_GS_BASE,
   1.303 -              next_p->arch.user_ctxt.gs_base_app,
   1.304 -              next_p->arch.user_ctxt.gs_base_app>>32);
   1.305 -
   1.306 -    /* If in guest-OS mode, switch the GS bases around. */
   1.307 -    if ( next_p->arch.flags & TF_kernel_mode )
   1.308 -        __asm__ __volatile__ ( "swapgs" );
   1.309 -
   1.310 -    if ( unlikely(!all_segs_okay) )
   1.311 -    {
   1.312 -        unsigned long *rsp =
   1.313 -            (next_p->arch.flags & TF_kernel_mode) ?
   1.314 -            (unsigned long *)stack_ec->rsp : 
   1.315 -            (unsigned long *)next_p->arch.kernel_sp;
   1.316 -
   1.317 -        if ( put_user(stack_ec->ss,     rsp- 1) |
   1.318 -             put_user(stack_ec->rsp,    rsp- 2) |
   1.319 -             put_user(stack_ec->rflags, rsp- 3) |
   1.320 -             put_user(stack_ec->cs,     rsp- 4) |
   1.321 -             put_user(stack_ec->rip,    rsp- 5) |
   1.322 -             put_user(stack_ec->gs,     rsp- 6) |
   1.323 -             put_user(stack_ec->fs,     rsp- 7) |
   1.324 -             put_user(stack_ec->es,     rsp- 8) |
   1.325 -             put_user(stack_ec->ds,     rsp- 9) |
   1.326 -             put_user(stack_ec->r11,    rsp-10) |
   1.327 -             put_user(stack_ec->rcx,    rsp-11) )
   1.328 -        {
   1.329 -            DPRINTK("Error while creating failsafe callback frame.\n");
   1.330 -            domain_crash();
   1.331 -        }
   1.332 -
   1.333 -        if ( !(next_p->arch.flags & TF_kernel_mode) )
   1.334 -        {
   1.335 -            next_p->arch.flags |= TF_kernel_mode;
   1.336 -            __asm__ __volatile__ ( "swapgs" );
   1.337 -            /* XXX switch page tables XXX */
   1.338 -        }
   1.339 -
   1.340 -        stack_ec->entry_vector  = TRAP_syscall;
   1.341 -        stack_ec->rflags       &= 0xFFFCBEFFUL;
   1.342 -        stack_ec->ss            = __GUEST_SS;
   1.343 -        stack_ec->rsp           = (unsigned long)(rsp-11);
   1.344 -        stack_ec->cs            = __GUEST_CS;
   1.345 -        stack_ec->rip           = next_p->arch.failsafe_address;
   1.346 -    }
   1.347 -
   1.348 -#endif /* __x86_64__ */
   1.349 +    switch_segments(stack_ec, prev_p, next_p);
   1.350  }
   1.351  
   1.352  
   1.353 @@ -935,13 +973,23 @@ void domain_relinquish_memory(struct dom
   1.354      /* Exit shadow mode before deconstructing final guest page table. */
   1.355      shadow_mode_disable(d);
   1.356  
   1.357 -    /* Drop the in-use reference to the page-table base. */
   1.358 +    /* Drop the in-use references to page-table bases. */
   1.359      for_each_exec_domain ( d, ed )
   1.360      {
   1.361          if ( pagetable_val(ed->arch.pagetable) != 0 )
   1.362 -            put_page_and_type(&frame_table[pagetable_val(ed->arch.pagetable) >>
   1.363 -                                           PAGE_SHIFT]);
   1.364 -        ed->arch.pagetable = mk_pagetable(0);
   1.365 +        {
   1.366 +            put_page_and_type(
   1.367 +                &frame_table[pagetable_val(ed->arch.pagetable) >> PAGE_SHIFT]);
   1.368 +            ed->arch.pagetable = mk_pagetable(0);
   1.369 +        }
   1.370 +
   1.371 +        if ( pagetable_val(ed->arch.pagetable_user) != 0 )
   1.372 +        {
   1.373 +            put_page_and_type(
   1.374 +                &frame_table[pagetable_val(ed->arch.pagetable_user) >>
   1.375 +                            PAGE_SHIFT]);
   1.376 +            ed->arch.pagetable_user = mk_pagetable(0);
   1.377 +        }
   1.378      }
   1.379  
   1.380  #ifdef CONFIG_VMX
     2.1 --- a/xen/arch/x86/mm.c	Thu Feb 10 02:27:48 2005 +0000
     2.2 +++ b/xen/arch/x86/mm.c	Thu Feb 10 11:26:37 2005 +0000
     2.3 @@ -209,6 +209,10 @@ void write_ptbase(struct exec_domain *ed
     2.4  #else
     2.5      if ( unlikely(shadow_mode(d)) )
     2.6          pa = pagetable_val(ed->arch.shadow_table);    
     2.7 +#ifdef __x86_64__
     2.8 +    else if ( !(ed->arch.flags & TF_kernel_mode) )
     2.9 +        pa = pagetable_val(ed->arch.pagetable_user);
    2.10 +#endif
    2.11      else
    2.12          pa = pagetable_val(ed->arch.pagetable);
    2.13  #endif
    2.14 @@ -1341,6 +1345,24 @@ static int do_extended_command(unsigned 
    2.15          okay = new_guest_cr3(pfn);
    2.16          break;
    2.17          
    2.18 +#ifdef __x86_64__
    2.19 +    case MMUEXT_NEW_USER_BASEPTR:
    2.20 +        okay = get_page_and_type_from_pagenr(pfn, PGT_root_page_table, d);
    2.21 +        if ( unlikely(!okay) )
    2.22 +        {
    2.23 +            MEM_LOG("Error while installing new baseptr %p", pfn);
    2.24 +        }
    2.25 +        else
    2.26 +        {
    2.27 +            unsigned long old_pfn =
    2.28 +                pagetable_val(ed->arch.pagetable_user) >> PAGE_SHIFT;
    2.29 +            ed->arch.pagetable_user = mk_pagetable(pfn << PAGE_SHIFT);
    2.30 +            if ( old_pfn != 0 )
    2.31 +                put_page_and_type(&frame_table[old_pfn]);
    2.32 +        }
    2.33 +        break;
    2.34 +#endif
    2.35 +        
    2.36      case MMUEXT_TLB_FLUSH:
    2.37          percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
    2.38          break;
     3.1 --- a/xen/arch/x86/setup.c	Thu Feb 10 02:27:48 2005 +0000
     3.2 +++ b/xen/arch/x86/setup.c	Thu Feb 10 11:26:37 2005 +0000
     3.3 @@ -325,9 +325,9 @@ void __init cpu_init(void)
     3.4      memset(t->io_bitmap, ~0, sizeof(t->io_bitmap));
     3.5  #if defined(__i386__)
     3.6      t->ss0  = __HYPERVISOR_DS;
     3.7 -    t->esp0 = get_stack_top();
     3.8 +    t->esp0 = get_stack_bottom();
     3.9  #elif defined(__x86_64__)
    3.10 -    t->rsp0 = get_stack_top();
    3.11 +    t->rsp0 = get_stack_bottom();
    3.12  #endif
    3.13      set_tss_desc(nr,t);
    3.14      load_TR(nr);
     4.1 --- a/xen/arch/x86/vmx_io.c	Thu Feb 10 02:27:48 2005 +0000
     4.2 +++ b/xen/arch/x86/vmx_io.c	Thu Feb 10 11:26:37 2005 +0000
     4.3 @@ -382,7 +382,7 @@ void vmx_do_resume(struct exec_domain *d
     4.4  {
     4.5      __vmwrite(HOST_CR3, pagetable_val(d->arch.monitor_table));
     4.6      __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
     4.7 -    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());
     4.8 +    __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
     4.9  
    4.10      if (event_pending(d)) {
    4.11          if (test_bit(IOPACKET_PORT, &d->domain->shared_info->evtchn_pending[0])) 
     5.1 --- a/xen/arch/x86/vmx_vmcs.c	Thu Feb 10 02:27:48 2005 +0000
     5.2 +++ b/xen/arch/x86/vmx_vmcs.c	Thu Feb 10 11:26:37 2005 +0000
     5.3 @@ -222,7 +222,7 @@ void vmx_do_launch(struct exec_domain *e
     5.4      ed->arch.shadow_table = ed->arch.pagetable;
     5.5      __vmwrite(GUEST_CR3, pagetable_val(ed->arch.pagetable));
     5.6      __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
     5.7 -    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());
     5.8 +    __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
     5.9  
    5.10      ed->arch.schedule_tail = arch_vmx_do_resume;
    5.11  }
     6.1 --- a/xen/arch/x86/x86_32/entry.S	Thu Feb 10 02:27:48 2005 +0000
     6.2 +++ b/xen/arch/x86/x86_32/entry.S	Thu Feb 10 11:26:37 2005 +0000
     6.3 @@ -81,7 +81,7 @@
     6.4   *   (9)  u32 fs;
     6.5   *   (8)  u32 ds;
     6.6   *   (7)  u32 es;
     6.7 - *               <- get_stack_top() (= HOST_ESP)
     6.8 + *               <- get_stack_bottom() (= HOST_ESP)
     6.9   *   (6)  u32 ss;
    6.10   *   (5)  u32 esp;
    6.11   *   (4)  u32 eflags;
    6.12 @@ -89,8 +89,8 @@
    6.13   *   (2)  u32 eip;
    6.14   * (2/1)  u16 entry_vector;
    6.15   * (1/1)  u16 error_code;
    6.16 - * However, get_stack_top() acturally returns 20 bytes below the real
    6.17 - * top of the stack to allow space for:
    6.18 + * However, get_stack_bottom() actually returns 20 bytes before the real
    6.19 + * bottom of the stack to allow space for:
    6.20   * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
    6.21   */
    6.22  #define VMX_MONITOR_EFLAGS	0x202 /* IF on */
    6.23 @@ -173,8 +173,8 @@ vmx_process_softirqs:
    6.24  
    6.25          ALIGN
    6.26  restore_all_guest:
    6.27 -        testb $TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    6.28 -        jnz  failsafe_callback
    6.29 +        btr  $_TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    6.30 +        jc   failsafe_callback
    6.31          testl $X86_EFLAGS_VM,XREGS_eflags(%esp)
    6.32          jnz  restore_all_vm86
    6.33  FLT1:   movl XREGS_ds(%esp),%ds
    6.34 @@ -216,9 +216,8 @@ FIX1:   SET_XEN_SEGMENTS(a)
    6.35  DBLFLT1:GET_CURRENT(%ebx)
    6.36          jmp   test_all_events
    6.37  DBLFIX1:GET_CURRENT(%ebx)
    6.38 -        testb $TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    6.39 -        jnz   domain_crash             # cannot reenter failsafe code
    6.40 -        orb   $TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    6.41 +        bts   $_TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    6.42 +        jc    domain_crash             # cannot reenter failsafe code
    6.43          jmp   test_all_events          # will return via failsafe code
    6.44  .previous
    6.45  .section __pre_ex_table,"a"
    6.46 @@ -235,7 +234,6 @@ DBLFIX1:GET_CURRENT(%ebx)
    6.47  /* No special register assumptions */
    6.48  failsafe_callback:
    6.49          GET_CURRENT(%ebx)
    6.50 -        andb $~TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    6.51          leal EDOMAIN_trap_bounce(%ebx),%edx
    6.52          movl EDOMAIN_failsafe_addr(%ebx),%eax
    6.53          movl %eax,TRAPBOUNCE_eip(%edx)
    6.54 @@ -282,8 +280,6 @@ ENTRY(hypercall)
    6.55          GET_CURRENT(%ebx)
    6.56  	andl $(NR_hypercalls-1),%eax
    6.57  	call *SYMBOL_NAME(hypercall_table)(,%eax,4)
    6.58 -
    6.59 -ret_from_hypercall:
    6.60          movl %eax,XREGS_eax(%esp)       # save the return value
    6.61  
    6.62  test_all_events:
     7.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Feb 10 02:27:48 2005 +0000
     7.2 +++ b/xen/arch/x86/x86_64/entry.S	Thu Feb 10 11:26:37 2005 +0000
     7.3 @@ -20,8 +20,8 @@
     7.4  
     7.5          ALIGN
     7.6  restore_all_guest:
     7.7 -        testb $TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
     7.8 -        jnz   failsafe_callback
     7.9 +        btr   $_TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
    7.10 +        jc    failsafe_callback
    7.11          RESTORE_ALL
    7.12          testw $TRAP_syscall,4(%rsp)
    7.13          jz    1f
    7.14 @@ -50,9 +50,8 @@ FIX1:   popq  -15*8-8(%rsp)            #
    7.15  DBLFLT1:GET_CURRENT(%rbx)
    7.16          jmp   test_all_events
    7.17  DBLFIX1:GET_CURRENT(%rbx)
    7.18 -        testb $TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
    7.19 -        jnz   domain_crash             # cannot reenter failsafe code
    7.20 -        orb   $TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
    7.21 +        bts   $_TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
    7.22 +        jc    domain_crash             # cannot reenter failsafe code
    7.23          jmp   test_all_events          # will return via failsafe code
    7.24  .previous
    7.25  .section __pre_ex_table,"a"
    7.26 @@ -65,7 +64,6 @@ DBLFIX1:GET_CURRENT(%rbx)
    7.27  /* No special register assumptions */
    7.28  failsafe_callback:
    7.29          GET_CURRENT(%rbx)
    7.30 -        andb $~TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
    7.31          leaq EDOMAIN_trap_bounce(%rbx),%rdx
    7.32          movq EDOMAIN_failsafe_addr(%rbx),%rax
    7.33          movq %rax,TRAPBOUNCE_eip(%rdx)
    7.34 @@ -97,8 +95,7 @@ restore_all_xen:
    7.35   * NB. We must move %r10 to %rcx for C function-calling ABI.
    7.36   */
    7.37          ALIGN
    7.38 -ENTRY(hypercall)
    7.39 -        sti
    7.40 +ENTRY(syscall_enter)
    7.41          movl  $__GUEST_SS,8(%rsp)
    7.42          pushq %r11
    7.43          pushq $__GUEST_CS
    7.44 @@ -106,13 +103,20 @@ ENTRY(hypercall)
    7.45          pushq $0
    7.46          movl  $TRAP_syscall,4(%rsp)
    7.47          SAVE_ALL
    7.48 +        GET_CURRENT(%rbx)
    7.49 +        bts   $_TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    7.50 +        jc    hypercall
    7.51 +        swapgs
    7.52 +        movq  %rbx,%rdi
    7.53 +        call  SYMBOL_NAME(write_ptbase)
    7.54 +        jmp   restore_all_guest
    7.55 +
    7.56 +hypercall:
    7.57 +        sti
    7.58          movq  %r10,%rcx
    7.59          andq  $(NR_hypercalls-1),%rax
    7.60 -        leaq  SYMBOL_NAME(hypercall_table)(%rip),%rbx
    7.61 -        callq *(%rbx,%rax,8)
    7.62 -        GET_CURRENT(%rbx)
    7.63 -
    7.64 -ret_from_hypercall:
    7.65 +        leaq  SYMBOL_NAME(hypercall_table)(%rip),%r10
    7.66 +        callq *(%r10,%rax,8)
    7.67          movq %rax,XREGS_rax(%rsp)       # save the return value
    7.68  
    7.69  test_all_events:
    7.70 @@ -154,7 +158,7 @@ create_bounce_frame:
    7.71          movq  XREGS_rsp+8(%rsp),%rsi
    7.72          testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    7.73          jnz   1f
    7.74 -        /* Push new frame at registered guest-OS stack top. */
    7.75 +        /* Push new frame at registered guest-OS stack base. */
    7.76          movq  EDOMAIN_kernel_sp(%rbx),%rsi
    7.77  1:      movq  $HYPERVISOR_VIRT_START,%rax
    7.78          cmpq  %rax,%rsi
    7.79 @@ -203,11 +207,11 @@ FLT15:  movq  %rax,(%rsi)               
    7.80          /* Rewrite our stack frame and return to guest-OS mode. */
    7.81          /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
    7.82          movb  $0,TRAPBOUNCE_flags(%rdx)
    7.83 -        testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    7.84 -        jnz   1f
    7.85 -        orb   $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    7.86 +        bts   $_TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    7.87 +        jc    1f
    7.88          swapgs
    7.89 -        /* XXX switch page tables XXX */
    7.90 +        movq  %rbx,%rdi
    7.91 +        call  SYMBOL_NAME(write_ptbase)
    7.92  1:      movl  $TRAP_syscall,XREGS_entry_vector+8(%rsp)
    7.93          andl  $0xfffcbeff,XREGS_eflags+8(%rsp)
    7.94          movl  $__GUEST_SS,XREGS_ss+8(%rsp)
    7.95 @@ -425,7 +429,7 @@ ENTRY(hypercall_table)
    7.96          .quad SYMBOL_NAME(do_set_debugreg)
    7.97          .quad SYMBOL_NAME(do_get_debugreg)
    7.98          .quad SYMBOL_NAME(do_update_descriptor)  /* 10 */
    7.99 -        .quad SYMBOL_NAME(do_ni_hypercall) # do_set_fast_trap
   7.100 +        .quad SYMBOL_NAME(do_ni_hypercall)
   7.101          .quad SYMBOL_NAME(do_dom_mem_op)
   7.102          .quad SYMBOL_NAME(do_multicall)
   7.103          .quad SYMBOL_NAME(do_update_va_mapping)
   7.104 @@ -437,8 +441,9 @@ ENTRY(hypercall_table)
   7.105          .quad SYMBOL_NAME(do_grant_table_op)     /* 20 */
   7.106          .quad SYMBOL_NAME(do_vm_assist)
   7.107          .quad SYMBOL_NAME(do_update_va_mapping_otherdomain)
   7.108 -        .quad SYMBOL_NAME(do_ni_hypercall) # do_switch_vm86
   7.109 +        .quad SYMBOL_NAME(do_switch_to_user)
   7.110          .quad SYMBOL_NAME(do_boot_vcpu)
   7.111 +        .quad SYMBOL_NAME(do_set_segment_base)   /* 25 */
   7.112          .rept NR_hypercalls-((.-hypercall_table)/4)
   7.113          .quad SYMBOL_NAME(do_ni_hypercall)
   7.114          .endr
     8.1 --- a/xen/arch/x86/x86_64/mm.c	Thu Feb 10 02:27:48 2005 +0000
     8.2 +++ b/xen/arch/x86/x86_64/mm.c	Thu Feb 10 11:26:37 2005 +0000
     8.3 @@ -26,7 +26,7 @@
     8.4  #include <asm/page.h>
     8.5  #include <asm/flushtlb.h>
     8.6  #include <asm/fixmap.h>
     8.7 -#include <asm/domain_page.h>
     8.8 +#include <asm/msr.h>
     8.9  
    8.10  void *safe_page_alloc(void)
    8.11  {
    8.12 @@ -238,6 +238,34 @@ long do_stack_switch(unsigned long ss, u
    8.13      return 0;
    8.14  }
    8.15  
    8.16 +long do_set_segment_base(unsigned int which, unsigned long base)
    8.17 +{
    8.18 +    struct exec_domain *ed = current;
    8.19 +
    8.20 +    switch ( which )
    8.21 +    {
    8.22 +    case SEGBASE_FS:
    8.23 +        ed->arch.user_ctxt.fs_base = base;
    8.24 +        wrmsr(MSR_FS_BASE, base, base>>32);
    8.25 +        break;
    8.26 +
    8.27 +    case SEGBASE_GS_USER:
    8.28 +        ed->arch.user_ctxt.gs_base_user = base;
    8.29 +        wrmsr(MSR_SHADOW_GS_BASE, base, base>>32);
    8.30 +        break;
    8.31 +
    8.32 +    case SEGBASE_GS_KERNEL:
    8.33 +        ed->arch.user_ctxt.gs_base_kernel = base;
    8.34 +        wrmsr(MSR_GS_BASE, base, base>>32);
    8.35 +        break;
    8.36 +
    8.37 +    default:
    8.38 +        return -EINVAL;
    8.39 +    }
    8.40 +
    8.41 +    return 0;
    8.42 +}
    8.43 +
    8.44  
    8.45  /* Returns TRUE if given descriptor is valid for GDT or LDT. */
    8.46  int check_descriptor(struct desc_struct *d)
     9.1 --- a/xen/arch/x86/x86_64/traps.c	Thu Feb 10 02:27:48 2005 +0000
     9.2 +++ b/xen/arch/x86/x86_64/traps.c	Thu Feb 10 11:26:37 2005 +0000
     9.3 @@ -153,12 +153,14 @@ asmlinkage void do_double_fault(struct x
     9.4          __asm__ __volatile__ ( "hlt" );
     9.5  }
     9.6  
     9.7 -asmlinkage void hypercall(void);
     9.8 +asmlinkage void syscall_enter(void);
     9.9  void __init percpu_traps_init(void)
    9.10  {
    9.11 -    char *stack_top = (char *)get_stack_top();
    9.12 -    char *stack     = (char *)((unsigned long)stack_top & ~(STACK_SIZE - 1));
    9.13 -    int   cpu       = smp_processor_id();
    9.14 +    char *stack_bottom, *stack;
    9.15 +    int   cpu = smp_processor_id();
    9.16 +
    9.17 +    stack_bottom = (char *)get_stack_bottom();
    9.18 +    stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
    9.19  
    9.20      /* Double-fault handler has its own per-CPU 1kB stack. */
    9.21      init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
    9.22 @@ -181,17 +183,17 @@ void __init percpu_traps_init(void)
    9.23      stack[0] = 0x48;
    9.24      stack[1] = 0x89;
    9.25      stack[2] = 0x25;
    9.26 -    *(u32 *)&stack[3] = (stack_top - &stack[7]) - 16;
    9.27 +    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
    9.28  
    9.29      /* leaq saversp(%rip), %rsp */
    9.30      stack[7] = 0x48;
    9.31      stack[8] = 0x8d;
    9.32      stack[9] = 0x25;
    9.33 -    *(u32 *)&stack[10] = (stack_top - &stack[14]) - 16;
    9.34 +    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
    9.35  
    9.36 -    /* jmp hypercall */
    9.37 +    /* jmp syscall_enter */
    9.38      stack[14] = 0xe9;
    9.39 -    *(u32 *)&stack[15] = (char *)hypercall - &stack[19];
    9.40 +    *(u32 *)&stack[15] = (char *)syscall_enter - &stack[19];
    9.41  
    9.42      /*
    9.43       * Trampoline for SYSCALL entry from compatibility mode.
    9.44 @@ -205,17 +207,17 @@ void __init percpu_traps_init(void)
    9.45      stack[0] = 0x48;
    9.46      stack[1] = 0x89;
    9.47      stack[2] = 0x25;
    9.48 -    *(u32 *)&stack[3] = (stack_top - &stack[7]) - 16;
    9.49 +    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
    9.50  
    9.51      /* leaq saversp(%rip), %rsp */
    9.52      stack[7] = 0x48;
    9.53      stack[8] = 0x8d;
    9.54      stack[9] = 0x25;
    9.55 -    *(u32 *)&stack[10] = (stack_top - &stack[14]) - 16;
    9.56 +    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
    9.57  
    9.58 -    /* jmp hypercall */
    9.59 +    /* jmp syscall_enter */
    9.60      stack[14] = 0xe9;
    9.61 -    *(u32 *)&stack[15] = (char *)hypercall - &stack[19];
    9.62 +    *(u32 *)&stack[15] = (char *)syscall_enter - &stack[19];
    9.63  
    9.64      /*
    9.65       * Common SYSCALL parameters.
    10.1 --- a/xen/include/asm-x86/domain.h	Thu Feb 10 02:27:48 2005 +0000
    10.2 +++ b/xen/include/asm-x86/domain.h	Thu Feb 10 11:26:37 2005 +0000
    10.3 @@ -98,6 +98,7 @@ struct arch_exec_domain
    10.4       */
    10.5      l1_pgentry_t *perdomain_ptes;
    10.6      pagetable_t  pagetable;
    10.7 +    pagetable_t  pagetable_user;  /* x86/64: user-space pagetable. */
    10.8  
    10.9      pagetable_t  monitor_table;
   10.10      pagetable_t  phys_table;            /* 1:1 pagetable */
    11.1 --- a/xen/include/asm-x86/msr.h	Thu Feb 10 02:27:48 2005 +0000
    11.2 +++ b/xen/include/asm-x86/msr.h	Thu Feb 10 11:26:37 2005 +0000
    11.3 @@ -63,7 +63,7 @@
    11.4  #define MSR_SYSCALL_MASK 0xc0000084	/* EFLAGS mask for syscall */
    11.5  #define MSR_FS_BASE 0xc0000100		/* 64bit FS base */
    11.6  #define MSR_GS_BASE 0xc0000101		/* 64bit GS base */
    11.7 -#define MSR_KERNEL_GS_BASE  0xc0000102	/* SwapGS GS shadow (or USER_GS from kernel) */ 
    11.8 +#define MSR_SHADOW_GS_BASE  0xc0000102	/* SwapGS GS shadow */ 
    11.9  /* EFER bits: */ 
   11.10  #define _EFER_SCE 0  /* SYSCALL/SYSRET */
   11.11  #define _EFER_LME 8  /* Long mode enable */
    12.1 --- a/xen/include/asm-x86/processor.h	Thu Feb 10 02:27:48 2005 +0000
    12.2 +++ b/xen/include/asm-x86/processor.h	Thu Feb 10 11:26:37 2005 +0000
    12.3 @@ -130,8 +130,10 @@
    12.4  #define TBF_FAILSAFE          16
    12.5  
    12.6  /* arch_exec_domain 'flags' values */
    12.7 -#define TF_failsafe_return     1
    12.8 -#define TF_kernel_mode        2
    12.9 +#define _TF_failsafe_return    0
   12.10 +#define _TF_kernel_mode        1
   12.11 +#define TF_failsafe_return     (1<<_TF_failsafe_return)
   12.12 +#define TF_kernel_mode         (1<<_TF_kernel_mode)
   12.13  
   12.14  #ifndef __ASSEMBLY__
   12.15  
    13.1 --- a/xen/include/asm-x86/x86_32/current.h	Thu Feb 10 02:27:48 2005 +0000
    13.2 +++ b/xen/include/asm-x86/x86_32/current.h	Thu Feb 10 11:26:37 2005 +0000
    13.3 @@ -34,11 +34,11 @@ static inline execution_context_t *get_e
    13.4  }
    13.5  
    13.6  /*
    13.7 - * Get the top-of-stack, as stored in the per-CPU TSS. This is actually
    13.8 - * 20 bytes below the real top of the stack to allow space for:
    13.9 + * Get the bottom-of-stack, as stored in the per-CPU TSS. This is actually
   13.10 + * 20 bytes before the real bottom of the stack to allow space for:
   13.11   *  domain pointer, DS, ES, FS, GS.
   13.12   */
   13.13 -static inline unsigned long get_stack_top(void)
   13.14 +static inline unsigned long get_stack_bottom(void)
   13.15  {
   13.16      unsigned long p;
   13.17      __asm__ ( "andl %%esp,%0; addl %2,%0" 
    14.1 --- a/xen/include/asm-x86/x86_64/current.h	Thu Feb 10 02:27:48 2005 +0000
    14.2 +++ b/xen/include/asm-x86/x86_64/current.h	Thu Feb 10 11:26:37 2005 +0000
    14.3 @@ -34,11 +34,11 @@ static inline execution_context_t *get_e
    14.4  }
    14.5  
    14.6  /*
    14.7 - * Get the top-of-stack, as stored in the per-CPU TSS. This is actually
    14.8 - * 64 bytes below the real top of the stack to allow space for:
    14.9 + * Get the bottom-of-stack, as stored in the per-CPU TSS. This is actually
   14.10 + * 64 bytes before the real bottom of the stack to allow space for:
   14.11   *  domain pointer, DS, ES, FS, GS, FS_BASE, GS_BASE_OS, GS_BASE_APP
   14.12   */
   14.13 -static inline unsigned long get_stack_top(void)
   14.14 +static inline unsigned long get_stack_bottom(void)
   14.15  {
   14.16      unsigned long p;
   14.17      __asm__ ( "orq %%rsp,%0; andq $~7,%0" 
    15.1 --- a/xen/include/public/arch-x86_64.h	Thu Feb 10 02:27:48 2005 +0000
    15.2 +++ b/xen/include/public/arch-x86_64.h	Thu Feb 10 11:26:37 2005 +0000
    15.3 @@ -77,12 +77,38 @@
    15.4  #define HYPERVISOR_VIRT_END   (0xFFFF880000000000UL)
    15.5  #endif
    15.6  
    15.7 +#ifndef __ASSEMBLY__
    15.8 +
    15.9  /* The machine->physical mapping table starts at this address, read-only. */
   15.10  #ifndef machine_to_phys_mapping
   15.11  #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
   15.12  #endif
   15.13  
   15.14 -#ifndef __ASSEMBLY__
   15.15 +/*
   15.16 + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
   15.17 + *  @which == SEGBASE_*  ;  @base == 64-bit base address
   15.18 + * Returns 0 on success.
   15.19 + */
   15.20 +#define SEGBASE_FS          0
   15.21 +#define SEGBASE_GS_USER     1
   15.22 +#define SEGBASE_GS_KERNEL   2
   15.23 +
   15.24 +/*
   15.25 + * int HYPERVISOR_switch_to_user(void)
   15.26 + *  All arguments are on the kernel stack, in the following format.
   15.27 + * Never returns if successful. Current kernel context is lost.
   15.28 + * If flags contains ECF_IN_SYSCALL:
   15.29 + *   Restore RIP, RFLAGS, RSP. 
   15.30 + *   Discard R11, RCX, CS, SS.
   15.31 + * Otherwise:
   15.32 + *   Restore R11, RCX, CS:RIP, RFLAGS, SS:RSP.
   15.33 + * All other registers are saved on hypercall entry and restored to user.
   15.34 + */
   15.35 +struct switch_to_user {
   15.36 +    /* Top of stack (%rsp at point of hypercall). */
   15.37 +    u64 r11, rcx, flags, rip, cs, rflags, rsp, ss; /* eight 64-bit slots, r11 first; 'flags' carries ECF_* bits, 'rflags' is the RFLAGS value to restore */
   15.38 +    /* Bottom of switch_to_user stack frame. */
   15.39 +} PACKED;
   15.40  
   15.41  /* NB. Both the following are 64 bits each. */
   15.42  typedef unsigned long memory_t;   /* Full-sized pointer/address/memory-size. */
   15.43 @@ -136,8 +162,8 @@ typedef struct xen_regs
   15.44      u64 fs;      /* Non-zero => takes precedence over fs_base.     */
   15.45      u64 gs;      /* Non-zero => takes precedence over gs_base_app. */
   15.46      u64 fs_base;
   15.47 -    u64 gs_base_os;
   15.48 -    u64 gs_base_app;
   15.49 +    u64 gs_base_kernel;
   15.50 +    u64 gs_base_user;
   15.51  } PACKED execution_context_t;
   15.52  
   15.53  typedef u64 tsc_timestamp_t; /* RDTSC timestamp */
    16.1 --- a/xen/include/public/xen.h	Thu Feb 10 02:27:48 2005 +0000
    16.2 +++ b/xen/include/public/xen.h	Thu Feb 10 11:26:37 2005 +0000
    16.3 @@ -42,7 +42,7 @@
    16.4  #define __HYPERVISOR_set_debugreg          8
    16.5  #define __HYPERVISOR_get_debugreg          9
    16.6  #define __HYPERVISOR_update_descriptor    10
    16.7 -#define __HYPERVISOR_set_fast_trap        11
    16.8 +#define __HYPERVISOR_set_fast_trap        11 /* x86/32 only */
    16.9  #define __HYPERVISOR_dom_mem_op           12
   16.10  #define __HYPERVISOR_multicall            13
   16.11  #define __HYPERVISOR_update_va_mapping    14
   16.12 @@ -54,8 +54,10 @@
   16.13  #define __HYPERVISOR_grant_table_op       20
   16.14  #define __HYPERVISOR_vm_assist            21
   16.15  #define __HYPERVISOR_update_va_mapping_otherdomain 22
   16.16 -#define __HYPERVISOR_switch_vm86          23
   16.17 +#define __HYPERVISOR_switch_vm86          23 /* x86/32 only */
   16.18 +#define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
   16.19  #define __HYPERVISOR_boot_vcpu            24
   16.20 +#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
   16.21  
   16.22  /*
   16.23   * MULTICALLS
   16.24 @@ -118,6 +120,10 @@
   16.25   *   val[7:0] == MMUEXT_NEW_BASEPTR:
   16.26   *   ptr[:2]  -- Machine address of new page-table base to install in MMU.
   16.27   * 
   16.28 + *   val[7:0] == MMUEXT_NEW_USER_BASEPTR: [x86/64 only]
   16.29 + *   ptr[:2]  -- Machine address of new page-table base to install in MMU
   16.30 + *               when in user space.
   16.31 + * 
   16.32   *   val[7:0] == MMUEXT_TLB_FLUSH:
   16.33   *   No additional arguments.
   16.34   * 
   16.35 @@ -166,6 +172,7 @@
   16.36  #define MMUEXT_CLEAR_FOREIGNDOM 11
   16.37  #define MMUEXT_TRANSFER_PAGE    12 /* ptr = MA of frame; val[31:16] = dom    */
   16.38  #define MMUEXT_REASSIGN_PAGE    13
   16.39 +#define MMUEXT_NEW_USER_BASEPTR 14
   16.40  #define MMUEXT_CMD_MASK        255
   16.41  #define MMUEXT_CMD_SHIFT         8
   16.42