ia64/xen-unstable

changeset 4140:6a7120b3405b

bitkeeper revision 1.1236.25.22 (4235e4fbkgMjr8FNbH_NCE7pnfV92g)

Inform the guest kernel whether the interrupted context was kernel or user
by using the RPL of the saved CS selector (0 == kernel context;
3 == user context). Add some security checking and enforcement to
the switch_to_user hypercall.
author kaf24@firebug.cl.cam.ac.uk
date Mon Mar 14 19:24:43 2005 +0000 (2005-03-14)
parents a4136a26fa6d
children 288c77d96e81
files xen/arch/x86/domain.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/include/asm-x86/desc.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Mon Mar 14 18:30:47 2005 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Mon Mar 14 19:24:43 2005 +0000
     1.3 @@ -655,11 +655,10 @@ static void switch_segments(
     1.4              (unsigned long *)regs->rsp : 
     1.5              (unsigned long *)n->arch.kernel_sp;
     1.6  
     1.7 -        /* Set the kernel-mode indicator byte at the top of RFLAGS. */
     1.8 -        ((char *)regs->rflags)[7] = !!(n->arch.flags & TF_kernel_mode);
     1.9 -
    1.10          if ( !(n->arch.flags & TF_kernel_mode) )
    1.11              toggle_guest_mode(n);
    1.12 +        else
    1.13 +            regs->cs &= ~3;
    1.14  
    1.15          if ( put_user(regs->ss,     rsp- 1) |
    1.16               put_user(regs->rsp,    rsp- 2) |
    1.17 @@ -699,10 +698,10 @@ long do_switch_to_user(void)
    1.18      toggle_guest_mode(ed);
    1.19  
    1.20      regs->rip    = stu.rip;
    1.21 -    regs->cs     = stu.cs;
    1.22 +    regs->cs     = stu.cs | 3; /* force guest privilege */
    1.23      regs->rflags = stu.rflags;
    1.24      regs->rsp    = stu.rsp;
    1.25 -    regs->ss     = stu.ss;
    1.26 +    regs->ss     = stu.ss | 3; /* force guest privilege */
    1.27  
    1.28      if ( !(stu.flags & ECF_IN_SYSCALL) )
    1.29      {
     2.1 --- a/xen/arch/x86/x86_32/mm.c	Mon Mar 14 18:30:47 2005 +0000
     2.2 +++ b/xen/arch/x86/x86_32/mm.c	Mon Mar 14 19:24:43 2005 +0000
     2.3 @@ -202,8 +202,7 @@ long do_stack_switch(unsigned long ss, u
     2.4      int nr = smp_processor_id();
     2.5      struct tss_struct *t = &init_tss[nr];
     2.6  
     2.7 -    /* We need to do this check as we load and use SS on guest's behalf. */
     2.8 -    if ( (ss & 3) == 0 )
     2.9 +    if ( (ss & 3) != 1 )
    2.10          return -EPERM;
    2.11  
    2.12      current->arch.kernel_ss = ss;
    2.13 @@ -275,21 +274,9 @@ int check_descriptor(struct desc_struct 
    2.14      if ( (b & _SEGMENT_G) )
    2.15          limit <<= 12;
    2.16  
    2.17 -    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    2.18 +    switch ( b & (_SEGMENT_CODE | _SEGMENT_EC) )
    2.19      {
    2.20 -        /*
    2.21 -         * Grows-down limit check. 
    2.22 -         * NB. limit == 0xFFFFF provides no access      (if G=1).
    2.23 -         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
    2.24 -         */
    2.25 -        if ( (base + limit) > base )
    2.26 -        {
    2.27 -            limit = -(base & PAGE_MASK);
    2.28 -            goto truncate;
    2.29 -        }
    2.30 -    }
    2.31 -    else
    2.32 -    {
    2.33 +    case 0: /* Data segment, grows-up */
    2.34          /*
    2.35           * Grows-up limit check.
    2.36           * NB. limit == 0xFFFFF provides 4GB access (if G=1).
    2.37 @@ -306,6 +293,23 @@ int check_descriptor(struct desc_struct 
    2.38              d->a &= ~0x0ffff; d->a |= limit & 0x0ffff;
    2.39              d->b &= ~0xf0000; d->b |= limit & 0xf0000;
    2.40          }
    2.41 +        goto good;
    2.42 +    case _SEGMENT_EC: /* Data segment, grows-down */
    2.43 +        /*
    2.44 +         * Grows-down limit check. 
    2.45 +         * NB. limit == 0xFFFFF provides no access      (if G=1).
    2.46 +         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
    2.47 +         */
    2.48 +        if ( (base + limit) > base )
    2.49 +        {
    2.50 +            limit = -(base & PAGE_MASK);
    2.51 +            goto truncate;
    2.52 +        }
    2.53 +        goto good;
    2.54 +    case _SEGMENT_CODE: /* Code segment, non-conforming */
    2.55 +        goto good;
    2.56 +    case _SEGMENT_CODE|_SEGMENT_EC: /* Code segment, conforming */
    2.57 +        goto bad;
    2.58      }
    2.59  
    2.60   good:
     3.1 --- a/xen/arch/x86/x86_64/entry.S	Mon Mar 14 18:30:47 2005 +0000
     3.2 +++ b/xen/arch/x86/x86_64/entry.S	Mon Mar 14 19:24:43 2005 +0000
     3.3 @@ -162,20 +162,19 @@ process_softirqs:
     3.4  /* %rdx: trap_bounce, %rbx: struct exec_domain                           */
     3.5  /* On return only %rbx is guaranteed non-clobbered.                      */
     3.6  create_bounce_frame:
     3.7 -        /* Push new frame at existing %rsp if already in guest-OS mode. */
     3.8 -        movq  XREGS_rsp+8(%rsp),%rsi
     3.9          testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    3.10 -        /* Set kernel-mode indicator byte (RFLAGS[63:56]). */
    3.11 -        setnz XREGS_eflags+15(%rsp)
    3.12          jnz   1f
    3.13          /* Push new frame at registered guest-OS stack base. */
    3.14 -        /* Then call to C: toggle_guest_mode(current) */
    3.15          movq  EDOMAIN_kernel_sp(%rbx),%rsi
    3.16 +        pushq %rdx
    3.17          movq  %rbx,%rdi
    3.18 -        pushq %rdx
    3.19          call  SYMBOL_NAME(toggle_guest_mode)
    3.20          popq  %rdx
    3.21 -1:      movq  $HYPERVISOR_VIRT_START,%rax
    3.22 +        jmp   2f
    3.23 +1:      /* In kernel context already: push new frame at existing %rsp. */
    3.24 +        movq  XREGS_rsp+8(%rsp),%rsi
    3.25 +        andb  $0xfc,XREGS_cs+8(%rsp)    # Indicate kernel context to guest.
    3.26 +2:      movq  $HYPERVISOR_VIRT_START,%rax
    3.27          cmpq  %rax,%rsi
    3.28          jb    1f                        # In +ve address space? Then okay.
    3.29          movq  $HYPERVISOR_VIRT_END+60,%rax
     4.1 --- a/xen/arch/x86/x86_64/mm.c	Mon Mar 14 18:30:47 2005 +0000
     4.2 +++ b/xen/arch/x86/x86_64/mm.c	Mon Mar 14 19:24:43 2005 +0000
     4.3 @@ -287,9 +287,14 @@ int check_descriptor(struct desc_struct 
     4.4      if ( (b & _SEGMENT_DPL) != 3 )
     4.5          goto bad;
     4.6  
     4.7 -    /* Any code or data segment is okay. No base/limit checking. */
     4.8 +    /* Most code and data segments are okay. No base/limit checking. */
     4.9      if ( (b & _SEGMENT_S) )
    4.10 +    {
    4.11 +        /* Disallow conforming code segments. I'm not sure they're safe. */
    4.12 +        if ( (b & (_SEGMENT_CODE|_SEGMENT_EC)) == (_SEGMENT_CODE|_SEGMENT_EC) )
    4.13 +            goto bad;
    4.14          goto good;
    4.15 +    }
    4.16  
    4.17      /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    4.18      if ( (b & _SEGMENT_TYPE) == 0x000 )
     5.1 --- a/xen/include/asm-x86/desc.h	Mon Mar 14 18:30:47 2005 +0000
     5.2 +++ b/xen/include/asm-x86/desc.h	Mon Mar 14 19:24:43 2005 +0000
     5.3 @@ -6,18 +6,23 @@
     5.4  
     5.5  #define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (__TSS(n)<<3) )
     5.6  
     5.7 +#if defined(__x86_64__)
     5.8 +#define GUEST_KERNEL_RPL 3
     5.9 +#elif defined(__i386__)
    5.10 +#define GUEST_KERNEL_RPL 1
    5.11 +#endif
    5.12 +
    5.13  /*
    5.14 - * Guest OS must provide its own code selectors, or use the one we provide. The
    5.15 - * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
    5.16 - * value is okay. Note that checking only the RPL is insufficient: if the
    5.17 - * selector is poked into an interrupt, trap or call gate then the RPL is
    5.18 - * ignored when the gate is accessed.
    5.19 + * Guest OS must provide its own code selectors, or use the one we provide. Any
    5.20 + * LDT selector value is okay. Note that checking only the RPL is insufficient:
    5.21 + * if the selector is poked into an interrupt, trap or call gate then the RPL
    5.22 + * is ignored when the gate is accessed.
    5.23   */
    5.24  #define VALID_SEL(_s)                                                      \
    5.25      (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) ||                            \
    5.26        (((_s)>>3) >  LAST_RESERVED_GDT_ENTRY) ||                            \
    5.27        ((_s)&4)) &&                                                         \
    5.28 -     (((_s)&3) == 1))
    5.29 +     (((_s)&3) == GUEST_KERNEL_RPL))
    5.30  #define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s))
    5.31  
    5.32  /* These are bitmasks for the high 32 bits of a descriptor table entry. */