ia64/xen-unstable

changeset 8968:697fac283c9e

Add feature flag allowing guest kernel to run in ring 0.

To support this we give kernel GDT entries DPL=0 and use kernel
segment selectors with RPL=0. Xen will crunch these to ring 1 when
they are passed in. When a segment selector is used directly or placed
in a stack frame, the guest OS is responsible for crunching the RPL.

Signed-off-by: Ian Campbell <Ian.Campbell@XenSource.com>
author Ian.Campbell@xensource.com
date Wed Feb 22 19:11:23 2006 +0000 (2006-02-22)
parents f1c75df46b46
children 175ad739d8bc
files linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h xen/arch/x86/domain.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/mm.c xen/include/asm-x86/desc.h xen/include/public/version.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Wed Feb 22 19:40:19 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Wed Feb 22 19:11:23 2006 +0000
     1.3 @@ -106,8 +106,8 @@ ENTRY(cpu_gdt_table)
     1.4  	.quad 0x0000000000000000	/* 0x53 reserved */
     1.5  	.quad 0x0000000000000000	/* 0x5b reserved */
     1.6  
     1.7 -	.quad 0x00cfbb000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
     1.8 -	.quad 0x00cfb3000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
     1.9 +	.quad 0x00cf9b000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
    1.10 +	.quad 0x00cf93000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
    1.11  	.quad 0x00cffb000000ffff	/* 0x73 user 4GB code at 0x00000000 */
    1.12  	.quad 0x00cff3000000ffff	/* 0x7b user 4GB data at 0x00000000 */
    1.13  
    1.14 @@ -182,6 +182,7 @@ ENTRY(_stext)
    1.15  	.ascii  ",FEATURES=writable_page_tables"
    1.16  	.ascii	         "|writable_descriptor_tables"
    1.17  	.ascii	         "|auto_translated_physmap"
    1.18 +	.ascii	         "|ring0_kernel"
    1.19  #ifdef CONFIG_X86_PAE
    1.20  	.ascii	",PAE=yes"
    1.21  #else
     2.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c	Wed Feb 22 19:40:19 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c	Wed Feb 22 19:11:23 2006 +0000
     2.3 @@ -272,7 +272,7 @@ int kernel_thread(int (*fn)(void *), voi
     2.4  	regs.xes = __USER_DS;
     2.5  	regs.orig_eax = -1;
     2.6  	regs.eip = (unsigned long) kernel_thread_helper;
     2.7 -	regs.xcs = __KERNEL_CS;
     2.8 +	regs.xcs = GET_KERNEL_CS();
     2.9  	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
    2.10  
    2.11  	/* Ok, create the new process.. */
     3.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c	Wed Feb 22 19:40:19 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c	Wed Feb 22 19:11:23 2006 +0000
     3.3 @@ -85,7 +85,7 @@ static inline unsigned long get_segment_
     3.4  		return eip + (seg << 4);
     3.5  	
     3.6  	/* By far the most common cases. */
     3.7 -	if (likely(seg == __USER_CS || seg == __KERNEL_CS))
     3.8 +	if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
     3.9  		return eip;
    3.10  
    3.11  	/* Check the segment exists, is within the current LDT/GDT size,
    3.12 @@ -396,7 +396,7 @@ good_area:
    3.13  	switch (error_code & 3) {
    3.14  		default:	/* 3: write, present */
    3.15  #ifdef TEST_VERIFY_AREA
    3.16 -			if (regs->cs == KERNEL_CS)
    3.17 +			if (regs->cs == GET_KERNEL_CS())
    3.18  				printk("WP fault at %08lx\n", regs->eip);
    3.19  #endif
    3.20  			/* fall through */
     4.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h	Wed Feb 22 19:40:19 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h	Wed Feb 22 19:11:23 2006 +0000
     4.3 @@ -60,10 +60,12 @@
     4.4  #define GDT_ENTRY_KERNEL_BASE	12
     4.5  
     4.6  #define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
     4.7 -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8 + 1)
     4.8 +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
     4.9 +#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_ring0_kernel)?0:1) )
    4.10  
    4.11  #define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
    4.12 -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8 + 1)
    4.13 +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
    4.14 +#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_ring0_kernel)?0:1) )
    4.15  
    4.16  #define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 4)
    4.17  #define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 5)
     5.1 --- a/xen/arch/x86/domain.c	Wed Feb 22 19:40:19 2006 +0100
     5.2 +++ b/xen/arch/x86/domain.c	Wed Feb 22 19:11:23 2006 +0000
     5.3 @@ -356,7 +356,8 @@ int arch_set_info_guest(
     5.4       */
     5.5      if ( !(c->flags & VGCF_HVM_GUEST) )
     5.6      {
     5.7 -        if ( ((c->user_regs.ss & 3) == 0) ||
     5.8 +        if ( !VALID_STACKSEL(c->user_regs.ss) ||
     5.9 +             !VALID_STACKSEL(c->kernel_ss) ||
    5.10               !VALID_CODESEL(c->user_regs.cs) )
    5.11              return -EINVAL;
    5.12  
     6.1 --- a/xen/arch/x86/x86_32/mm.c	Wed Feb 22 19:40:19 2006 +0100
     6.2 +++ b/xen/arch/x86/x86_32/mm.c	Wed Feb 22 19:11:23 2006 +0000
     6.3 @@ -223,7 +223,7 @@ long do_stack_switch(unsigned long ss, u
     6.4      int nr = smp_processor_id();
     6.5      struct tss_struct *t = &init_tss[nr];
     6.6  
     6.7 -    if ( (ss & 3) != 1 )
     6.8 +    if ( !VALID_STACKSEL(ss) )
     6.9          return -EPERM;
    6.10  
    6.11      current->arch.guest_context.kernel_ss = ss;
    6.12 @@ -239,6 +239,7 @@ int check_descriptor(struct desc_struct 
    6.13  {
    6.14      unsigned long base, limit;
    6.15      u32 a = d->a, b = d->b;
    6.16 +    u16 cs = a>>16;
    6.17  
    6.18      /* A not-present descriptor will always fault, so is safe. */
    6.19      if ( !(b & _SEGMENT_P) ) 
    6.20 @@ -251,7 +252,7 @@ int check_descriptor(struct desc_struct 
    6.21       * DPL 0 -- this would get the OS ring-0 privileges).
    6.22       */
    6.23      if ( (b & _SEGMENT_DPL) == 0 )
    6.24 -        goto bad;
    6.25 +        d->b = b = b | (0x01<<13); /* Force DPL == 1 */
    6.26  
    6.27      if ( !(b & _SEGMENT_S) )
    6.28      {
    6.29 @@ -272,9 +273,17 @@ int check_descriptor(struct desc_struct 
    6.30              goto bad;
    6.31  
    6.32          /* Can't allow far jump to a Xen-private segment. */
    6.33 -        if ( !VALID_CODESEL(a>>16) )
    6.34 +        if ( !VALID_CODESEL(cs) )
    6.35              goto bad;
    6.36  
    6.37 +        /*
    6.38 +         * VALID_CODESEL might have fixed up the RPL for us. So be sure to
    6.39 +         * update the descriptor.
    6.40 +         *
    6.41 +         */
    6.42 +        d->a &= 0x0000ffff;
    6.43 +        d->a |= cs<<16;
    6.44 +
    6.45          /* Reserved bits must be zero. */
    6.46          if ( (b & 0xe0) != 0 )
    6.47              goto bad;
     7.1 --- a/xen/arch/x86/x86_64/mm.c	Wed Feb 22 19:40:19 2006 +0100
     7.2 +++ b/xen/arch/x86/x86_64/mm.c	Wed Feb 22 19:11:23 2006 +0000
     7.3 @@ -292,6 +292,7 @@ long do_set_segment_base(unsigned int wh
     7.4  int check_descriptor(struct desc_struct *d)
     7.5  {
     7.6      u32 a = d->a, b = d->b;
     7.7 +    u16 cs = a>>16;
     7.8  
     7.9      /* A not-present descriptor will always fault, so is safe. */
    7.10      if ( !(b & _SEGMENT_P) ) 
    7.11 @@ -314,9 +315,17 @@ int check_descriptor(struct desc_struct 
    7.12          goto bad;
    7.13  
    7.14      /* Can't allow far jump to a Xen-private segment. */
    7.15 -    if ( !VALID_CODESEL(a>>16) )
    7.16 +    if ( !VALID_CODESEL(cs) )
    7.17          goto bad;
    7.18  
    7.19 +    /*
    7.20 +     * VALID_CODESEL might have fixed up the RPL for us. So be sure to
    7.21 +     * update the descriptor.
    7.22 +     *
    7.23 +     */
    7.24 +    d->a &= 0x0000ffff;
    7.25 +    d->a |= cs<<16;
    7.26 +
    7.27      /* Reserved bits must be zero. */
    7.28      if ( (b & 0xe0) != 0 )
    7.29          goto bad;
     8.1 --- a/xen/include/asm-x86/desc.h	Wed Feb 22 19:40:19 2006 +0100
     8.2 +++ b/xen/include/asm-x86/desc.h	Wed Feb 22 19:11:23 2006 +0000
     8.3 @@ -35,7 +35,14 @@
     8.4  #define VALID_SEL(_s)                                                      \
     8.5      (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || ((_s)&4)) &&               \
     8.6       (((_s)&3) == GUEST_KERNEL_RPL))
     8.7 -#define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s))
     8.8 +#define VALID_CODESEL(_s) ({                                               \
     8.9 +    if ( ((_s) & 3) == 0 )                                                 \
    8.10 +        (_s) |= GUEST_KERNEL_RPL;                                          \
    8.11 +    (_s) == FLAT_KERNEL_CS || VALID_SEL(_s); })
    8.12 +#define VALID_STACKSEL(_s) ({                                              \
    8.13 +    if ( ((_s) & 3) == 0 )                                                 \
    8.14 +        (_s) |= GUEST_KERNEL_RPL;                                          \
    8.15 +    (_s) == FLAT_KERNEL_SS || VALID_SEL(_s); })
    8.16  
    8.17  /* These are bitmasks for the high 32 bits of a descriptor table entry. */
    8.18  #define _SEGMENT_TYPE    (15<< 8)
     9.1 --- a/xen/include/public/version.h	Wed Feb 22 19:40:19 2006 +0100
     9.2 +++ b/xen/include/public/version.h	Wed Feb 22 19:11:23 2006 +0000
     9.3 @@ -51,6 +51,7 @@ typedef struct xen_feature_info {
     9.4  #define XENFEAT_writable_page_tables       0
     9.5  #define XENFEAT_writable_descriptor_tables 1
     9.6  #define XENFEAT_auto_translated_physmap    2
     9.7 +#define XENFEAT_ring0_kernel               3
     9.8  
     9.9  #define XENFEAT_NR_SUBMAPS 1
    9.10