ia64/xen-unstable

changeset 1816:4e0d79404699

bitkeeper revision 1.1104.1.1 (40f923322G2jO4f0TVh9AXW3jpr9bQ)

Initial Xen support for 4GB segments through instruction emulation.
The instruction decoder needs some refactoring as there is lots of
duplicated crufty code in there right now. Also, the TLS libraries
hit the emulator a LOT, but mainly with one or two instructions. Probably
we need to patch those within Linux.
author kaf24@scramble.cl.cam.ac.uk
date Sat Jul 17 13:01:38 2004 +0000 (2004-07-17)
parents ae33aca75a3e
children 0366608703aa
files .rootkeys linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c linux-2.6.7-xen-sparse/arch/xen/i386/kernel/process.c xen/arch/x86/dom0_ops.c xen/arch/x86/memory.c xen/arch/x86/traps.c xen/arch/x86/x86_32/emulate.c xen/arch/x86/x86_32/mm.c xen/include/asm-x86/desc.h xen/include/asm-x86/mm.h xen/include/asm-x86/processor.h xen/include/xen/perfc_defn.h
line diff
     1.1 --- a/.rootkeys	Fri Jul 16 20:04:00 2004 +0000
     1.2 +++ b/.rootkeys	Sat Jul 17 13:01:38 2004 +0000
     1.3 @@ -446,6 +446,7 @@ 3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/
     1.4  3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S
     1.5  3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c
     1.6  3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
     1.7 +40f92331jfOlE7MfKwpdkEb1CEf23g xen/arch/x86/x86_32/emulate.c
     1.8  3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
     1.9  3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
    1.10  3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
     2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c	Fri Jul 16 20:04:00 2004 +0000
     2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c	Sat Jul 17 13:01:38 2004 +0000
     2.3 @@ -156,132 +156,116 @@ static int read_ldt(void * ptr, unsigned
     2.4  	return bytecount;
     2.5  }
     2.6  
     2.7 -
     2.8  static int read_default_ldt(void * ptr, unsigned long bytecount)
     2.9  {
    2.10 -    int err;
    2.11 -    unsigned long size;
    2.12 -    void *address;
    2.13 +	int err;
    2.14 +	unsigned long size;
    2.15 +	void *address;
    2.16  
    2.17 -    err = 0;
    2.18 -    address = &default_ldt[0];
    2.19 -    size = 5*sizeof(struct desc_struct);
    2.20 -    if (size > bytecount)
    2.21 -        size = bytecount;
    2.22 +	err = 0;
    2.23 +	address = &default_ldt[0];
    2.24 +	size = 5*sizeof(struct desc_struct);
    2.25 +	if (size > bytecount)
    2.26 +		size = bytecount;
    2.27  
    2.28 -    err = size;
    2.29 -    if (copy_to_user(ptr, address, size))
    2.30 -        err = -EFAULT;
    2.31 +	err = size;
    2.32 +	if (copy_to_user(ptr, address, size))
    2.33 +		err = -EFAULT;
    2.34  
    2.35 -    return err;
    2.36 +	return err;
    2.37  }
    2.38  
    2.39  static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
    2.40  {
    2.41 -    struct mm_struct * mm = current->mm;
    2.42 -    __u32 entry_1, entry_2, *lp;
    2.43 -    unsigned long phys_lp, max_limit;
    2.44 -    int error;
    2.45 -    struct modify_ldt_ldt_s ldt_info;
    2.46 +	struct mm_struct * mm = current->mm;
    2.47 +	__u32 entry_1, entry_2, *lp;
    2.48 +	unsigned long phys_lp;
    2.49 +	int error;
    2.50 +	struct modify_ldt_ldt_s ldt_info;
    2.51  
    2.52 -    error = -EINVAL;
    2.53 -    if (bytecount != sizeof(ldt_info))
    2.54 -        goto out;
    2.55 -    error = -EFAULT; 	
    2.56 -    if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
    2.57 -        goto out;
    2.58 +	error = -EINVAL;
    2.59 +	if (bytecount != sizeof(ldt_info))
    2.60 +		goto out;
    2.61 +	error = -EFAULT; 	
    2.62 +	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
    2.63 +		goto out;
    2.64  
    2.65 -    error = -EINVAL;
    2.66 -    if (ldt_info.entry_number >= LDT_ENTRIES)
    2.67 -        goto out;
    2.68 -    if (ldt_info.contents == 3) {
    2.69 -        if (oldmode)
    2.70 -            goto out;
    2.71 -        if (ldt_info.seg_not_present == 0)
    2.72 -            goto out;
    2.73 -    }
    2.74 +	error = -EINVAL;
    2.75 +	if (ldt_info.entry_number >= LDT_ENTRIES)
    2.76 +		goto out;
    2.77 +	if (ldt_info.contents == 3) {
    2.78 +		if (oldmode)
    2.79 +			goto out;
    2.80 +		if (ldt_info.seg_not_present == 0)
    2.81 +			goto out;
    2.82 +	}
    2.83  
    2.84 -    /*
    2.85 -     * This makes our tests for overlap with Xen space easier. There's no good
    2.86 -     * reason to have a user segment starting this high anyway.
    2.87 -     */
    2.88 -    if (ldt_info.base_addr >= PAGE_OFFSET)
    2.89 -        goto out;
    2.90 +	down(&mm->context.sem);
    2.91 +	if (ldt_info.entry_number >= mm->context.size) {
    2.92 +		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
    2.93 +		if (error < 0)
    2.94 +			goto out_unlock;
    2.95 +	}
    2.96  
    2.97 -    down(&mm->context.sem);
    2.98 -    if (ldt_info.entry_number >= mm->context.size) {
    2.99 -      error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
   2.100 -      if (error < 0)
   2.101 -	goto out_unlock;
   2.102 -    }
   2.103 -
   2.104 -
   2.105 -    lp = (__u32 *)((ldt_info.entry_number<<3) + (char *)mm->context.ldt);
   2.106 -    phys_lp = arbitrary_virt_to_phys(lp);
   2.107 +	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
   2.108 +	phys_lp = arbitrary_virt_to_phys(lp);
   2.109  
   2.110 -    /* Allow LDTs to be cleared by the user. */
   2.111 -    if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
   2.112 -        if (oldmode ||
   2.113 -            (ldt_info.contents == 0		&&
   2.114 -             ldt_info.read_exec_only == 1	&&
   2.115 -             ldt_info.seg_32bit == 0		&&
   2.116 -             ldt_info.limit_in_pages == 0	&&
   2.117 -             ldt_info.seg_not_present == 1	&&
   2.118 -             ldt_info.useable == 0 )) {
   2.119 -            entry_1 = 0;
   2.120 -            entry_2 = 0;
   2.121 -            goto install;
   2.122 -        }
   2.123 -    }
   2.124 -
   2.125 -    max_limit = HYPERVISOR_VIRT_START - ldt_info.base_addr;
   2.126 -    if ( ldt_info.limit_in_pages )
   2.127 -        max_limit >>= PAGE_SHIFT;
   2.128 -    max_limit--;
   2.129 -    if ( (ldt_info.limit & 0xfffff) > (max_limit & 0xfffff) )
   2.130 -        ldt_info.limit = max_limit;
   2.131 +   	/* Allow LDTs to be cleared by the user. */
   2.132 +   	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
   2.133 +		if (oldmode ||
   2.134 +		    (ldt_info.contents == 0		&&
   2.135 +		     ldt_info.read_exec_only == 1	&&
   2.136 +		     ldt_info.seg_32bit == 0		&&
   2.137 +		     ldt_info.limit_in_pages == 0	&&
   2.138 +		     ldt_info.seg_not_present == 1	&&
   2.139 +		     ldt_info.useable == 0 )) {
   2.140 +			entry_1 = 0;
   2.141 +			entry_2 = 0;
   2.142 +			goto install;
   2.143 +		}
   2.144 +	}
   2.145  
   2.146 -    entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
   2.147 -        (ldt_info.limit & 0x0ffff);
   2.148 -    entry_2 = (ldt_info.base_addr & 0xff000000) |
   2.149 -        ((ldt_info.base_addr & 0x00ff0000) >> 16) |
   2.150 -        (ldt_info.limit & 0xf0000) |
   2.151 -        ((ldt_info.read_exec_only ^ 1) << 9) |
   2.152 -        (ldt_info.contents << 10) |
   2.153 -        ((ldt_info.seg_not_present ^ 1) << 15) |
   2.154 -        (ldt_info.seg_32bit << 22) |
   2.155 -        (ldt_info.limit_in_pages << 23) |
   2.156 -        0x7000;
   2.157 -    if (!oldmode)
   2.158 -        entry_2 |= (ldt_info.useable << 20);
   2.159 +	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
   2.160 +		  (ldt_info.limit & 0x0ffff);
   2.161 +	entry_2 = (ldt_info.base_addr & 0xff000000) |
   2.162 +		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
   2.163 +		  (ldt_info.limit & 0xf0000) |
   2.164 +		  ((ldt_info.read_exec_only ^ 1) << 9) |
   2.165 +		  (ldt_info.contents << 10) |
   2.166 +		  ((ldt_info.seg_not_present ^ 1) << 15) |
   2.167 +		  (ldt_info.seg_32bit << 22) |
   2.168 +		  (ldt_info.limit_in_pages << 23) |
   2.169 +		  0x7000;
   2.170 +	if (!oldmode)
   2.171 +		entry_2 |= (ldt_info.useable << 20);
   2.172  
   2.173 -    /* Install the new entry ...  */
   2.174 - install:
   2.175 -    error = HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2);
   2.176 +	/* Install the new entry ...  */
   2.177 +install:
   2.178 +	error = HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2);
   2.179  
   2.180 - out_unlock:
   2.181 -    up(&mm->context.sem);
   2.182 - out:
   2.183 -    return error;
   2.184 +out_unlock:
   2.185 +	up(&mm->context.sem);
   2.186 +out:
   2.187 +	return error;
   2.188  }
   2.189  
   2.190  asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
   2.191  {
   2.192 -    int ret = -ENOSYS;
   2.193 +	int ret = -ENOSYS;
   2.194  
   2.195 -    switch (func) {
   2.196 -    case 0:
   2.197 -        ret = read_ldt(ptr, bytecount);
   2.198 -        break;
   2.199 -    case 1:
   2.200 -        ret = write_ldt(ptr, bytecount, 1);
   2.201 -        break;
   2.202 -    case 2:
   2.203 -        ret = read_default_ldt(ptr, bytecount);
   2.204 -        break;
   2.205 -    case 0x11:
   2.206 -        ret = write_ldt(ptr, bytecount, 0);
   2.207 -        break;
   2.208 -    }
   2.209 -    return ret;
   2.210 +	switch (func) {
   2.211 +	case 0:
   2.212 +		ret = read_ldt(ptr, bytecount);
   2.213 +		break;
   2.214 +	case 1:
   2.215 +		ret = write_ldt(ptr, bytecount, 1);
   2.216 +		break;
   2.217 +	case 2:
   2.218 +		ret = read_default_ldt(ptr, bytecount);
   2.219 +		break;
   2.220 +	case 0x11:
   2.221 +		ret = write_ldt(ptr, bytecount, 0);
   2.222 +		break;
   2.223 +	}
   2.224 +	return ret;
   2.225  }
     3.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c	Fri Jul 16 20:04:00 2004 +0000
     3.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c	Sat Jul 17 13:01:38 2004 +0000
     3.3 @@ -182,7 +182,7 @@ static int write_ldt(void __user * ptr, 
     3.4  {
     3.5  	struct mm_struct * mm = current->mm;
     3.6  	__u32 entry_1, entry_2, *lp;
     3.7 -	unsigned long phys_lp, max_limit;
     3.8 +	unsigned long phys_lp;
     3.9  	int error;
    3.10  	struct user_desc ldt_info;
    3.11  
    3.12 @@ -203,14 +203,6 @@ static int write_ldt(void __user * ptr, 
    3.13  			goto out;
    3.14  	}
    3.15  
    3.16 -	/*
    3.17 -	 * This makes our tests for overlap with Xen space
    3.18 -	 * easier. There's no good reason to have a user segment
    3.19 -	 * starting this high anyway.
    3.20 -	 */
    3.21 -	if (ldt_info.base_addr >= PAGE_OFFSET)
    3.22 -		goto out;
    3.23 -
    3.24  	down(&mm->context.sem);
    3.25  	if (ldt_info.entry_number >= mm->context.size) {
    3.26  		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
    3.27 @@ -230,13 +222,6 @@ static int write_ldt(void __user * ptr, 
    3.28  		}
    3.29  	}
    3.30  
    3.31 -	max_limit = HYPERVISOR_VIRT_START - ldt_info.base_addr;
    3.32 -	if (ldt_info.limit_in_pages)
    3.33 -		max_limit >>= PAGE_SHIFT;
    3.34 -	max_limit--;
    3.35 -	if ((ldt_info.limit & 0xfffff) > (max_limit & 0xfffff))
    3.36 -		ldt_info.limit = max_limit;
    3.37 -
    3.38  	entry_1 = LDT_entry_a(&ldt_info);
    3.39  	entry_2 = LDT_entry_b(&ldt_info);
    3.40  	if (oldmode)
     4.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/process.c	Fri Jul 16 20:04:00 2004 +0000
     4.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/process.c	Sat Jul 17 13:01:38 2004 +0000
     4.3 @@ -331,24 +331,6 @@ void prepare_to_copy(struct task_struct 
     4.4  	unlazy_fpu(tsk);
     4.5  }
     4.6  
     4.7 -/* NB. This Xen-specific function is inlined in 'write_ldt'. */
     4.8 -static int truncate_user_desc(struct user_desc *info)
     4.9 -{
    4.10 -	unsigned long max_limit;
    4.11 -
    4.12 -	if (info->base_addr >= PAGE_OFFSET)
    4.13 -		return 0;
    4.14 -
    4.15 -	max_limit = HYPERVISOR_VIRT_START - info->base_addr;
    4.16 -	if (info->limit_in_pages)
    4.17 -		max_limit >>= PAGE_SHIFT;
    4.18 -	max_limit--;
    4.19 -	if ((info->limit & 0xfffff) > (max_limit & 0xfffff))
    4.20 -		info->limit = max_limit;
    4.21 -
    4.22 -	return 1;
    4.23 -}
    4.24 -
    4.25  int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
    4.26  	unsigned long unused,
    4.27  	struct task_struct * p, struct pt_regs * regs)
    4.28 @@ -399,9 +381,6 @@ int copy_thread(int nr, unsigned long cl
    4.29  		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
    4.30  			goto out;
    4.31  
    4.32 -		if (!truncate_user_desc(&info))
    4.33 -			goto out;
    4.34 -
    4.35  		desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
    4.36  		desc->a = LDT_entry_a(&info);
    4.37  		desc->b = LDT_entry_b(&info);
    4.38 @@ -717,9 +696,6 @@ asmlinkage int sys_set_thread_area(struc
    4.39  		return -EFAULT;
    4.40  	idx = info.entry_number;
    4.41  
    4.42 -	if (!truncate_user_desc(&info))
    4.43 -		return -EINVAL;
    4.44 -
    4.45  	/*
    4.46  	 * index -1 means the kernel should try to find and
    4.47  	 * allocate an empty descriptor:
     5.1 --- a/xen/arch/x86/dom0_ops.c	Fri Jul 16 20:04:00 2004 +0000
     5.2 +++ b/xen/arch/x86/dom0_ops.c	Sat Jul 17 13:01:38 2004 +0000
     5.3 @@ -133,8 +133,7 @@ void arch_getdomaininfo_ctxt(struct doma
     5.4          for ( i = 0; i < 16; i++ )
     5.5              c->gdt_frames[i] = 
     5.6                  l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
     5.7 -        c->gdt_ents = 
     5.8 -            (GET_GDT_ENTRIES(d) + 1) >> 3;
     5.9 +        c->gdt_ents = GET_GDT_ENTRIES(d);
    5.10      }
    5.11      c->guestos_ss  = d->thread.guestos_ss;
    5.12      c->guestos_esp = d->thread.guestos_sp;
     6.1 --- a/xen/arch/x86/memory.c	Fri Jul 16 20:04:00 2004 +0000
     6.2 +++ b/xen/arch/x86/memory.c	Sat Jul 17 13:01:38 2004 +0000
     6.3 @@ -178,7 +178,7 @@ int alloc_segdesc_page(struct pfn_info *
     6.4      int i;
     6.5  
     6.6      for ( i = 0; i < 512; i++ )
     6.7 -        if ( unlikely(!check_descriptor(descs[i*2], descs[i*2+1])) )
     6.8 +        if ( unlikely(!check_descriptor(&descs[i*2])) )
     6.9              goto fail;
    6.10  
    6.11      unmap_domain_mem(descs);
     7.1 --- a/xen/arch/x86/traps.c	Fri Jul 16 20:04:00 2004 +0000
     7.2 +++ b/xen/arch/x86/traps.c	Sat Jul 17 13:01:38 2004 +0000
     7.3 @@ -51,16 +51,7 @@
     7.4  #include <asm/i387.h>
     7.5  #include <asm/pdb.h>
     7.6  
     7.7 -#define GTBF_TRAP        1
     7.8 -#define GTBF_TRAP_NOCODE 2
     7.9 -#define GTBF_TRAP_CR2    4
    7.10 -struct guest_trap_bounce {
    7.11 -    unsigned long  error_code;        /*   0 */
    7.12 -    unsigned long  cr2;               /*   4 */
    7.13 -    unsigned short flags;             /*   8 */
    7.14 -    unsigned short cs;                /*  10 */
    7.15 -    unsigned long  eip;               /*  12 */
    7.16 -} guest_trap_bounce[NR_CPUS] = { { 0 } };
    7.17 +struct guest_trap_bounce guest_trap_bounce[NR_CPUS] = { { 0 } };
    7.18  
    7.19  #if defined(__i386__)
    7.20  
    7.21 @@ -451,6 +442,11 @@ asmlinkage void do_general_protection(st
    7.22              goto finish_propagation;
    7.23          }
    7.24      }
    7.25 +
    7.26 +#if defined(__i386__)
    7.27 +    if ( (error_code == 0) && gpf_emulate_4gb(regs) )
    7.28 +        return;
    7.29 +#endif
    7.30      
    7.31      /* Pass on GPF as is. */
    7.32      ti = current->thread.traps + 13;
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen/arch/x86/x86_32/emulate.c	Sat Jul 17 13:01:38 2004 +0000
     8.3 @@ -0,0 +1,553 @@
     8.4 +/******************************************************************************
     8.5 + * arch/x86/x86_32/emulate.c
     8.6 + * 
     8.7 + * Emulation of certain classes of IA32 instruction. Used to emulate 4GB
     8.8 + * segments, for example.
     8.9 + * 
    8.10 + * Copyright (c) 2004, K A Fraser
    8.11 + * 
    8.12 + * This program is free software; you can redistribute it and/or modify
    8.13 + * it under the terms of the GNU General Public License as published by
    8.14 + * the Free Software Foundation; either version 2 of the License, or
    8.15 + * (at your option) any later version.
    8.16 + * 
    8.17 + * This program is distributed in the hope that it will be useful,
    8.18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    8.19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    8.20 + * GNU General Public License for more details.
    8.21 + * 
    8.22 + * You should have received a copy of the GNU General Public License
    8.23 + * along with this program; if not, write to the Free Software
    8.24 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    8.25 + */
    8.26 +
    8.27 +#include <xen/config.h>
    8.28 +#include <xen/init.h>
    8.29 +#include <xen/sched.h>
    8.30 +#include <xen/lib.h>
    8.31 +#include <xen/errno.h>
    8.32 +#include <xen/mm.h>
    8.33 +#include <xen/perfc.h>
    8.34 +#include <asm/processor.h>
    8.35 +
    8.36 +int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
    8.37 +{
    8.38 +    struct domain *d = current;
    8.39 +    unsigned long *table, a, b;
    8.40 +    int            ldt = !!(seg & 4);
    8.41 +    int            idx = (seg >> 3) & 8191;
    8.42 +
    8.43 +    /* Get base and check limit. */
    8.44 +    if ( ldt )
    8.45 +    {
    8.46 +        table = (unsigned long *)LDT_VIRT_START;
    8.47 +        if ( idx >= d->mm.ldt_ents )
    8.48 +            goto fail;
    8.49 +    }
    8.50 +    else /* gdt */
    8.51 +    {
    8.52 +        table = (unsigned long *)GET_GDT_ADDRESS(d);
    8.53 +        if ( idx >= GET_GDT_ENTRIES(d) )
    8.54 +            goto fail;
    8.55 +    }
    8.56 +
    8.57 +    /* Grab the segment descriptor. */
    8.58 +    if ( __get_user(a, &table[2*idx+0]) ||
    8.59 +         __get_user(b, &table[2*idx+1]) )
    8.60 +        goto fail; /* Barking up the wrong tree. Decode needs a page fault.*/
    8.61 +
    8.62 +    /* We only parse 32-bit code and data segments. */
    8.63 +    if ( (b & (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB)) != 
    8.64 +         (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB) )
    8.65 +        goto fail;
    8.66 +
    8.67 +    /* Decode base and limit. */
    8.68 +    *base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    8.69 +    *limit = ((b & 0xf0000) | (a & 0x0ffff)) + 1;
    8.70 +    if ( (b & _SEGMENT_G) )
    8.71 +        *limit <<= 12;
    8.72 +
    8.73 +    /*
    8.74 +     * Anything that looks like a truncated segment we assume ought really
    8.75 +     * to be a 4GB segment. DANGER!
    8.76 +     */
    8.77 +    if ( (PAGE_OFFSET - (*base + *limit)) < PAGE_SIZE )
    8.78 +        *limit = 0;
    8.79 +
    8.80 +    return 1;
    8.81 +
    8.82 + fail:
    8.83 +    return 0;
    8.84 +}
    8.85 +
    8.86 +int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
    8.87 +{
    8.88 +    unsigned long base, limit;
    8.89 +
    8.90 +    if ( !get_baselimit(seg, &base, &limit) )
    8.91 +        return 0;
    8.92 +
    8.93 +    if ( off > (limit-1) )
    8.94 +        return 0;
    8.95 +
    8.96 +    *linear = base + off;
    8.97 +
    8.98 +    return 1;
    8.99 +}
   8.100 +
   8.101 +void *decode_reg(struct pt_regs *regs, u8 b)
   8.102 +{
   8.103 +    switch ( b & 7 )
   8.104 +    {
   8.105 +    case 0: return &regs->eax;
   8.106 +    case 1: return &regs->ecx;
   8.107 +    case 2: return &regs->edx;
   8.108 +    case 3: return &regs->ebx;
   8.109 +    case 4: return &regs->esp;
   8.110 +    case 5: return &regs->ebp;
   8.111 +    case 6: return &regs->esi;
   8.112 +    case 7: return &regs->edi;
   8.113 +    }
   8.114 +
   8.115 +    return NULL;
   8.116 +}
   8.117 +
   8.118 +/*
   8.119 + * Decode an effective address:
   8.120 + *  @ppb (IN/OUT): IN == address of ModR/M byte; OUT == byte following EA.
   8.121 + *  @preg (OUT)  : address in pt_regs block of the EA register parameter.
   8.122 + *  @pmem (OUT)  : address of the EA memory parameter.
   8.123 + *  @pseg (IN)   : address in pt_regs block of the override segment.
    8.124 + *  @regs (IN)   : address of the pt_regs block.
   8.125 + */
   8.126 +int decode_effective_address(u8 **ppb, void **preg, void **pmem,
   8.127 +                             unsigned int *pseg, struct pt_regs *regs)
   8.128 +{
   8.129 +    u8            modrm, mod, reg, rm, *pb = *ppb;
   8.130 +    void         *memreg, *regreg;
   8.131 +    unsigned long ea, limit, offset;
   8.132 +    u8            disp8;
   8.133 +    u32           disp32 = 0;
   8.134 +
   8.135 +    if ( get_user(modrm, pb) )
   8.136 +    {
   8.137 +        DPRINTK("Fault while extracting modrm byte\n");
   8.138 +        return 0;
   8.139 +    }
   8.140 +
   8.141 +    pb++;
   8.142 +
   8.143 +    mod = (modrm >> 6) & 3;
   8.144 +    reg = (modrm >> 3) & 7;
   8.145 +    rm  = (modrm >> 0) & 7;
   8.146 +
   8.147 +    if ( rm == 4 )
   8.148 +    {
   8.149 +        DPRINTK("FIXME: Add decoding for the SIB byte.\n");
   8.150 +        return 0;
   8.151 +    }
   8.152 +
   8.153 +    /* Decode Reg and R/M fields. */
   8.154 +    regreg = decode_reg(regs, reg);
   8.155 +    memreg = decode_reg(regs, rm);
   8.156 +
   8.157 +    /* Decode Mod field. */
   8.158 +    switch ( modrm >> 6 )
   8.159 +    {
   8.160 +    case 0:
   8.161 +        if ( pseg == NULL )
   8.162 +            pseg = &regs->xds;
   8.163 +        disp32 = 0;
   8.164 +        if ( rm == 5 ) /* disp32 rather than (EBP) */
   8.165 +        {
   8.166 +            memreg = NULL;
   8.167 +            if ( get_user(disp32, (u32 *)pb) )
   8.168 +            {
   8.169 +                DPRINTK("Fault while extracting <disp8>.\n");
   8.170 +                return 0;
   8.171 +            }
   8.172 +            pb += 4;
   8.173 +        }
   8.174 +        break;
   8.175 +
   8.176 +    case 1:
   8.177 +        if ( pseg == NULL ) /* NB. EBP defaults to SS */
   8.178 +            pseg = (rm == 5) ? &regs->xss : &regs->xds;
   8.179 +        if ( get_user(disp8, pb) )
   8.180 +        {
   8.181 +            DPRINTK("Fault while extracting <disp8>.\n");
   8.182 +            return 0;
   8.183 +        }
   8.184 +        pb++;
   8.185 +        disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8;;
   8.186 +        break;
   8.187 +
   8.188 +    case 2:
   8.189 +        if ( pseg == NULL ) /* NB. EBP defaults to SS */
   8.190 +            pseg = (rm == 5) ? &regs->xss : &regs->xds;
   8.191 +        if ( get_user(disp32, (u32 *)pb) )
   8.192 +        {
   8.193 +            DPRINTK("Fault while extracting <disp8>.\n");
   8.194 +            return 0;
   8.195 +        }
   8.196 +        pb += 4;
   8.197 +        break;
   8.198 +
   8.199 +    case 3:
   8.200 +        DPRINTK("Not a memory operand!\n");
   8.201 +        return 0;
   8.202 +    }
   8.203 +
   8.204 +    if ( !get_baselimit((u16)(*pseg), &ea, &limit) )
   8.205 +        return 0;
   8.206 +    if ( limit != 0 )
   8.207 +    {
   8.208 +        DPRINTK("Bailing: not a 4GB data segment.\n");
   8.209 +        return 0;
   8.210 +    }
   8.211 +
   8.212 +    offset = disp32;
   8.213 +    if ( memreg != NULL )
   8.214 +        offset += *(u32 *)memreg;
   8.215 +    if ( (offset & 0xf0000000) != 0xf0000000 )
   8.216 +    {
   8.217 +        DPRINTK("Bailing: not a -ve offset into 4GB segment.\n");
   8.218 +        return 0;
   8.219 +    }
   8.220 +
   8.221 +    ea += offset;
   8.222 +    if ( ea > (PAGE_OFFSET - PAGE_SIZE) )
   8.223 +    {
   8.224 +        DPRINTK("!!!! DISALLOWING UNSAFE ACCESS !!!!\n");
   8.225 +        return 0;
   8.226 +    }
   8.227 +
   8.228 +    *ppb  = pb;
   8.229 +    *preg = regreg;
   8.230 +    *pmem = (void *)ea;
   8.231 +
   8.232 +    return 1;
   8.233 +}
   8.234 +
   8.235 +/*
   8.236 + * Called from the general-protection fault handler to attempt to decode
   8.237 + * and emulate an instruction that depends on 4GB segments. At this point
   8.238 + * we assume that the instruction itself is paged into memory (the CPU
   8.239 + * must have triggered this in order to decode the instruction itself).
   8.240 + */
   8.241 +int gpf_emulate_4gb(struct pt_regs *regs)
   8.242 +{
   8.243 +    struct domain *d = current;
   8.244 +    trap_info_t   *ti;
   8.245 +    u8            *eip, *nextbyte, b, mb, rb;
   8.246 +    u16            mw, rw;
   8.247 +    u32            ml, rl, eflags;
   8.248 +    unsigned int  *pseg = NULL;
   8.249 +    int            i;
   8.250 +    int            opsz_override = 0;
   8.251 +    void          *reg, *mem;
   8.252 +    struct guest_trap_bounce *gtb;
   8.253 +
   8.254 +    if ( !linearise_address((u16)regs->xcs, regs->eip, (unsigned long *)&eip) )
   8.255 +    {
   8.256 +        DPRINTK("Cannot linearise %04x:%08lx\n", regs->xcs, regs->eip);
   8.257 +        return 0;
   8.258 +    }
   8.259 +
   8.260 +    /* Parse prefix bytes. We're basically looking for segment override. */
   8.261 +    for ( i = 0; i < 4; i++ )
   8.262 +    {
   8.263 +        if ( get_user(b, &eip[i]) )
   8.264 +        {
   8.265 +            DPRINTK("Fault while accessing byte %d of instruction\n", i);
   8.266 +            return 0;
   8.267 +        }
   8.268 +        
   8.269 +        switch ( b )
   8.270 +        {
   8.271 +        case 0xf0: /* LOCK */
   8.272 +        case 0xf2: /* REPNE/REPNZ */
   8.273 +        case 0xf3: /* REP/REPE/REPZ */
   8.274 +        case 0x67: /* Address-size override */
   8.275 +            DPRINTK("Unhandleable prefix byte %02x\n", b);
   8.276 +            goto undecodeable;
   8.277 +        case 0x66: /* Operand-size override */
   8.278 +            opsz_override = 1;
   8.279 +            break;
   8.280 +        case 0x2e: /* CS override */
   8.281 +            pseg = &regs->xcs;
   8.282 +            break;
   8.283 +        case 0x3e: /* DS override */
   8.284 +            pseg = &regs->xds;
   8.285 +            break;
   8.286 +        case 0x26: /* ES override */
   8.287 +            pseg = &regs->xes;
   8.288 +            break;
   8.289 +        case 0x64: /* FS override */
   8.290 +            pseg = &regs->xfs;
   8.291 +            break;
   8.292 +        case 0x65: /* GS override */
   8.293 +            pseg = &regs->xgs;
   8.294 +            break;
   8.295 +        case 0x36: /* SS override */
   8.296 +            pseg = &regs->xss;
   8.297 +            break;
   8.298 +        default: /* Not a prefix byte */
   8.299 +            goto done_prefix;
   8.300 +        }
   8.301 +    }
   8.302 + done_prefix:
   8.303 +
   8.304 +    nextbyte = &eip[i+1];
   8.305 +    if ( !decode_effective_address(&nextbyte, &reg, &mem, pseg, regs) )
   8.306 +        goto undecodeable;
   8.307 +
   8.308 +    /* Only handle single-byte opcodes right now. Sufficient for MOV. */
   8.309 +    /*
   8.310 +     * XXX Now I see how this decode routine is panning out, it needs
   8.311 +     * refactoring. Lots of duplicated cruft in here...
   8.312 +     */
   8.313 +    switch ( b )
   8.314 +    {
   8.315 +    case 0x88: /* movb r,r/m */
   8.316 +        if ( __put_user(*(u8 *)reg, (u8 *)mem) )
   8.317 +            goto page_fault_w;
   8.318 +        regs->eip += nextbyte - eip;
   8.319 +        break;
   8.320 +    case 0x89: /* movl r,r/m */
   8.321 +        if ( opsz_override )
   8.322 +        {
   8.323 +            if ( __put_user(*(u16 *)reg, (u16 *)mem) )
   8.324 +                goto page_fault_w;
   8.325 +        }
   8.326 +        else
   8.327 +        {
   8.328 +            if ( __put_user(*(u32 *)reg, (u32 *)mem) )
   8.329 +                goto page_fault_w;
   8.330 +        }
   8.331 +        regs->eip += nextbyte - eip;
   8.332 +        break;
   8.333 +    case 0x8a: /* movb r/m,r */
   8.334 +        if ( __get_user(*(u8 *)reg, (u8 *)mem) )
   8.335 +            goto page_fault_r;
   8.336 +        regs->eip += nextbyte - eip;
   8.337 +        break;
   8.338 +    case 0x8b: /* movl r/m,r */
   8.339 +        if ( opsz_override )
   8.340 +        {
   8.341 +            if ( __get_user(*(u16 *)reg, (u16 *)mem) )
   8.342 +                goto page_fault_r;
   8.343 +        }
   8.344 +        else
   8.345 +        {
   8.346 +            if ( __get_user(*(u32 *)reg, (u32 *)mem) )
   8.347 +                goto page_fault_r;
   8.348 +        }
   8.349 +        regs->eip += nextbyte - eip;
   8.350 +        break;
   8.351 +    case 0xc6: /* movb imm,r/m */
   8.352 +        if ( reg != &regs->eax ) /* Reg == /0 */
   8.353 +            goto undecodeable;
   8.354 +        if ( get_user(rb, nextbyte) )
   8.355 +        {
   8.356 +            DPRINTK("Fault while extracting immediate byte\n");
   8.357 +            return 0;
   8.358 +        }
   8.359 +        if ( __put_user(rb, (u8 *)mem) )
   8.360 +            goto page_fault_w;
   8.361 +        regs->eip += nextbyte - eip + 1;
   8.362 +        break;
   8.363 +    case 0xc7: /* movl imm,r/m */
   8.364 +        if ( reg != &regs->eax ) /* Reg == /0 */
   8.365 +            goto undecodeable;
   8.366 +        if ( opsz_override )
   8.367 +        {
   8.368 +            if ( get_user(rw, (u16 *)nextbyte) )
   8.369 +            {
   8.370 +                DPRINTK("Fault while extracting immediate word\n");
   8.371 +                return 0;
   8.372 +            }
   8.373 +            if ( __put_user(rw, (u16 *)mem) )
   8.374 +                goto page_fault_w;
   8.375 +            regs->eip += nextbyte - eip + 2;
   8.376 +        }
   8.377 +        else
   8.378 +        {
   8.379 +            if ( get_user(rl, (u32 *)nextbyte) )
   8.380 +            {
   8.381 +                DPRINTK("Fault while extracting immediate longword\n");
   8.382 +                return 0;
   8.383 +            }
   8.384 +            if ( __put_user(rl, (u32 *)mem) )
   8.385 +                goto page_fault_w;
   8.386 +            regs->eip += nextbyte - eip + 4;
   8.387 +        }
   8.388 +        break;
   8.389 +    case 0x80: /* cmpb imm8,r/m */
   8.390 +        if ( reg != &regs->edi ) /* Reg == /7 */
   8.391 +            goto undecodeable;
   8.392 +        if ( get_user(rb, nextbyte) )
   8.393 +        {
   8.394 +            DPRINTK("Fault while extracting immediate byte\n");
   8.395 +            return 0;
   8.396 +        }
   8.397 +        if ( __get_user(mb, (u8 *)mem) )
   8.398 +            goto page_fault_r;
   8.399 +        __asm__ __volatile__ (
   8.400 +            "cmpb %b1,%b2 ; pushf ; popl %0"
   8.401 +            : "=a" (eflags)
   8.402 +            : "0" (rb), "b" (mb) );
   8.403 +        regs->eflags &= ~0x8d5;     /* OF,SF,ZF,AF,PF,CF */
   8.404 +        regs->eflags |= eflags & 0x8d5;
   8.405 +        regs->eip += nextbyte - eip + 1;
   8.406 +        break;
   8.407 +    case 0x81: /* cmpl imm32,r/m */
   8.408 +        if ( reg != &regs->edi ) /* Reg == /7 */
   8.409 +            goto undecodeable;
   8.410 +        if ( opsz_override )
   8.411 +        {
   8.412 +            if ( get_user(rw, (u16 *)nextbyte) )
   8.413 +            {
   8.414 +                DPRINTK("Fault while extracting immediate word\n");
   8.415 +                return 0;
   8.416 +            }
   8.417 +            if ( __get_user(mw, (u16 *)mem) )
   8.418 +                goto page_fault_r;
   8.419 +            __asm__ __volatile__ (
   8.420 +                "cmpw %w1,%w2 ; pushf ; popl %0"
   8.421 +                : "=a" (eflags)
   8.422 +                : "0" (rw), "b" (mw) );
   8.423 +            regs->eip += nextbyte - eip + 2;
   8.424 +        }
   8.425 +        else
   8.426 +        {
   8.427 +            if ( get_user(rl, (u32 *)nextbyte) )
   8.428 +            {
   8.429 +                DPRINTK("Fault while extracting immediate longword\n");
   8.430 +                return 0;
   8.431 +            }
   8.432 +            if ( __get_user(ml, (u32 *)mem) )
   8.433 +                goto page_fault_r;
   8.434 +            __asm__ __volatile__ (
   8.435 +                "cmpl %1,%2 ; pushf ; popl %0"
   8.436 +                : "=a" (eflags)
   8.437 +                : "0" (rl), "b" (ml) );
   8.438 +            regs->eip += nextbyte - eip + 4;
   8.439 +        }
   8.440 +        regs->eflags &= ~0x8d5;     /* OF,SF,ZF,AF,PF,CF */
   8.441 +        regs->eflags |= eflags & 0x8d5;
   8.442 +        break;
   8.443 +    case 0x83: /* cmpl imm8,r/m */
   8.444 +        if ( reg != &regs->edi ) /* Reg == /7 */
   8.445 +            goto undecodeable;
   8.446 +        if ( get_user(rb, nextbyte) )
   8.447 +        {
   8.448 +            DPRINTK("Fault while extracting immediate byte\n");
   8.449 +            return 0;
   8.450 +        }
   8.451 +        if ( opsz_override )
   8.452 +        {
   8.453 +            rw = (rb & 0x80) ? (rb | ~0xff) : rb;
   8.454 +            if ( __get_user(mw, (u16 *)mem) )
   8.455 +                goto page_fault_r;
   8.456 +            __asm__ __volatile__ (
   8.457 +                "cmpw %w1,%w2 ; pushf ; popl %0"
   8.458 +                : "=a" (eflags)
   8.459 +                : "0" (rw), "b" (mw) );
   8.460 +            /* eip is advanced once, by the common update after this if/else. */
   8.461 +        }
   8.462 +        else
   8.463 +        {
   8.464 +            rl = (rb & 0x80) ? (rb | ~0xff) : rb;
   8.465 +            if ( __get_user(ml, (u32 *)mem) )
   8.466 +                goto page_fault_r;
   8.467 +            __asm__ __volatile__ (
   8.468 +                "cmpl %1,%2 ; pushf ; popl %0"
   8.469 +                : "=a" (eflags)
   8.470 +                : "0" (rl), "b" (ml) );
   8.471 +        }
   8.472 +        regs->eflags &= ~0x8d5;     /* OF,SF,ZF,AF,PF,CF */
   8.473 +        regs->eflags |= eflags & 0x8d5;
   8.474 +        regs->eip += nextbyte - eip + 1;
   8.475 +        break;
   8.476 +    case 0x38: /* cmpb r,r/m */
   8.477 +    case 0x3a: /* cmpb r/m,r */
   8.478 +        rb = *(u8 *)reg;
   8.479 +        if ( __get_user(mb, (u8 *)mem) )
   8.480 +            goto page_fault_r;
   8.481 +        __asm__ __volatile__ (
   8.482 +            "cmpb %b1,%b2 ; pushf ; popl %0"
   8.483 +            : "=a" (eflags)
   8.484 +            : "0" ((b==0x38)?rb:mb), "b" ((b==0x38)?mb:rb) );
   8.485 +        regs->eflags &= ~0x8d5;     /* OF,SF,ZF,AF,PF,CF */
   8.486 +        regs->eflags |= eflags & 0x8d5;
   8.487 +        regs->eip += nextbyte - eip;
   8.488 +        break;
   8.489 +    case 0x39: /* cmpl r,r/m */
   8.490 +    case 0x3b: /* cmpl r/m,r */
   8.491 +        if ( opsz_override )
   8.492 +        {
   8.493 +            rw = *(u16 *)reg;
   8.494 +            if ( __get_user(mw, (u16 *)mem) )
   8.495 +                goto page_fault_r;
   8.496 +            __asm__ __volatile__ (
   8.497 +                "cmpw %w1,%w2 ; pushf ; popl %0"
   8.498 +                : "=a" (eflags)
   8.499 +                : "0" ((b==0x39)?rw:mw), "b" ((b==0x39)?mw:rw) );
   8.500 +        }
   8.501 +        else
   8.502 +        {
   8.503 +            rl = *(u32 *)reg;
   8.504 +            if ( __get_user(ml, (u32 *)mem) )
   8.505 +                goto page_fault_r;
   8.506 +            __asm__ __volatile__ (
   8.507 +                "cmpl %1,%2 ; pushf ; popl %0"
   8.508 +                : "=a" (eflags)
   8.509 +                : "0" ((b==0x39)?rl:ml), "b" ((b==0x39)?ml:rl) );
   8.510 +        }
   8.511 +        regs->eflags &= ~0x8d5;     /* OF,SF,ZF,AF,PF,CF */
   8.512 +        regs->eflags |= eflags & 0x8d5;
   8.513 +        regs->eip += nextbyte - eip;
   8.514 +        break;
   8.515 +    default:
   8.516 +        DPRINTK("Unhandleable opcode byte %02x\n", b);
   8.517 +        goto undecodeable;
   8.518 +    }
   8.519 +
   8.520 +    perfc_incrc(emulations);
   8.521 +
   8.522 +    /* Success! */
   8.523 +    return 1;
   8.524 +
   8.525 + undecodeable:
   8.526 +    printk("Undecodable instruction %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
   8.527 +           "caused GPF(0) at %04x:%08lx\n",
   8.528 +           eip[0], eip[1], eip[2], eip[3],
   8.529 +           eip[4], eip[5], eip[6], eip[7],
   8.530 +           regs->xcs, regs->eip);
   8.531 +    return 0;
   8.532 +
   8.533 + page_fault_w:
   8.534 +    ti  = &d->thread.traps[14];
   8.535 +    gtb = &guest_trap_bounce[d->processor];
   8.536 +    /*
   8.537 +     * XXX We don't distinguish between page-not-present and read-only.
   8.538 +     * Linux doesn't care, but this might need fixing if others do.
   8.539 +     */
   8.540 +    gtb->error_code = 6; /* user fault, write access, page not present */
   8.541 +    goto page_fault_common;
   8.542 + page_fault_r:
   8.543 +    ti  = &d->thread.traps[14];
   8.544 +    gtb = &guest_trap_bounce[d->processor];
   8.545 +    gtb->error_code = 4; /* user fault, read access, page not present */
   8.546 + page_fault_common:
   8.547 +    gtb->flags      = GTBF_TRAP_CR2;
   8.548 +    gtb->cr2        = (unsigned long)mem;
   8.549 +    gtb->cs         = ti->cs;
   8.550 +    gtb->eip        = ti->address;
   8.551 +    if ( TI_GET_IF(ti) )
   8.552 +        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
   8.553 +    return 1;
   8.554 +}
   8.555 +
   8.556 +
     9.1 --- a/xen/arch/x86/x86_32/mm.c	Fri Jul 16 20:04:00 2004 +0000
     9.2 +++ b/xen/arch/x86/x86_32/mm.c	Sat Jul 17 13:01:38 2004 +0000
     9.3 @@ -1,7 +1,7 @@
     9.4  /******************************************************************************
     9.5 - * arch/i386/mm.c
     9.6 + * arch/x86/x86_32/mm.c
     9.7   * 
     9.8 - * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
     9.9 + * Modifications to Linux original are copyright (c) 2004, K A Fraser
    9.10   * 
    9.11   * This program is free software; you can redistribute it and/or modify
    9.12   * it under the terms of the GNU General Public License as published by
    9.13 @@ -164,9 +164,9 @@ long do_stack_switch(unsigned long ss, u
    9.14  
    9.15  
    9.16  /* Returns TRUE if given descriptor is valid for GDT or LDT. */
    9.17 -int check_descriptor(unsigned long a, unsigned long b)
    9.18 +int check_descriptor(unsigned long *d)
    9.19  {
    9.20 -    unsigned long base, limit;
    9.21 +    unsigned long base, limit, a = d[0], b = d[1];
    9.22  
    9.23      /* A not-present descriptor will always fault, so is safe. */
    9.24      if ( !(b & _SEGMENT_P) ) 
    9.25 @@ -211,15 +211,27 @@ int check_descriptor(unsigned long a, un
    9.26          goto good;
    9.27      }
    9.28      
    9.29 -    /* Check that base/limit do not overlap Xen-private space. */
    9.30 +    /* Check that base is at least a page away from Xen-private area. */
    9.31      base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    9.32 +    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
    9.33 +        goto bad;
    9.34 +
    9.35 +    /* Check and truncate the limit if necessary. */
    9.36      limit = (b&0xf0000) | (a&0xffff);
    9.37      limit++; /* We add one because limit is inclusive. */
    9.38      if ( (b & _SEGMENT_G) )
    9.39          limit <<= 12;
    9.40      if ( ((base + limit) <= base) || 
    9.41           ((base + limit) > PAGE_OFFSET) )
    9.42 -        goto bad;
    9.43 +    {
    9.44 +        /* Need to truncate. Calculate and poke a best-effort limit. */
    9.45 +        limit = PAGE_OFFSET - base;
    9.46 +        if ( (b & _SEGMENT_G) )
    9.47 +            limit >>= 12;
    9.48 +        limit--;
    9.49 +        d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
    9.50 +        d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
    9.51 +    }
    9.52  
    9.53   good:
    9.54      return 1;
    9.55 @@ -275,7 +287,7 @@ long set_gdt(struct domain *d,
    9.56              mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    9.57  
    9.58      SET_GDT_ADDRESS(d, GDT_VIRT_START);
    9.59 -    SET_GDT_ENTRIES(d, (entries*8)-1);
    9.60 +    SET_GDT_ENTRIES(d, entries);
    9.61  
    9.62      return 0;
    9.63  
    9.64 @@ -311,11 +323,14 @@ long do_set_gdt(unsigned long *frame_lis
    9.65  long do_update_descriptor(
    9.66      unsigned long pa, unsigned long word1, unsigned long word2)
    9.67  {
    9.68 -    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
    9.69 +    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    9.70      struct pfn_info *page;
    9.71      long ret = -EINVAL;
    9.72  
    9.73 -    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
    9.74 +    d[0] = word1;
    9.75 +    d[1] = word2;
    9.76 +
    9.77 +    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
    9.78          return -EINVAL;
    9.79  
    9.80      page = &frame_table[pfn];
    9.81 @@ -346,8 +361,7 @@ long do_update_descriptor(
    9.82  
    9.83      /* All is good so make the update. */
    9.84      gdt_pent = map_domain_mem(pa);
    9.85 -    gdt_pent[0] = word1;
    9.86 -    gdt_pent[1] = word2;
    9.87 +    memcpy(gdt_pent, d, 8);
    9.88      unmap_domain_mem(gdt_pent);
    9.89  
    9.90      put_page_type(page);
    10.1 --- a/xen/include/asm-x86/desc.h	Fri Jul 16 20:04:00 2004 +0000
    10.2 +++ b/xen/include/asm-x86/desc.h	Sat Jul 17 13:01:38 2004 +0000
    10.3 @@ -32,6 +32,7 @@
    10.4  #define _SEGMENT_S       ( 1<<12) /* System descriptor (yes iff S==0) */
    10.5  #define _SEGMENT_DPL     ( 3<<13) /* Descriptor Privilege Level */
    10.6  #define _SEGMENT_P       ( 1<<15) /* Segment Present */
    10.7 +#define _SEGMENT_DB      ( 1<<22) /* 16- or 32-bit segment */
    10.8  #define _SEGMENT_G       ( 1<<23) /* Granularity */
    10.9  
   10.10  #ifndef __ASSEMBLY__
    11.1 --- a/xen/include/asm-x86/mm.h	Fri Jul 16 20:04:00 2004 +0000
    11.2 +++ b/xen/include/asm-x86/mm.h	Sat Jul 17 13:01:38 2004 +0000
    11.3 @@ -274,7 +274,7 @@ static inline int get_page_and_type(stru
    11.4      ASSERT(((_p)->count_and_flags & PGC_count_mask) != 0);  \
    11.5      ASSERT((_p)->u.domain == (_d))
    11.6  
    11.7 -int check_descriptor(unsigned long a, unsigned long b);
    11.8 +int check_descriptor(unsigned long *d);
    11.9  
   11.10  /*
   11.11   * Use currently-executing domain's pagetables on the specified CPUs.
   11.12 @@ -298,7 +298,7 @@ extern unsigned long *machine_to_phys_ma
   11.13  /* Part of the domain API. */
   11.14  int do_mmu_update(mmu_update_t *updates, int count, int *success_count);
   11.15  
   11.16 -#define DEFAULT_GDT_ENTRIES     ((LAST_RESERVED_GDT_ENTRY*8)+7)
   11.17 +#define DEFAULT_GDT_ENTRIES     (LAST_RESERVED_GDT_ENTRY+1)
   11.18  #define DEFAULT_GDT_ADDRESS     ((unsigned long)gdt_table)
   11.19  
   11.20  #ifdef MEMORY_GUARD
    12.1 --- a/xen/include/asm-x86/processor.h	Fri Jul 16 20:04:00 2004 +0000
    12.2 +++ b/xen/include/asm-x86/processor.h	Sat Jul 17 13:01:38 2004 +0000
    12.3 @@ -345,6 +345,20 @@ long set_fast_trap(struct domain *p, int
    12.4  
    12.5  #endif /* __x86_64__ */
    12.6  
    12.7 +#define GTBF_TRAP        1
    12.8 +#define GTBF_TRAP_NOCODE 2
    12.9 +#define GTBF_TRAP_CR2    4
   12.10 +struct guest_trap_bounce {
   12.11 +    unsigned long  error_code;        /*   0 */
   12.12 +    unsigned long  cr2;               /*   4 */
   12.13 +    unsigned short flags;             /*   8 */
   12.14 +    unsigned short cs;                /*  10 */
   12.15 +    unsigned long  eip;               /*  12 */
   12.16 +};
   12.17 +extern struct guest_trap_bounce guest_trap_bounce[];
   12.18 +
   12.19 +extern int gpf_emulate_4gb(struct pt_regs *regs);
   12.20 +
   12.21  struct mm_struct {
   12.22      /*
   12.23       * Every domain has a L1 pagetable of its own. Per-domain mappings
   12.24 @@ -401,10 +415,10 @@ static inline void write_ptbase(struct m
   12.25  }
   12.26  
   12.27  /* Convenient accessor for mm.gdt. */
   12.28 -#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
   12.29 +#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (((_e)<<3)-1))
   12.30  #define SET_GDT_ADDRESS(_p, _a) ((*(unsigned long *)((_p)->mm.gdt + 2)) = (_a))
   12.31 -#define GET_GDT_ENTRIES(_p)     ((*(u16 *)((_p)->mm.gdt + 0)))
   12.32 -#define GET_GDT_ADDRESS(_p)     ((*(unsigned long *)((_p)->mm.gdt + 2)))
   12.33 +#define GET_GDT_ENTRIES(_p)     (((*(u16 *)((_p)->mm.gdt + 0))+1)>>3)
   12.34 +#define GET_GDT_ADDRESS(_p)     (*(unsigned long *)((_p)->mm.gdt + 2))
   12.35  
   12.36  void destroy_gdt(struct domain *d);
   12.37  long set_gdt(struct domain *d, 
    13.1 --- a/xen/include/xen/perfc_defn.h	Fri Jul 16 20:04:00 2004 +0000
    13.2 +++ b/xen/include/xen/perfc_defn.h	Sat Jul 17 13:01:38 2004 +0000
    13.3 @@ -1,3 +1,5 @@
    13.4 +
    13.5 +PERFCOUNTER_CPU (emulations,   "instructions emulated" )
    13.6  
    13.7  PERFCOUNTER_CPU( irqs,         "#interrupts" )
    13.8  PERFCOUNTER_CPU( ipis,         "#IPIs" )