direct-io.hg

changeset 2182:4f4a42993157

bitkeeper revision 1.1159.17.7 (41193c6d91e3zHZIZDPZuDFK592hhw)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author kaf24@scramble.cl.cam.ac.uk
date Tue Aug 10 21:21:49 2004 +0000 (2004-08-10)
parents dc3ac4b676dd 3d198cf1331f
children 8651fa465906
files linux-2.6.7-xen-sparse/arch/xen/i386/mm/fault.c linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h
line diff
     1.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/fault.c	Tue Aug 10 21:12:00 2004 +0000
     1.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/fault.c	Tue Aug 10 21:21:49 2004 +0000
     1.3 @@ -248,7 +248,8 @@ asmlinkage void do_page_fault(struct pt_
     1.4  	 * (error_code & 4) == 0, and that the fault was not a
     1.5  	 * protection error (error_code & 1) == 0.
     1.6  	 */
     1.7 -	if (unlikely(address >= TASK_SIZE)) { 
     1.8 +	if (unlikely(address >= TASK_SIZE) ||
     1.9 +	    unlikely(address < (FIRST_USER_PGD_NR<<PGDIR_SHIFT))) { 
    1.10  		if (!(error_code & 5))
    1.11  			goto vmalloc_fault;
    1.12  		/* 
     2.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue Aug 10 21:12:00 2004 +0000
     2.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue Aug 10 21:21:49 2004 +0000
     2.3 @@ -258,16 +258,21 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
     2.4  	if (PTRS_PER_PMD == 1)
     2.5  		spin_lock_irqsave(&pgd_lock, flags);
     2.6  
     2.7 -	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
     2.8 -			swapper_pg_dir + USER_PTRS_PER_PGD,
     2.9 -			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    2.10 +	memcpy((pgd_t *)pgd,
    2.11 +			swapper_pg_dir,
    2.12 +			FIRST_USER_PGD_NR * sizeof(pgd_t));
    2.13 +	memcpy((pgd_t *)pgd + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
    2.14 +			swapper_pg_dir + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
    2.15 +			(PTRS_PER_PGD - USER_PTRS_PER_PGD -
    2.16 +			 FIRST_USER_PGD_NR) * sizeof(pgd_t));
    2.17  
    2.18  	if (PTRS_PER_PMD > 1)
    2.19  		goto out;
    2.20  
    2.21  	pgd_list_add(pgd);
    2.22  	spin_unlock_irqrestore(&pgd_lock, flags);
    2.23 -	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    2.24 +	memset((pgd_t *)pgd + FIRST_USER_PGD_NR,
    2.25 +			0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    2.26   out:
    2.27  	__make_page_readonly(pgd);
    2.28  	queue_pgd_pin(__pa(pgd));
     3.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Tue Aug 10 21:12:00 2004 +0000
     3.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Tue Aug 10 21:21:49 2004 +0000
     3.3 @@ -27,11 +27,12 @@
     3.4  #include <linux/sched.h>
     3.5  #include <linux/kernel.h>
     3.6  #include <linux/highmem.h>
     3.7 +#include <linux/vmalloc.h>
     3.8  #include <asm/fixmap.h>
     3.9  #include <asm/pgtable.h>
    3.10  #include <asm/uaccess.h>
    3.11  
    3.12 -#if 0
    3.13 +#if 1
    3.14  #define ASSERT(_p) \
    3.15      if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
    3.16      __LINE__, __FILE__); *(int*)0=0; }
    3.17 @@ -43,35 +44,57 @@
    3.18  #define DPRINTK(_f, _a...) ((void)0)
    3.19  #endif
    3.20  
    3.21 +static char            *fixup_buf;
    3.22 +#define FIXUP_BUF_USER  PAGE_SIZE
    3.23 +#define FIXUP_BUF_ORDER 1
    3.24 +#define FIXUP_BUF_SIZE  (PAGE_SIZE<<FIXUP_BUF_ORDER)
    3.25 +#define PATCH_LEN       5
    3.26 +
    3.27  struct fixup_entry {
    3.28 -    unsigned long  patch_addr;
    3.29 +    unsigned long  patch_addr; /* XXX */
    3.30      unsigned char  patched_code[20];
    3.31      unsigned short patched_code_len;
    3.32      unsigned short fixup_idx;
    3.33 +    unsigned short return_idx;
    3.34      struct fixup_entry *next;
    3.35  };
    3.36  
    3.37  #define FIXUP_HASHSZ 128
    3.38  static struct fixup_entry *fixup_hash[FIXUP_HASHSZ];
    3.39 -#define FIXUP_HASH(_a) ((unsigned int)(_a) & (FIXUP_HASHSZ-1))
    3.40 +static inline int FIXUP_HASH(char *b)
    3.41 +{
    3.42 +    int i, j = 0;
    3.43 +    for ( i = 0; i < PATCH_LEN; i++ )
    3.44 +        j ^= b[i];
    3.45 +    return j & (FIXUP_HASHSZ-1);
    3.46 +}
    3.47  
    3.48 +/* General instruction properties. */
    3.49  #define INSN_SUFFIX_BYTES (7)
    3.50  #define PREFIX_BYTE       (1<<3)
    3.51  #define OPCODE_BYTE       (1<<4)  
    3.52  #define HAS_MODRM         (1<<5)
    3.53  
    3.54 -#define X  0 /* invalid */
    3.55 +/* Helpful codes for the main decode routine. */
    3.56 +#define CODE_MASK         (3<<6)
    3.57 +#define PUSH              (0<<6) /* PUSH onto stack */
    3.58 +#define POP               (1<<6) /* POP from stack */
    3.59 +#define JMP               (2<<6) /* 8-bit relative JMP */
    3.60 +
    3.61 +/* Short forms for the table. */
    3.62 +#define X  0 /* invalid for some random reason */
    3.63 +#define S  0 /* invalid because it munges the stack */
    3.64  #define P  PREFIX_BYTE
    3.65  #define O  OPCODE_BYTE
    3.66  #define M  HAS_MODRM
    3.67  
    3.68  static unsigned char insn_decode[256] = {
    3.69      /* 0x00 - 0x0F */
    3.70 -    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
    3.71 -    O|M, O|M, O|M, O|M, O|1, O|4, O, X,
    3.72 +    O|M, O|M, O|M, O|M, O|1, O|4, S, S,
    3.73 +    O|M, O|M, O|M, O|M, O|1, O|4, S, X,
    3.74      /* 0x10 - 0x1F */
    3.75 -    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
    3.76 -    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
    3.77 +    O|M, O|M, O|M, O|M, O|1, O|4, S, S,
    3.78 +    O|M, O|M, O|M, O|M, O|1, O|4, S, S,
    3.79      /* 0x20 - 0x2F */
    3.80      O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    3.81      O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    3.82 @@ -79,20 +102,20 @@ static unsigned char insn_decode[256] = 
    3.83      O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    3.84      O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    3.85      /* 0x40 - 0x4F */
    3.86 -    O, O, O, O, O, O, O, O,
    3.87 -    O, O, O, O, O, O, O, O,
    3.88 +    O, O, O, O, S, O, O, O,
    3.89 +    O, O, O, O, S, O, O, O,
    3.90      /* 0x50 - 0x5F */
    3.91 -    O, O, O, O, O, O, O, O,
    3.92 -    O, O, O, O, O, O, O, O,
    3.93 +    O|PUSH, O|PUSH, O|PUSH, O|PUSH, S, O|PUSH, O|PUSH, O|PUSH,
    3.94 +    O|POP, O|POP, O|POP, O|POP, S, O|POP, O|POP, O|POP,
    3.95      /* 0x60 - 0x6F */
    3.96 -    O, O, O|M, O|M, P, P, X, X,
    3.97 -    O|4, O|M|4, O|1, O|M|1, O, O, O, O,
    3.98 +    S, S, O|M, O|M, P, P, X, X,
    3.99 +    O|4|PUSH, O|M|4, O|1|PUSH, O|M|1, O, O, O, O,
   3.100      /* 0x70 - 0x7F */
   3.101 -    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
   3.102 -    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
   3.103 +    O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP,
   3.104 +    O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP,
   3.105      /* 0x80 - 0x8F */
   3.106      O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
   3.107 -    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M, 
   3.108 +    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M|POP, 
   3.109      /* 0x90 - 0x9F */
   3.110      O, O, O, O, O, O, O, O,
   3.111      O, O, X, O, O, O, O, O,
   3.112 @@ -110,13 +133,15 @@ static unsigned char insn_decode[256] = 
   3.113      X, X, X, X, X, X, X, X,
   3.114      /* 0xE0 - 0xEF */
   3.115      X, X, X, X, X, X, X, X,
   3.116 -    X, O|4, X, O|1, X, X, X, X,
   3.117 +    X, O|4, X, O|1|JMP, X, X, X, X,
   3.118      /* 0xF0 - 0xFF */
   3.119      P, X, P, P, O, O, O|M|1, O|M|4, 
   3.120 -    O, O, O, O, O, O, O|M, O|M
   3.121 +    O, O, O, O, O, O, O|M, X
   3.122  };
   3.123  
   3.124 -static unsigned int get_insn_len(unsigned char *insn, unsigned char *p_opcode)
   3.125 +static unsigned int get_insn_len(unsigned char *insn, 
   3.126 +                                 unsigned char *p_opcode,
   3.127 +                                 unsigned char *p_decode)
   3.128  {
   3.129      unsigned char b, d, *pb, mod, rm;
   3.130  
   3.131 @@ -130,10 +155,14 @@ static unsigned int get_insn_len(unsigne
   3.132      }
   3.133  
   3.134      *p_opcode = b;
   3.135 +    *p_decode = d;
   3.136  
   3.137      /* 2. Ensure we have a valid opcode byte. */
   3.138      if ( !(d & OPCODE_BYTE) )
   3.139 +    {
   3.140 +        printk(KERN_ALERT " *** %02x %02x %02x\n", pb[0], pb[1], pb[2]);
   3.141          return 0;
   3.142 +    }
   3.143  
   3.144      /* 3. Process Mod/RM if there is one. */
   3.145      if ( d & HAS_MODRM )
   3.146 @@ -175,13 +204,10 @@ static unsigned char handleable_code[32]
   3.147  asmlinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
   3.148  {
   3.149      static unsigned int fixup_idx = 0;
   3.150 -    int relbyte_idx = -1, relword_idx = -1, save_indirect_reg;
   3.151 +    unsigned int fi;
   3.152 +    int save_indirect_reg, hash, rel_idx;
   3.153      unsigned int insn_len = (unsigned int)error_code, new_insn_len;
   3.154 -    unsigned char b[20], modrm, mod, reg, rm, patch[5], opcode;
   3.155 -    unsigned char *fixup_buf = 
   3.156 -        (unsigned char *)fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RW);
   3.157 -    unsigned long fixup_buf_user = 
   3.158 -        fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO);
   3.159 +    unsigned char b[20], modrm, mod, reg, rm, patch[PATCH_LEN], opcode, decode;
   3.160      unsigned long eip = regs->eip - insn_len;
   3.161      struct fixup_entry *fe;
   3.162      pte_t *pte;
   3.163 @@ -198,28 +224,50 @@ asmlinkage void do_fixup_4gb_segment(str
   3.164  
   3.165      if ( unlikely(eip >= (PAGE_OFFSET-32)) )
   3.166      {
   3.167 -        if ( (eip < fixup_buf_user) || (eip >= (fixup_buf_user+PAGE_SIZE-32)) )
   3.168 +        DPRINTK("User executing out of kernel space?!");
   3.169 +        return;
   3.170 +    }
   3.171 +    
   3.172 +    if ( unlikely(((eip ^ (eip+PATCH_LEN)) & PAGE_MASK) != 0) )
   3.173 +    {
   3.174 +        DPRINTK("Patch instruction would straddle a page boundary.");
   3.175 +        return;
   3.176 +    }
   3.177 +
   3.178 +    /*
   3.179 +     * Check that the page to be patched is part of a read-only VMA. This 
   3.180 +     * means that our patch will never erroneously get flushed to disc.
   3.181 +     */
   3.182 +    if ( eip > (FIXUP_BUF_USER + FIXUP_BUF_SIZE) ) /* don't check fixup area */
   3.183 +    {
    3.184 +        /* [SMP] Need to take the mmap_sem semaphore. */
   3.185 +        struct vm_area_struct *vma = find_vma(current->mm, eip);
   3.186 +        if ( (vma == NULL) || (vma->vm_flags & VM_MAYSHARE) )
   3.187          {
   3.188 -            DPRINTK("User executing out of kernel space?!");
   3.189 +            DPRINTK("Cannot patch a shareable VMA.");
   3.190              return;
   3.191          }
   3.192 -        /* We know it's safe to directly copy teh bytes into our buffer. */
   3.193 -        memcpy(b, (void *)eip, sizeof(b));
   3.194      }
   3.195 -    else if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
   3.196 +
   3.197 +    if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
   3.198      {
   3.199          DPRINTK("Could not read instruction bytes from user space.");
   3.200          return;
   3.201      }
   3.202  
   3.203 -    if ( unlikely(((eip ^ (eip+5)) & PAGE_MASK) != 0) )
   3.204 +    /* Already created a fixup for this address and code sequence? */
   3.205 +    hash = FIXUP_HASH(b);
   3.206 +    for ( fe = fixup_hash[hash];
   3.207 +          fe != NULL; fe = fe->next )
   3.208      {
   3.209 -        DPRINTK("Patch instruction would straddle a page boundary.");
   3.210 -        return;
   3.211 +        if ( eip != fe->patch_addr )
   3.212 +            continue; /* XXX */
   3.213 +        if ( memcmp(fe->patched_code, b, fe->patched_code_len) == 0 )
   3.214 +            goto do_the_patch;
   3.215      }
   3.216  
   3.217      /* Guaranteed enough room to patch? */
   3.218 -    if ( unlikely(fixup_idx > (PAGE_SIZE-32)) )
   3.219 +    if ( unlikely((fi = fixup_idx) > (FIXUP_BUF_SIZE-32)) )
   3.220      {
   3.221          static int printed = 0;
   3.222          if ( !printed )
   3.223 @@ -268,40 +316,77 @@ asmlinkage void do_fixup_4gb_segment(str
   3.224          return;
   3.225      }
   3.226  
   3.227 -    while ( insn_len < 5 )
   3.228 +    /* Indirect jump pointer. */
   3.229 +    *(u32 *)&fixup_buf[fi] = FIXUP_BUF_USER + fi + 4;
   3.230 +    fi += 4;
   3.231 +
   3.232 +    /* push <r32> */
   3.233 +    if ( save_indirect_reg )
   3.234 +        fixup_buf[fi++] = 0x50 + rm;
   3.235 +
   3.236 +    /* add %gs:0,<r32> */
   3.237 +    fixup_buf[fi++] = 0x65;
   3.238 +    fixup_buf[fi++] = 0x03;
   3.239 +    fixup_buf[fi++] = 0x05 | (rm << 3);
   3.240 +    *(unsigned long *)&fixup_buf[fi] = 0;
   3.241 +    fi += 4;
   3.242 +
   3.243 +    /* Relocate the faulting instruction, minus the GS override. */
   3.244 +    memcpy(&fixup_buf[fi], &b[1], error_code - 1);
   3.245 +    fi += error_code - 1;
   3.246 +
   3.247 +    /* pop <r32> */
   3.248 +    if ( save_indirect_reg )
   3.249 +        fixup_buf[fi++] = 0x58 + rm;
   3.250 +
   3.251 +    while ( insn_len < PATCH_LEN )
   3.252      {
   3.253          /* Bail if can't decode the following instruction. */
   3.254          if ( unlikely((new_insn_len =
   3.255 -                       get_insn_len(&b[insn_len], &opcode)) == 0) )
   3.256 +                       get_insn_len(&b[insn_len], &opcode, &decode)) == 0) )
   3.257          {
   3.258              DPRINTK("Could not decode following instruction.");
   3.259              return;
   3.260          }
   3.261  
   3.262          /* We track one 8-bit relative offset for patching later. */
   3.263 -        if ( ((opcode >= 0x70) && (opcode <= 0x7f)) || (opcode == 0xeb) )
   3.264 +        if ( (decode & CODE_MASK) == JMP )
   3.265          {
   3.266 -            if ( relbyte_idx != -1 )
   3.267 -            {
   3.268 -                DPRINTK("Multiple relative offsets in patch seq!");
   3.269 -                return;
   3.270 -            }
   3.271 -            relbyte_idx = insn_len;
   3.272 -            while ( b[relbyte_idx] != opcode )
   3.273 -                relbyte_idx++;
   3.274 -            relbyte_idx++;
   3.275 +            rel_idx = insn_len;
   3.276 +            while ( (fixup_buf[fi++] = b[rel_idx++]) != opcode )
   3.277 +                continue;
   3.278 +            
   3.279 +            /* Patch the 8-bit relative offset. */
   3.280 +            int idx = fe->fixup_idx + relbyte_idx + 6 + 4/**/;
   3.281 +            if ( save_indirect_reg )
   3.282 +                idx += 2;
   3.283 +            fixup_buf[idx] = fi - (idx + 1);
   3.284 +        
   3.285 +            /* jmp <rel32> */
   3.286 +            fixup_buf[fi++] = 0xe9;
   3.287 +            fi += 4;
   3.288 +            *(unsigned long *)&fixup_buf[fi-4] = 
   3.289 +                (eip + relbyte_idx + 1 + (long)(char)b[relbyte_idx]) - 
   3.290 +                (FIXUP_BUF_USER + fi);
   3.291          }
   3.292          else if ( opcode == 0xe9 )
   3.293          {
   3.294 -            if ( relword_idx != -1 )
   3.295 -            {
   3.296 -                DPRINTK("Multiple relative offsets in patch seq!");
   3.297 -                return;
   3.298 -            }
   3.299 -            relword_idx = insn_len;
   3.300 -            while ( b[relword_idx] != opcode )
   3.301 -                relword_idx++;
   3.302 -            relword_idx++;
   3.303 +            rel_idx = insn_len;
   3.304 +            while ( (fixup_buf[fi++] = b[rel_idx++]) != opcode )
   3.305 +                continue;
   3.306 +            
   3.307 +            /* Patch the 32-bit relative offset. */
   3.308 +            int idx = fe->fixup_idx + relword_idx + 6 + 4/**/;
   3.309 +            if ( save_indirect_reg )
   3.310 +                idx += 2;
   3.311 +            *(unsigned long *)&fixup_buf[idx] +=
   3.312 +                (eip + relword_idx) - (FIXUP_BUF_USER + idx);
   3.313 +        }
   3.314 +        else
   3.315 +        {
   3.316 +            /* Relocate the instruction verbatim. */
   3.317 +            memcpy(&fixup_buf[fi], &b[insn_len], new_insn_len);
   3.318 +            fi += new_insn_len;
   3.319          }
   3.320  
   3.321          if ( (insn_len += new_insn_len) > 20 )
   3.322 @@ -311,7 +396,7 @@ asmlinkage void do_fixup_4gb_segment(str
   3.323          }
   3.324  
   3.325          /* The instructions together must be no smaller than 'jmp <disp32>'. */
   3.326 -        if ( insn_len >= 5 )
   3.327 +        if ( insn_len >= PATCH_LEN )
   3.328              break;
   3.329  
   3.330          /* Can't have a RET in the middle of a patch sequence. */
   3.331 @@ -322,120 +407,70 @@ asmlinkage void do_fixup_4gb_segment(str
   3.332          }
   3.333      }
   3.334  
   3.335 -    /* Already created a fixup for this address and code sequence? */
   3.336 -    for ( fe = fixup_hash[FIXUP_HASH(eip)];
   3.337 -          fe != NULL; fe = fe->next )
   3.338 -    {
   3.339 -        if ( (fe->patch_addr == eip) &&
   3.340 -             (fe->patched_code_len == insn_len) &&
   3.341 -             (memcmp(fe->patched_code, b, insn_len) == 0) )
   3.342 -            goto do_the_patch;
   3.343 -    }
   3.344 -
   3.345 -    /* No existing patch -- create an entry for one. */
   3.346 +    /* Create an entry for a new fixup patch. */
   3.347      fe = kmalloc(sizeof(struct fixup_entry), GFP_KERNEL);
   3.348      if ( unlikely(fe == NULL) )
   3.349      {
   3.350          DPRINTK("Not enough memory to allocate a fixup_entry.");
   3.351          return;
   3.352      }
   3.353 -    fe->patch_addr = eip;
   3.354 +    fe->patch_addr = eip; /* XXX */
   3.355      fe->patched_code_len = insn_len;
   3.356      memcpy(fe->patched_code, b, insn_len);
   3.357      fe->fixup_idx = fixup_idx;
   3.358 -    fe->next = fixup_hash[FIXUP_HASH(eip)];
   3.359 -    fixup_hash[FIXUP_HASH(eip)] = fe;
   3.360 -    
   3.361 -    /* push <r32> */
   3.362 -    if ( save_indirect_reg )
   3.363 -        fixup_buf[fixup_idx++] = 0x50 + rm;
   3.364 -
   3.365 -    /* add %gs:0,<r32> */
   3.366 -    fixup_buf[fixup_idx++] = 0x65;
   3.367 -    fixup_buf[fixup_idx++] = 0x03;
   3.368 -    fixup_buf[fixup_idx++] = 0x05 | (rm << 3);
   3.369 -    *(unsigned long *)&fixup_buf[fixup_idx] = 0;
   3.370 -    fixup_idx += 4;
   3.371 +    fe->return_idx = 
   3.372 +        fixup_idx + error_code + 6 + 4/**/ + (save_indirect_reg ? 2 : 0);
   3.373 +    fe->next = fixup_hash[hash];
   3.374 +    fixup_hash[hash] = fe;
   3.375  
   3.376 -    /* First relocated instruction, minus the GS override. */
   3.377 -    memcpy(&fixup_buf[fixup_idx], &b[1], error_code - 1);
   3.378 -    fixup_idx += error_code - 1;
   3.379 -
   3.380 -    /* pop <r32> */
   3.381 -    if ( save_indirect_reg )
   3.382 -        fixup_buf[fixup_idx++] = 0x58 + rm;
   3.383 -
   3.384 -    if ( insn_len != error_code )
   3.385 -    {
   3.386 -        /* Relocated instructions. */
   3.387 -        memcpy(&fixup_buf[fixup_idx], &b[error_code], insn_len - error_code);
   3.388 -        fixup_idx += insn_len - error_code;
   3.389 -    }
   3.390  
   3.391      /* jmp <rel32> */
   3.392 -    fixup_buf[fixup_idx++] = 0xe9;
   3.393 -    fixup_idx += 4;
   3.394 -    *(unsigned long *)&fixup_buf[fixup_idx-4] = 
   3.395 -        (eip + insn_len) - (fixup_buf_user + fixup_idx);
   3.396 +    fixup_buf[fi++] = 0xe9;
   3.397 +    fi += 4;
   3.398 +    *(unsigned long *)&fixup_buf[fi-4] = 
   3.399 +        (eip + insn_len) - (FIXUP_BUF_USER + fi);
   3.400  
   3.401 -    if ( relbyte_idx != -1 )
   3.402 +    /* Commit the patch. */
   3.403 +    fixup_idx = fi;
   3.404 +
   3.405 +#if 0
   3.406 +    if ( fe->fixup_idx == 4122 )
   3.407      {
   3.408 -        /* Patch the 8-bit relative offset. */
   3.409 -        int idx = fe->fixup_idx + relbyte_idx + 6;
   3.410 -        if ( save_indirect_reg )
   3.411 -            idx += 2;
   3.412 -        fixup_buf[idx] = fixup_idx - (idx + 1);
   3.413 -        
   3.414 -        /* jmp <rel32> */
   3.415 -        fixup_buf[fixup_idx++] = 0xe9;
   3.416 -        fixup_idx += 4;
   3.417 -        *(unsigned long *)&fixup_buf[fixup_idx-4] = 
   3.418 -            (eip + relbyte_idx + 1 + (long)(char)b[relbyte_idx]) - 
   3.419 -            (fixup_buf_user + fixup_idx);
   3.420 +        int iii;
   3.421 +        printk(KERN_ALERT "EIP == %08lx; USER_EIP == %08lx\n",
   3.422 +               eip, FIXUP_BUF_USER + fe->fixup_idx);
   3.423 +        printk(KERN_ALERT " .byte ");
   3.424 +        for ( iii = 0; iii < insn_len; iii++ )
   3.425 +            printk("0x%02x,", b[iii]);
   3.426 +        printk("\n");
   3.427 +        printk(KERN_ALERT " .byte ");
   3.428 +        for ( iii = fe->fixup_idx; iii < fi; iii++ )
   3.429 +            printk("0x%02x,", fixup_buf[iii]);
   3.430 +        printk("\n");
   3.431 +        printk(KERN_ALERT " .byte ");
   3.432 +        for ( iii = fe->fixup_idx; iii < fi; iii++ )
   3.433 +            printk("0x%02x,", ((char *)FIXUP_BUF_USER)[iii]);
   3.434 +        printk("\n");
   3.435      }
   3.436 -    else if ( relword_idx != -1 )
   3.437 -    {
   3.438 -        /* Patch the 32-bit relative offset by subtracting the code disp. */
   3.439 -        int idx = fe->fixup_idx + relword_idx + 6;
   3.440 -        if ( save_indirect_reg )
   3.441 -            idx += 2;
   3.442 -        *(unsigned long *)&fixup_buf[idx] +=
   3.443 -            (eip + relword_idx) - (fixup_buf_user + idx);
   3.444 -    }
   3.445 +#endif
   3.446  
   3.447   do_the_patch:
   3.448      /* Create the patching instruction in a temporary buffer. */
   3.449 -    patch[0] = 0xe9;
   3.450 -    *(unsigned long *)&patch[1] = 
   3.451 -        (fixup_buf_user + fe->fixup_idx) - (eip + 5);
   3.452 -
   3.453 -    /*
   3.454 -     * Check that the page to be patched is part of a read-only VMA. This 
   3.455 -     * means that our patch will never erroneously get flushed to disc.
   3.456 -     */
   3.457 -    if ( eip < PAGE_OFFSET ) /* don't need to check the fixmap area */
   3.458 -    {
   3.459 -        /* [SMP] Need to the mmap_sem semaphore. */
   3.460 -        struct vm_area_struct *vma = find_vma(current->mm, eip);
   3.461 -        if ( (vma == NULL) || (vma->vm_flags & VM_MAYSHARE) )
   3.462 -        {
   3.463 -            DPRINTK("Cannot patch a shareable VMA.");
   3.464 -            return;
   3.465 -        }
   3.466 -    }
   3.467 +    patch[0] = 0x67;
   3.468 +    patch[1] = 0xff;
   3.469 +    patch[2] = 0x26; /* call <r/m16> */
   3.470 +    *(u16 *)&patch[3] = FIXUP_BUF_USER + fe->fixup_idx;
   3.471  
   3.472      /* [SMP] Need to pause other threads while patching. */
   3.473      pgd = pgd_offset(current->mm, eip);
   3.474      pmd = pmd_offset(pgd, eip);
   3.475      pte = pte_offset_kernel(pmd, eip);
   3.476      veip = kmap(pte_page(*pte));
   3.477 -    memcpy((char *)veip + (eip & ~PAGE_MASK), patch, 5);
   3.478 +    memcpy((char *)veip + (eip & ~PAGE_MASK), patch, PATCH_LEN);
   3.479      kunmap(pte_page(*pte));
   3.480  
   3.481      /* Success! Return to user land to execute 2nd insn of the pair. */
   3.482 -    regs->eip = fixup_buf_user + fe->fixup_idx + error_code + 6;
   3.483 -    if ( save_indirect_reg )
   3.484 -        regs->eip += 2;
   3.485 +    regs->eip = FIXUP_BUF_USER + fe->return_idx;
   3.486      return;
   3.487  }
   3.488  
   3.489 @@ -443,7 +478,9 @@ static int nosegfixup = 0;
   3.490  
   3.491  static int __init fixup_init(void)
   3.492  {
   3.493 -    unsigned long page;
   3.494 +    struct vm_struct vma;
   3.495 +    struct page *_pages[1<<FIXUP_BUF_ORDER], **pages=_pages;
   3.496 +    int i;
   3.497  
   3.498      if ( nosegfixup )
   3.499          return 0;
   3.500 @@ -451,9 +488,14 @@ static int __init fixup_init(void)
   3.501      HYPERVISOR_vm_assist(VMASST_CMD_enable,
   3.502                           VMASST_TYPE_4gb_segments_notify);
   3.503  
   3.504 -    page = get_zeroed_page(GFP_ATOMIC);
   3.505 -    __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RO, __pa(page), PAGE_READONLY);
   3.506 -    __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RW, __pa(page), PAGE_KERNEL);
   3.507 +    fixup_buf = (char *)__get_free_pages(GFP_ATOMIC, FIXUP_BUF_ORDER);
   3.508 +    for ( i = 0; i < (1<<FIXUP_BUF_ORDER); i++ )
   3.509 +        _pages[i] = virt_to_page(fixup_buf) + i;
   3.510 +
   3.511 +    vma.addr = (void *)FIXUP_BUF_USER;
    3.512 +    vma.size = FIXUP_BUF_SIZE + PAGE_SIZE; /* size passed to map_vm_area() includes one extra page */
   3.513 +    if ( map_vm_area(&vma, PAGE_READONLY, &pages) != 0 )
   3.514 +        BUG();
   3.515  
   3.516      memset(fixup_hash, 0, sizeof(fixup_hash));
   3.517  
     4.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Tue Aug 10 21:12:00 2004 +0000
     4.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Tue Aug 10 21:21:49 2004 +0000
     4.3 @@ -44,8 +44,6 @@
     4.4  enum fixed_addresses {
     4.5  	FIX_HOLE,
     4.6  	FIX_VSYSCALL,
     4.7 -	FIX_4GB_SEGMENT_FIXUP_RO,
     4.8 -	FIX_4GB_SEGMENT_FIXUP_RW,
     4.9  #ifdef CONFIG_X86_LOCAL_APIC
    4.10  	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
    4.11  #endif
    4.12 @@ -128,8 +126,8 @@ extern void __set_fixmap_ma (enum fixed_
    4.13   * This is the range that is readable by user mode, and things
    4.14   * acting like user mode such as get_user_pages.
    4.15   */
    4.16 -#define FIXADDR_USER_START	(__fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO))
    4.17 -#define FIXADDR_USER_END	(FIXADDR_USER_START + (2*PAGE_SIZE))
    4.18 +#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
    4.19 +#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
    4.20  
    4.21  
    4.22  extern void __this_fixmap_does_not_exist(void);
     5.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Tue Aug 10 21:12:00 2004 +0000
     5.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Tue Aug 10 21:21:49 2004 +0000
     5.3 @@ -64,16 +64,17 @@ void paging_init(void);
     5.4  #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
     5.5  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
     5.6  
     5.7 -#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
     5.8 -#define FIRST_USER_PGD_NR	0
     5.9 +#define FIRST_USER_PGD_NR	1
    5.10 +#define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE) - FIRST_USER_PGD_NR)
    5.11  
    5.12 +#if 0 /* XEN */
    5.13  #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
    5.14  #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
    5.15  
    5.16  #define TWOLEVEL_PGDIR_SHIFT	22
    5.17  #define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
    5.18  #define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
    5.19 -
    5.20 +#endif
    5.21  
    5.22  #ifndef __ASSEMBLY__
    5.23  /* Just any arbitrary offset to the start of the vmalloc VM area: the