direct-io.hg

changeset 2281:c7ad1834d668

bitkeeper revision 1.1159.41.1 (41238c3fi_zBknIqLEcbst8cclEjAQ)

Remove binary rewriting from Linux for now.
Replaced with a warning banner.
author kaf24@scramble.cl.cam.ac.uk
date Wed Aug 18 17:05:03 2004 +0000 (2004-08-18)
parents 1419a54e36ca
children 238feb30380b
files linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c
line diff
     1.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Wed Aug 18 16:57:36 2004 +0000
     1.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Wed Aug 18 17:05:03 2004 +0000
     1.3 @@ -5,6 +5,11 @@
     1.4   * Used to avoid repeated slow emulation of common instructions used by the
     1.5   * user-space TLS (Thread-Local Storage) libraries.
     1.6   * 
     1.7 + * **** NOTE ****
     1.8 + *  Issues with the binary rewriting have caused it to be removed. Instead
     1.9 + *  we rely on Xen's emulator to boot the kernel, and then print a banner
    1.10 + *  message recommending that the user disables /lib/tls.
    1.11 + * 
    1.12   * Copyright (c) 2004, K A Fraser
    1.13   * 
    1.14   * This program is free software; you can redistribute it and/or modify
    1.15 @@ -27,790 +32,47 @@
    1.16  #include <linux/sched.h>
    1.17  #include <linux/slab.h>
    1.18  #include <linux/kernel.h>
    1.19 -#include <linux/pagemap.h>
    1.20 -#include <linux/vmalloc.h>
    1.21 -#include <linux/highmem.h>
    1.22 -#include <linux/mman.h>
    1.23 -#include <asm/fixmap.h>
    1.24 -#include <asm/pgtable.h>
    1.25 -#include <asm/uaccess.h>
    1.26 -
    1.27 -#if 1
    1.28 -#define ASSERT(_p) \
    1.29 -    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
    1.30 -    __LINE__, __FILE__); *(int*)0=0; }
    1.31 -#define DPRINTK(_f, _a...) printk(KERN_ALERT \
    1.32 -                           "(file=%s, line=%d) " _f "\n", \
    1.33 -                           __FILE__ , __LINE__ , ## _a )
    1.34 -#else
    1.35 -#define ASSERT(_p) ((void)0)
    1.36 -#define DPRINTK(_f, _a...) ((void)0)
    1.37 -#endif
    1.38 -
    1.39 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    1.40 -#define TestSetPageLocked(_p) TryLockPage(_p)
    1.41 -#define PageAnon(_p)          0 /* no equivalent in 2.4 */
    1.42 -#define pte_offset_kernel     pte_offset
    1.43 -#define remap_page_range(_a,_b,_c,_d,_e) remap_page_range(_b,_c,_d,_e)
    1.44 -#define daemonize(_n)                   \
    1.45 -    do {                                \
    1.46 -        daemonize();                    \
    1.47 -        strcpy(current->comm, _n);      \
    1.48 -        sigfillset(&current->blocked);  \
    1.49 -    } while ( 0 )
    1.50 -#endif
    1.51 -
    1.52 -static unsigned char *fixup_buf;
    1.53 -#define FIXUP_BUF_USER  PAGE_SIZE
    1.54 -#define FIXUP_BUF_ORDER 1
    1.55 -#define FIXUP_BUF_SIZE  (PAGE_SIZE<<FIXUP_BUF_ORDER)
    1.56 -#define PATCH_LEN       5
    1.57 -
    1.58 -struct fixup_entry {
    1.59 -    unsigned char  patched_code[20];
    1.60 -    unsigned short patched_code_len;
    1.61 -    unsigned short fixup_idx;
    1.62 -    unsigned short return_idx;
    1.63 -    struct fixup_entry *next;
    1.64 -};
    1.65 -
    1.66 -#define FIXUP_HASHSZ 128
    1.67 -static struct fixup_entry *fixup_hash[FIXUP_HASHSZ];
    1.68 -static inline int FIXUP_HASH(char *b)
    1.69 -{
    1.70 -    int i, j = 0;
    1.71 -    for ( i = 0; i < PATCH_LEN; i++ )
    1.72 -        j ^= b[i];
    1.73 -    return j & (FIXUP_HASHSZ-1);
    1.74 -}
    1.75 -
    1.76 -/* General instruction properties. */
    1.77 -#define INSN_SUFFIX_BYTES (7)
    1.78 -#define PREFIX_BYTE       (1<<3)
    1.79 -#define OPCODE_BYTE       (1<<4)  
    1.80 -#define HAS_MODRM         (1<<5)
    1.81 -
    1.82 -/* Helpful codes for the main decode routine. */
    1.83 -#define CODE_MASK         (3<<6)
    1.84 -#define PUSH              (1<<6) /* PUSH onto stack */
    1.85 -#define POP               (2<<6) /* POP from stack */
    1.86 -#define JMP               (3<<6) /* 8-bit relative JMP */
    1.87 -
    1.88 -/* Short forms for the table. */
    1.89 -#define X  0 /* invalid for some random reason */
    1.90 -#define S  0 /* invalid because it munges the stack */
    1.91 -#define P  PREFIX_BYTE
    1.92 -#define O  OPCODE_BYTE
    1.93 -#define M  HAS_MODRM
    1.94 -
    1.95 -static unsigned char insn_decode[256] = {
    1.96 -    /* 0x00 - 0x0F */
    1.97 -    O|M, O|M, O|M, O|M, O|1, O|4, S, S,
    1.98 -    O|M, O|M, O|M, O|M, O|1, O|4, S, X,
    1.99 -    /* 0x10 - 0x1F */
   1.100 -    O|M, O|M, O|M, O|M, O|1, O|4, S, S,
   1.101 -    O|M, O|M, O|M, O|M, O|1, O|4, S, S,
   1.102 -    /* 0x20 - 0x2F */
   1.103 -    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
   1.104 -    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
   1.105 -    /* 0x30 - 0x3F */
   1.106 -    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
   1.107 -    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
   1.108 -    /* 0x40 - 0x4F */
   1.109 -    O, O, O, O, S, O, O, O,
   1.110 -    O, O, O, O, S, O, O, O,
   1.111 -    /* 0x50 - 0x5F */
   1.112 -    O|PUSH, O|PUSH, O|PUSH, O|PUSH, S, O|PUSH, O|PUSH, O|PUSH,
   1.113 -    O|POP, O|POP, O|POP, O|POP, S, O|POP, O|POP, O|POP,
   1.114 -    /* 0x60 - 0x6F */
   1.115 -    S, S, O|M, O|M, P, P, X, X,
   1.116 -    O|4|PUSH, O|M|4, O|1|PUSH, O|M|1, O, O, O, O,
   1.117 -    /* 0x70 - 0x7F */
   1.118 -    O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP,
   1.119 -    O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP, O|1|JMP,
   1.120 -    /* 0x80 - 0x8F */
   1.121 -    O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
   1.122 -    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M|POP, 
   1.123 -    /* 0x90 - 0x9F */
   1.124 -    O, O, O, O, S, O, O, O,
   1.125 -    O, O, X, O, O, O, O, O,
   1.126 -    /* 0xA0 - 0xAF */
   1.127 -    O|1, O|4, O|1, O|4, O, O, O, O,
   1.128 -    O|1, O|4, O, O, O, O, O, O,
   1.129 -    /* 0xB0 - 0xBF */
   1.130 -    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
   1.131 -    O|4, O|4, O|4, O|4, O|4, O|4, O|4, O|4,
   1.132 -    /* 0xC0 - 0xCF */
   1.133 -    O|M|1, O|M|1, X, O, X, X, O|M|1, O|M|4,
   1.134 -    X, X, X, X, X, X, X, X,
   1.135 -    /* 0xD0 - 0xDF */
   1.136 -    O|M, O|M, O|M, O|M, O|1, O|1, X, X,
   1.137 -    X, X, X, X, X, X, X, X,
   1.138 -    /* 0xE0 - 0xEF */
   1.139 -    X, X, X, X, X, X, X, X,
   1.140 -    X, O|4, X, O|1|JMP, X, X, X, X,
   1.141 -    /* 0xF0 - 0xFF */
   1.142 -    P, X, P, P, O, O, O|M|1, O|M|4, 
   1.143 -    O, O, O, O, O, O, O|M, X
   1.144 -};
   1.145 -
   1.146 -/* Bitmap of faulting instructions that we can handle. */
   1.147 -static unsigned char handleable_code[32] = {
   1.148 -    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   1.149 -    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   1.150 -    /* 0x80-0x83, 0x89, 0x8B */
   1.151 -    0x0F, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   1.152 -    /* 0xC7 */
   1.153 -    0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   1.154 -};
   1.155 -
   1.156 -/* Bitmap of opcodes that use a register operand specified by Mod/RM. */
   1.157 -static unsigned char opcode_uses_reg[32] = {
   1.158 -    /* 0x00 - 0x3F */
   1.159 -    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
   1.160 -    /* 0x40 - 0x7F */
   1.161 -    0x00, 0x00, 0x00, 0x00, 0x0C, 0x0A, 0x00, 0x00,
   1.162 -    /* 0x80 - 0xBF */
   1.163 -    0xF0, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   1.164 -    /* 0xC0 - 0xFF */
   1.165 -    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   1.166 -};
   1.167 -
   1.168 -static unsigned int parse_insn(unsigned char *insn, 
   1.169 -                               unsigned char *p_opcode,
   1.170 -                               unsigned char *p_decode)
   1.171 -{
   1.172 -    unsigned char b, d, *pb, mod, rm;
   1.173 +#include <linux/delay.h>
   1.174  
   1.175 -    /* 1. Step over the prefix bytes. */
   1.176 -    for ( pb = insn; (pb - insn) < 4; pb++ )
   1.177 -    {
   1.178 -        b = *pb;
   1.179 -        d = insn_decode[b];
   1.180 -        if ( !(d & PREFIX_BYTE) )
   1.181 -            break;
   1.182 -    }
   1.183 -
   1.184 -    *p_opcode = b;
   1.185 -    *p_decode = d;
   1.186 -
   1.187 -    /* 2. Ensure we have a valid opcode byte. */
   1.188 -    if ( !(d & OPCODE_BYTE) )
   1.189 -        return 0;
   1.190 -
   1.191 -    /* 3. Process Mod/RM if there is one. */
   1.192 -    if ( d & HAS_MODRM )
   1.193 -    {
   1.194 -        b = *(++pb);
   1.195 -        if ( (mod = (b >> 6) & 3) != 3 )
   1.196 -        {           
   1.197 -            if ( (rm = (b >> 0) & 7) == 4 )
   1.198 -                pb += 1; /* SIB byte */
   1.199 -            switch ( mod )
   1.200 -            {
   1.201 -            case 0:
   1.202 -                if ( rm == 5 )
   1.203 -                    pb += 4; /* disp32 */
   1.204 -                break;
   1.205 -            case 1:
   1.206 -                pb += 1; /* disp8 */
   1.207 -                break;
   1.208 -            case 2:
   1.209 -                pb += 4; /* disp32 */
   1.210 -                break;
   1.211 -            }
   1.212 -        }
   1.213 -    }
   1.214 -
   1.215 -    /* 4. All done. Result is all bytes stepped over, plus any immediates. */
   1.216 -    return ((pb - insn) + 1 + (d & INSN_SUFFIX_BYTES));
   1.217 -}
   1.218 -
   1.219 -#define SUCCESS 1
   1.220 -#define FAIL    0
   1.221 -static int map_fixup_buf(struct mm_struct *mm)
   1.222 -{
   1.223 -    struct vm_area_struct *vma;
   1.224 -
   1.225 -    /* Already mapped? This is a pretty safe check. */
   1.226 -    if ( ((vma = find_vma(current->mm, FIXUP_BUF_USER)) != NULL) &&
   1.227 -         (vma->vm_start <= FIXUP_BUF_USER) &&
   1.228 -         (vma->vm_flags == (VM_READ | VM_MAYREAD | VM_RESERVED)) &&
   1.229 -         (vma->vm_file == NULL) )
   1.230 -        return SUCCESS;
   1.231 -
   1.232 -    if ( (vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL)) == NULL )
   1.233 -    {
   1.234 -        DPRINTK("Cannot allocate VMA.");
   1.235 -        return FAIL;
   1.236 -    }
   1.237 -
   1.238 -    memset(vma, 0, sizeof(*vma));
   1.239 -
   1.240 -    vma->vm_mm        = mm;
   1.241 -    vma->vm_flags     = VM_READ | VM_MAYREAD | VM_RESERVED;
   1.242 -    vma->vm_page_prot = PAGE_READONLY;
   1.243 -
   1.244 -    down_write(&mm->mmap_sem);
   1.245 -
   1.246 -    vma->vm_start = get_unmapped_area(
   1.247 -        NULL, FIXUP_BUF_USER, FIXUP_BUF_SIZE,
   1.248 -        0, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED);
   1.249 -    if ( vma->vm_start != FIXUP_BUF_USER )
   1.250 -    {
   1.251 -        DPRINTK("Cannot allocate low-memory-region VMA.");
   1.252 -        up_write(&mm->mmap_sem);
   1.253 -        kmem_cache_free(vm_area_cachep, vma);
   1.254 -        return FAIL;
   1.255 -    }
   1.256 -
   1.257 -    vma->vm_end = vma->vm_start + FIXUP_BUF_SIZE;
   1.258 -
   1.259 -    if ( remap_page_range(vma, vma->vm_start, __pa(fixup_buf), 
   1.260 -                          vma->vm_end - vma->vm_start, vma->vm_page_prot) )
   1.261 -    {
   1.262 -        DPRINTK("Cannot map low-memory-region VMA.");
   1.263 -        up_write(&mm->mmap_sem);
   1.264 -        kmem_cache_free(vm_area_cachep, vma);
   1.265 -        return FAIL;
   1.266 -    }
   1.267 -
   1.268 -    insert_vm_struct(mm, vma);
   1.269 -    
   1.270 -    mm->total_vm += FIXUP_BUF_SIZE >> PAGE_SHIFT;
   1.271 -
   1.272 -    up_write(&mm->mmap_sem);
   1.273 -
   1.274 -    return SUCCESS;
   1.275 -}
   1.276 -
   1.277 -/*
   1.278 - * Mainly this function checks that our patches can't erroneously get flushed
   1.279 - * to a file on disc, which would screw us after reboot!
   1.280 - */
   1.281 -#define SUCCESS 1
   1.282 -#define FAIL    0
   1.283 -static int safe_to_patch(struct mm_struct *mm, unsigned long addr)
   1.284 -{
   1.285 -    struct vm_area_struct *vma;
   1.286 -    struct file           *file;
   1.287 -    unsigned char          _name[30], *name;
   1.288 -
   1.289 -    /* Always safe to patch the fixup buffer. */
   1.290 -    if ( addr <= (FIXUP_BUF_USER + FIXUP_BUF_SIZE) )
   1.291 -        return SUCCESS;
   1.292 -
   1.293 -    if ( ((vma = find_vma(current->mm, addr)) == NULL) ||
   1.294 -         (vma->vm_start > addr) )
   1.295 -    {
   1.296 -        DPRINTK("No VMA contains fault address.");
   1.297 -        return FAIL;
   1.298 -    }
   1.299 -
   1.300 -    /* Only patch shared libraries. */
   1.301 -    if ( (file = vma->vm_file) == NULL )
   1.302 -    {
   1.303 -        DPRINTK("VMA is anonymous!");
   1.304 -        return FAIL;
   1.305 -    }
   1.306 -
   1.307 -    /* No shared mappings => nobody can dirty the file. */
   1.308 -    /* XXX Note the assumption that noone will dirty the file in future! */
   1.309 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   1.310 -    if ( file->f_mapping->i_mmap_writable != 0 )
   1.311 -#else
   1.312 -    if ( file->f_dentry->d_inode->i_mapping->i_mmap_shared != NULL )
   1.313 -#endif
   1.314 -    {
   1.315 -        DPRINTK("Shared mappings exist.");
   1.316 -        return FAIL;
   1.317 -    }
   1.318 -
   1.319 -    /*
   1.320 -     * Because of above dodgy assumption, we will only patch things in
   1.321 -     * /lib/tls. Our belief is that updates here will only ever occur by
   1.322 -     * unlinking the old files and installing completely fresh ones. :-)
   1.323 -     */
   1.324 -    name = d_path(file->f_dentry, file->f_vfsmnt, _name, sizeof(_name));
   1.325 -    if ( IS_ERR(name) || (strncmp("/lib/tls", name, 8) != 0) )
   1.326 -    {
   1.327 -        DPRINTK("Backing file is not in /lib/tls");
   1.328 -        return FAIL;
   1.329 -    }
   1.330 -
   1.331 -    return SUCCESS;
   1.332 -}
   1.333 +#define DP(_f) printk(KERN_ALERT "  " _f "\n")
   1.334  
   1.335  asmlinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
   1.336  {
   1.337 -    static unsigned int fixup_idx = 0;
   1.338 -    struct mm_struct *mm = current->mm;
   1.339 -    unsigned int fi;
   1.340 -    int save_indirect_reg, hash, i;
   1.341 -    unsigned int insn_len = (unsigned int)error_code, new_insn_len;
   1.342 -    unsigned char b[20], modrm, mod, reg, rm, sib, patch[20], opcode, decode;
   1.343 -    unsigned long eip = regs->eip - insn_len;
   1.344 -    struct fixup_entry *fe;
   1.345 -    struct page *page;
   1.346 -    pte_t *pte;
   1.347 -    pmd_t *pmd;
   1.348 -    pgd_t *pgd;
   1.349 -    void *veip;
   1.350 -
   1.351 -    /* Easy check that code segment has base 0, max limit. */
   1.352 -    if ( unlikely(regs->xcs != __USER_CS) )
   1.353 -    {
   1.354 -        DPRINTK("Unexpected CS value.");
   1.355 -        return;
   1.356 -    }
   1.357 +    static unsigned long printed = 0;
   1.358 +    int i;
   1.359  
   1.360 -    if ( unlikely(!map_fixup_buf(mm)) )
   1.361 -        goto out;
   1.362 -
   1.363 -    /* Hold the mmap_sem to prevent the mapping from disappearing under us. */
   1.364 -    down_read(&mm->mmap_sem);
   1.365 -
   1.366 -    if ( unlikely(!safe_to_patch(mm, eip)) )
   1.367 -        goto out;
   1.368 -
   1.369 -    if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
   1.370 -    {
   1.371 -        DPRINTK("Could not read instruction bytes from user space.");
   1.372 -        goto out;
   1.373 -    }
   1.374 -
   1.375 -    /* Already created a fixup for this code sequence? */
   1.376 -    hash = FIXUP_HASH(b);
   1.377 -    for ( fe = fixup_hash[hash]; fe != NULL; fe = fe->next )
   1.378 -    {
   1.379 -        if ( memcmp(fe->patched_code, b, fe->patched_code_len) == 0 )
   1.380 -            goto do_the_patch;
   1.381 -    }
   1.382 -
   1.383 -    /* Guaranteed enough room to patch? */
   1.384 -    if ( unlikely((fi = fixup_idx) > (FIXUP_BUF_SIZE-64)) )
   1.385 +    if ( !test_and_set_bit(0, &printed) )
   1.386      {
   1.387 -        static int printed = 0;
   1.388 -        if ( !printed )
   1.389 -            printk(KERN_ALERT "WARNING: Out of room in segment-fixup page.\n");
   1.390 -        printed = 1;
   1.391 -        goto out;
   1.392 -    }
   1.393 -
   1.394 -    /* Must be a handleable opcode with GS override. */
   1.395 -    if ( (b[0] != 0x65) || 
   1.396 -         !test_bit((unsigned int)b[1], (unsigned long *)handleable_code) )
   1.397 -    {
   1.398 -        DPRINTK("No GS override, or not a MOV (%02x %02x).", b[0], b[1]);
   1.399 -        goto out;
   1.400 -    }
   1.401 -
   1.402 -    modrm = b[2];
   1.403 -    mod   = (modrm >> 6) & 3;
   1.404 -    reg   = (modrm >> 3) & 7;
   1.405 -    rm    = (modrm >> 0) & 7;
   1.406 -
   1.407 -    /* If indirect register isn't clobbered then we must push/pop it. */
   1.408 -    save_indirect_reg = !((b[1] == 0x8b) && (reg == rm));
   1.409 -
   1.410 -    /* We don't grok SIB bytes. */
   1.411 -    if ( rm == 4 )
   1.412 -    {
   1.413 -        DPRINTK("We don't grok SIB bytes.");
   1.414 -        goto out;
   1.415 -    }
   1.416 -
   1.417 -    /* Ensure Mod/RM specifies (r32) or disp8(r32). */
   1.418 -    switch ( mod )
   1.419 -    {
   1.420 -    case 0:
   1.421 -        if ( rm == 5 )
   1.422 -        {
   1.423 -            DPRINTK("Unhandleable disp32 EA %d.", rm);
   1.424 -            goto out;
   1.425 -        }
   1.426 -        break;            /* m32 == (r32) */
   1.427 -    case 1:
   1.428 -        break;            /* m32 == disp8(r32) */
   1.429 -    default:
   1.430 -        DPRINTK("Unhandleable Mod value %d.", mod);
   1.431 -        goto out;
   1.432 -    }
   1.433 -
   1.434 -    /* Indirect jump pointer. */
   1.435 -    *(u32 *)&fixup_buf[fi] = FIXUP_BUF_USER + fi + 4;
   1.436 -    fi += 4;
   1.437 -
   1.438 -    /* push <r32> */
   1.439 -    if ( save_indirect_reg )
   1.440 -        fixup_buf[fi++] = 0x50 + rm;
   1.441 -
   1.442 -    /* pushf */
   1.443 -    fixup_buf[fi++] = 0x9c;
   1.444 +        HYPERVISOR_vm_assist(VMASST_CMD_disable,
   1.445 +                             VMASST_TYPE_4gb_segments_notify);
   1.446  
   1.447 -    /* add %gs:0,<r32> */
   1.448 -    fixup_buf[fi++] = 0x65;
   1.449 -    fixup_buf[fi++] = 0x03;
   1.450 -    fixup_buf[fi++] = 0x05 | (rm << 3);
   1.451 -    *(unsigned long *)&fixup_buf[fi] = 0;
   1.452 -    fi += 4;
   1.453 -
   1.454 -    /* popf */
   1.455 -    fixup_buf[fi++] = 0x9d;
   1.456 -
   1.457 -    /* Relocate the faulting instruction, minus the GS override. */
   1.458 -    memcpy(&fixup_buf[fi], &b[1], error_code - 1);
   1.459 -    fi += error_code - 1;
   1.460 -
   1.461 -    /* pop <r32> */
   1.462 -    if ( save_indirect_reg )
   1.463 -        fixup_buf[fi++] = 0x58 + rm;
   1.464 -
   1.465 -    for ( ; ; )
   1.466 -    {
   1.467 -        if ( insn_len >= PATCH_LEN )
   1.468 -        {
   1.469 -            /* ret */
   1.470 -            fixup_buf[fi++] = 0xc3;
   1.471 -            break;
   1.472 -        }
   1.473 -
   1.474 -        /* Bail if can't decode the following instruction. */
   1.475 -        if ( unlikely((new_insn_len =
   1.476 -                       parse_insn(&b[insn_len], &opcode, &decode)) == 0) )
   1.477 -        {
   1.478 -            DPRINTK("Could not decode following instruction.");
   1.479 -            goto out;
   1.480 -        }
   1.481 -
   1.482 -        if ( (decode & CODE_MASK) == JMP )
   1.483 -        {
   1.484 -            long off;
   1.485 -
   1.486 -            memcpy(&fixup_buf[fi], &b[insn_len], new_insn_len - 1);
   1.487 -            fi += new_insn_len - 1;
   1.488 -            
   1.489 -            /* Patch the 8-bit relative offset. */
   1.490 -            fixup_buf[fi++] = 1;
   1.491 -            
   1.492 -            insn_len += new_insn_len;
   1.493 -            ASSERT(insn_len >= PATCH_LEN);
   1.494 -        
   1.495 -            /* ret */
   1.496 -            fixup_buf[fi++] = 0xc3;
   1.497 -
   1.498 -            /* pushf */
   1.499 -            fixup_buf[fi++] = 0x9c;
   1.500 +        DP("");
   1.501 +        DP("***************************************************************");
   1.502 +        DP("***************************************************************");
   1.503 +        DP("** WARNING: Currently emulating unsupported memory accesses  **");
   1.504 +        DP("**          in /lib/tls libraries. Although this emulation   **");
   1.505 +        DP("**          ensures correct execution, it is very slow!      **");
   1.506 +        DP("**          TO OBTAIN FULL PERFORMANCE, EXECUTE THE          **");
   1.507 +        DP("**          FOLLOWING AS ROOT:                               **");
   1.508 +        DP("**          mv /lib/tls /lib/tls.disabled                    **");
   1.509 +        DP("***************************************************************");
   1.510 +        DP("***************************************************************");
   1.511 +        DP("");
   1.512  
   1.513 -            off = (insn_len - PATCH_LEN) + (long)(char)b[insn_len-1];
   1.514 -            if ( unlikely(off > 127) )
   1.515 -            {
   1.516 -                /* add <imm32>,4(%esp) */
   1.517 -                fixup_buf[fi++] = 0x81;
   1.518 -                fixup_buf[fi++] = 0x44;
   1.519 -                fixup_buf[fi++] = 0x24;
   1.520 -                fixup_buf[fi++] = 0x04;
   1.521 -                fi += 4;
   1.522 -                *(long *)&fixup_buf[fi-4] = off;
   1.523 -            }
   1.524 -            else
   1.525 -            {
   1.526 -                /* add <imm8>,4(%esp) [sign-extended] */
   1.527 -                fixup_buf[fi++] = 0x83;
   1.528 -                fixup_buf[fi++] = 0x44;
   1.529 -                fixup_buf[fi++] = 0x24;
   1.530 -                fixup_buf[fi++] = 0x04;
   1.531 -                fixup_buf[fi++] = (char)(off & 0xff);
   1.532 -            }
   1.533 -
   1.534 -            /* popf */
   1.535 -            fixup_buf[fi++] = 0x9d;
   1.536 -
   1.537 -            /* ret */
   1.538 -            fixup_buf[fi++] = 0xc3;
   1.539 -
   1.540 -            break;
   1.541 -        }
   1.542 -        else if ( opcode == 0xe9 ) /* jmp <rel32> */
   1.543 -        {
   1.544 -            insn_len += new_insn_len;
   1.545 -            ASSERT(insn_len >= PATCH_LEN);
   1.546 -        
   1.547 -            /* pushf */
   1.548 -            fixup_buf[fi++] = 0x9c;
   1.549 -
   1.550 -            /* add <imm32>,4(%esp) */
   1.551 -            fixup_buf[fi++] = 0x81;
   1.552 -            fixup_buf[fi++] = 0x44;
   1.553 -            fixup_buf[fi++] = 0x24;
   1.554 -            fixup_buf[fi++] = 0x04;
   1.555 -            fi += 4;
   1.556 -            *(long *)&fixup_buf[fi-4] = 
   1.557 -                (insn_len - PATCH_LEN) + *(long *)&b[insn_len-4];
   1.558 -
   1.559 -            /* popf */
   1.560 -            fixup_buf[fi++] = 0x9d;
   1.561 -
   1.562 -            /* ret */
   1.563 -            fixup_buf[fi++] = 0xc3;
   1.564 -
   1.565 -            break;
   1.566 -        }
   1.567 -        else if ( opcode == 0xc3 ) /* ret */
   1.568 +        for ( i = 5; i > 0; i-- )
   1.569          {
   1.570 -            /* pop -4(%esp) [doesn't affect EFLAGS] */
   1.571 -            fixup_buf[fi++] = 0x8f;
   1.572 -            fixup_buf[fi++] = 0x44;
   1.573 -            fixup_buf[fi++] = 0x24;
   1.574 -            fixup_buf[fi++] = 0xfc;
   1.575 -        }
   1.576 -        else
   1.577 -        {
   1.578 -            int stack_addon = 4;
   1.579 -
   1.580 -            if ( (decode & CODE_MASK) == PUSH )
   1.581 -            {
   1.582 -                stack_addon = 8;
   1.583 -                /* push (%esp) */
   1.584 -                fixup_buf[fi++] = 0xff;
   1.585 -                fixup_buf[fi++] = 0x34;
   1.586 -                fixup_buf[fi++] = 0x24;
   1.587 -            }
   1.588 -            else if ( (decode & CODE_MASK) == POP )
   1.589 -            {
   1.590 -                stack_addon = 8;
   1.591 -                /* push 4(%esp) */
   1.592 -                fixup_buf[fi++] = 0xff;
   1.593 -                fixup_buf[fi++] = 0x74;
   1.594 -                fixup_buf[fi++] = 0x24;
   1.595 -                fixup_buf[fi++] = 0x04;
   1.596 -            }
   1.597 -
   1.598 -            /* Check for EA calculations involving ESP, and skip return addr */
   1.599 -            if ( decode & HAS_MODRM )
   1.600 -            {
   1.601 -                do { new_insn_len--; }
   1.602 -                while ( (fixup_buf[fi++] = b[insn_len++]) != opcode );
   1.603 -
   1.604 -                modrm = fixup_buf[fi++] = b[insn_len++];
   1.605 -                new_insn_len--;
   1.606 -                mod   = (modrm >> 6) & 3;
   1.607 -                reg   = (modrm >> 3) & 7;
   1.608 -                rm    = (modrm >> 0) & 7;
   1.609 -
   1.610 -                if ( (reg == 4) &&
   1.611 -                     test_bit(opcode, (unsigned long *)opcode_uses_reg) )
   1.612 -                {
   1.613 -                    DPRINTK("Data movement to ESP unsupported.");
   1.614 -                    goto out;
   1.615 -                }
   1.616 -
   1.617 -                if ( rm == 4 )
   1.618 -                {
   1.619 -                    if ( mod == 3 )
   1.620 -                    {
   1.621 -                        DPRINTK("Data movement to ESP is unsupported.");
   1.622 -                        goto out;
   1.623 -                    }
   1.624 -
   1.625 -                    sib = fixup_buf[fi++] = b[insn_len++];
   1.626 -                    new_insn_len--;
   1.627 -                    if ( (sib & 7) == 4 )
   1.628 -                    {
   1.629 -                        switch ( mod )
   1.630 -                        {
   1.631 -                        case 0:
   1.632 -                            mod = 1;
   1.633 -                            fixup_buf[fi-2] |= 0x40;
   1.634 -                            fixup_buf[fi++] = stack_addon;
   1.635 -                            break;
   1.636 -                        case 1:
   1.637 -                            fixup_buf[fi++] = b[insn_len++] + stack_addon;
   1.638 -                            new_insn_len--;
   1.639 -                            break;
   1.640 -                        case 2:
   1.641 -                            *(long *)&fixup_buf[fi] = 
   1.642 -                                *(long *)&b[insn_len] + stack_addon;
   1.643 -                            fi += 4;
   1.644 -                            insn_len += 4;
   1.645 -                            new_insn_len -= 4;
   1.646 -                            break;
   1.647 -                        }
   1.648 -                    }
   1.649 -                }
   1.650 -            }
   1.651 -
   1.652 -            /* Relocate the (remainder of) the instruction. */
   1.653 -            if ( new_insn_len != 0 )
   1.654 -            {
   1.655 -                memcpy(&fixup_buf[fi], &b[insn_len], new_insn_len);
   1.656 -                fi += new_insn_len;
   1.657 -            }
   1.658 -
   1.659 -            if ( (decode & CODE_MASK) == PUSH )
   1.660 -            {
   1.661 -                /* pop 4(%esp) */
   1.662 -                fixup_buf[fi++] = 0x8f;
   1.663 -                fixup_buf[fi++] = 0x44;
   1.664 -                fixup_buf[fi++] = 0x24;
   1.665 -                fixup_buf[fi++] = 0x04;
   1.666 -            }
   1.667 -            else if ( (decode & CODE_MASK) == POP )
   1.668 -            {
   1.669 -                /* pop (%esp) */
   1.670 -                fixup_buf[fi++] = 0x8f;
   1.671 -                fixup_buf[fi++] = 0x04;
   1.672 -                fixup_buf[fi++] = 0x24;
   1.673 -            }
   1.674 -        }
   1.675 -
   1.676 -        if ( (insn_len += new_insn_len) > 20 )
   1.677 -        {
   1.678 -            DPRINTK("Code to patch is too long!");
   1.679 -            goto out;
   1.680 +            printk("Pausing... %d", i);
   1.681 +            mdelay(1000);
   1.682 +            printk("\b\b\b\b\b\b\b\b\b\b\b\b");
   1.683          }
   1.684 -
   1.685 -        /* Can't have a RET in the middle of a patch sequence. */
   1.686 -        if ( (opcode == 0xc3) && (insn_len < PATCH_LEN) )
   1.687 -        {
   1.688 -            DPRINTK("RET in middle of patch seq!\n");
   1.689 -            goto out;
   1.690 -        }
   1.691 -    }
   1.692 -
   1.693 -    /* Create an entry for a new fixup patch. */
   1.694 -    fe = kmalloc(sizeof(struct fixup_entry), GFP_KERNEL);
   1.695 -    if ( unlikely(fe == NULL) )
   1.696 -    {
   1.697 -        DPRINTK("Not enough memory to allocate a fixup_entry.");
   1.698 -        goto out;
   1.699 -    }
   1.700 -    fe->patched_code_len = insn_len;
   1.701 -    memcpy(fe->patched_code, b, insn_len);
   1.702 -    fe->fixup_idx = fixup_idx;
   1.703 -    fe->return_idx = 
   1.704 -        fixup_idx + error_code + (save_indirect_reg ? 14 : 12);
   1.705 -    fe->next = fixup_hash[hash];
   1.706 -    fixup_hash[hash] = fe;
   1.707 -
   1.708 -    /* Commit the patch. */
   1.709 -    fixup_idx = fi;
   1.710 -
   1.711 - do_the_patch:
   1.712 -
   1.713 -    if ( unlikely(((eip ^ (eip + fe->patched_code_len)) & PAGE_MASK) != 0) )
   1.714 -    {
   1.715 -        DPRINTK("Patch instruction would straddle a page boundary.");
   1.716 -        goto out;
   1.717 -    }
   1.718 -
   1.719 -    if ( put_user(eip + PATCH_LEN, (unsigned long *)regs->esp - 1) != 0 )
   1.720 -    {
   1.721 -        DPRINTK("Failed to place return address on user stack.");
   1.722 -        goto out;
   1.723 -    }
   1.724 -
   1.725 -    /* Create the patching instructions in a temporary buffer. */
   1.726 -    patch[0] = 0x67;
   1.727 -    patch[1] = 0xff;
   1.728 -    patch[2] = 0x16; /* call <r/m16> */
   1.729 -    *(u16 *)&patch[3] = FIXUP_BUF_USER + fe->fixup_idx;
   1.730 -    for ( i = 5; i < fe->patched_code_len; i++ )
   1.731 -        patch[i] = 0x90; /* nop */
   1.732 -
   1.733 -    spin_lock(&mm->page_table_lock);
   1.734 -
   1.735 -    /* Find the physical page that is to be patched. */
   1.736 -    pgd = pgd_offset(current->mm, eip);
   1.737 -    if ( unlikely(!pgd_present(*pgd)) )
   1.738 -        goto unlock_and_out;
   1.739 -    pmd = pmd_offset(pgd, eip);
   1.740 -    if ( unlikely(!pmd_present(*pmd)) )
   1.741 -        goto unlock_and_out;
   1.742 -    pte = pte_offset_kernel(pmd, eip);
   1.743 -    if ( unlikely(!pte_present(*pte)) )
   1.744 -        goto unlock_and_out;
   1.745 -    page = pte_page(*pte);
   1.746 -
   1.747 -    /*
   1.748 -     * We get lock to prevent page going AWOL on us. Also a locked page
   1.749 -     * might be getting flushed to disc!
   1.750 -     */
   1.751 -    if ( unlikely(TestSetPageLocked(page)) )
   1.752 -    {
   1.753 -        DPRINTK("Page is locked.");
   1.754 -        goto unlock_and_out;
   1.755 -    }
   1.756 -
   1.757 -    /*
   1.758 -     * If page is dirty it will get flushed back to disc - bad news! An
   1.759 -     * anonymous page may be moulinexed under our feet by another thread.
   1.760 -     */
   1.761 -    if ( unlikely(PageDirty(page)) || unlikely(PageAnon(page)) )
   1.762 -    {
   1.763 -        DPRINTK("Page is dirty or anonymous.");
   1.764 -        unlock_page(page);
   1.765 -        goto unlock_and_out;
   1.766 -    }
   1.767 -
   1.768 -    veip = kmap(page);
   1.769 -    memcpy((char *)veip + (eip & ~PAGE_MASK), patch, fe->patched_code_len);
   1.770 -    kunmap(page);
   1.771 -
   1.772 -    unlock_page(page);
   1.773 -    spin_unlock(&mm->page_table_lock);
   1.774 -
   1.775 -    /* Success! Return to user land to execute 2nd insn of the pair. */
   1.776 -    regs->esp -= 4;
   1.777 -    regs->eip = FIXUP_BUF_USER + fe->return_idx;
   1.778 -
   1.779 - out:
   1.780 -    up_read(&mm->mmap_sem);
   1.781 -    return;
   1.782 -
   1.783 - unlock_and_out:
   1.784 -    spin_unlock(&mm->page_table_lock);
   1.785 -    up_read(&mm->mmap_sem);
   1.786 -    return;
   1.787 -}
   1.788 -
   1.789 -static int fixup_thread(void *unused)
   1.790 -{
   1.791 -    daemonize("segfixup");
   1.792 -    
   1.793 -    for ( ; ; )
   1.794 -    {
   1.795 -        set_current_state(TASK_INTERRUPTIBLE);
   1.796 -        schedule();
   1.797 +        printk("Continuing...\n\n");
   1.798      }
   1.799  }
   1.800  
   1.801 -static int nosegfixup = 0;
   1.802 -
   1.803  static int __init fixup_init(void)
   1.804  {
   1.805 -    int i;
   1.806 -
   1.807 -    nosegfixup = 1; /* XXX */
   1.808 -
   1.809 -    if ( !nosegfixup )
   1.810 -    {
   1.811 -        HYPERVISOR_vm_assist(VMASST_CMD_enable,
   1.812 -                             VMASST_TYPE_4gb_segments_notify);
   1.813 -        fixup_buf = (char *)__get_free_pages(GFP_ATOMIC, FIXUP_BUF_ORDER);
   1.814 -        for ( i = 0; i < (1 << FIXUP_BUF_ORDER); i++ )
   1.815 -            SetPageReserved(virt_to_page(fixup_buf) + i);
   1.816 -        memset(fixup_hash, 0, sizeof(fixup_hash));
   1.817 -        (void)kernel_thread(fixup_thread, NULL, CLONE_FS | CLONE_FILES);
   1.818 -    }
   1.819 -
   1.820 +    HYPERVISOR_vm_assist(VMASST_CMD_enable,
   1.821 +                         VMASST_TYPE_4gb_segments_notify);
   1.822      return 0;
   1.823  }
   1.824  __initcall(fixup_init);
   1.825 -
   1.826 -static int __init fixup_setup(char *str)
   1.827 -{
   1.828 -    nosegfixup = 1;
   1.829 -    return 0;
   1.830 -}
   1.831 -__setup("nosegfixup", fixup_setup);