ia64/xen-unstable

changeset 2093:e8ef06e458e1

bitkeeper revision 1.1145.1.2 (4112b44fQiWPPD5sUdsW9Yhi90ujCg)

Binary-rewrite patches.
author kaf24@scramble.cl.cam.ac.uk
date Thu Aug 05 22:27:27 2004 +0000 (2004-08-05)
parents 91b951040040
children 8e0d9e45c5f7
files linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h
line diff
     1.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Thu Aug 05 22:25:17 2004 +0000
     1.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Thu Aug 05 22:27:27 2004 +0000
     1.3 @@ -23,9 +23,400 @@
     1.4   */
     1.5  
     1.6  #include <linux/config.h>
     1.7 +#include <linux/init.h>
     1.8  #include <linux/sched.h>
     1.9  #include <linux/kernel.h>
    1.10 +#include <linux/highmem.h>
    1.11 +#include <asm/fixmap.h>
    1.12 +#include <asm/pgtable.h>
    1.13 +#include <asm/uaccess.h>
    1.14 +
    1.15 +#if 0 /* Debug variants; DPRINTK expects a local 'eip' in the calling scope. */
    1.16 +#define ASSERT(_p) \
    1.17 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
    1.18 +    __LINE__, __FILE__); *(int*)0=0; }
    1.19 +#define DPRINTK(_f, _a...) printk(KERN_ALERT \
    1.20 +                           "(file=%s, line=%d, eip=%08lx) " _f "\n", \
    1.21 +                           __FILE__ , __LINE__ , eip, ## _a )
    1.22 +#else /* Quiet build: both macros expand to a no-op expression. */
    1.23 +#define ASSERT(_p) ((void)0)
    1.24 +#define DPRINTK(_f, _a...) ((void)0)
    1.25 +#endif
    1.26 +
    1.27 +struct fixup_entry { /* One patched user code site, chained per hash bucket. */
    1.28 +    unsigned long  patch_addr; /* user address (eip) of the patched code */
    1.29 +    unsigned char  patched_code[20]; /* instruction bytes read from that address before patching */
    1.30 +    unsigned short patched_code_len; /* number of valid bytes in patched_code */
    1.31 +    unsigned short fixup_idx; /* byte offset of this site's stub within the fixup page */
    1.32 +    struct fixup_entry *next; /* next entry in the same fixup_hash bucket */
    1.33 +};
    1.34 +
    1.35 +#define FIXUP_HASHSZ 128 /* bucket count; a power of two so FIXUP_HASH can mask */
    1.36 +static struct fixup_entry *fixup_hash[FIXUP_HASHSZ]; /* bucket heads of the fixup_entry chains */
    1.37 +#define FIXUP_HASH(_a) ((unsigned int)(_a) & (FIXUP_HASHSZ-1)) /* low bits of the address select the bucket */
    1.38 +
    1.39 +#define INSN_SUFFIX_BYTES (7) /* mask: low three bits of a table entry = trailing immediate byte count */
    1.40 +#define PREFIX_BYTE       (1<<3) /* table entry flag: byte is an instruction prefix */
    1.41 +#define OPCODE_BYTE       (1<<4)  /* table entry flag: byte is a decodable opcode */
    1.42 +#define HAS_MODRM         (1<<5) /* table entry flag: opcode is followed by a Mod/RM byte */
    1.43 +
    1.44 +#define X  0 /* invalid */
    1.45 +#define P  PREFIX_BYTE /* shorthand used only to keep the decode table compact */
    1.46 +#define O  OPCODE_BYTE
    1.47 +#define M  HAS_MODRM
    1.48 +
    1.49 +static unsigned char insn_decode[256] = { /* indexed by first opcode byte: P/O/M flags ORed with the trailing immediate byte count */
    1.50 +    /* 0x00 - 0x0F */
    1.51 +    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
    1.52 +    O|M, O|M, O|M, O|M, O|1, O|4, O, X,
    1.53 +    /* 0x10 - 0x1F */
    1.54 +    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
    1.55 +    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
    1.56 +    /* 0x20 - 0x2F */
    1.57 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    1.58 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    1.59 +    /* 0x30 - 0x3F */
    1.60 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    1.61 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
    1.62 +    /* 0x40 - 0x4F */
    1.63 +    O, O, O, O, O, O, O, O,
    1.64 +    O, O, O, O, O, O, O, O,
    1.65 +    /* 0x50 - 0x5F */
    1.66 +    O, O, O, O, O, O, O, O,
    1.67 +    O, O, O, O, O, O, O, O,
    1.68 +    /* 0x60 - 0x6F */
    1.69 +    O, O, O|M, O|M, P, P, X, X,
    1.70 +    O|4, O|M|4, O|1, O|M|1, O, O, O, O,
    1.71 +    /* 0x70 - 0x7F */
    1.72 +    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
    1.73 +    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
    1.74 +    /* 0x80 - 0x8F */
    1.75 +    O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
    1.76 +    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M, 
    1.77 +    /* 0x90 - 0x9F */
    1.78 +    O, O, O, O, O, O, O, O,
    1.79 +    O, O, X, O, O, O, O, O,
    1.80 +    /* 0xA0 - 0xAF */
    1.81 +    O|4, O|4, O|4, O|4, O, O, O, O, /* 0xA0-0xA3: the moffs operand is a 4-byte address, for the AL forms too */
    1.82 +    O|1, O|4, O, O, O, O, O, O,
    1.83 +    /* 0xB0 - 0xBF */
    1.84 +    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
    1.85 +    O|4, O|4, O|4, O|4, O|4, O|4, O|4, O|4,
    1.86 +    /* 0xC0 - 0xCF */
    1.87 +    O|M|1, O|M|1, X, O, X, X, O|M|1, O|M|4,
    1.88 +    X, X, X, X, X, X, X, X,
    1.89 +    /* 0xD0 - 0xDF */
    1.90 +    O|M, O|M, O|M, O|M, O|1, O|1, X, X,
    1.91 +    X, X, X, X, X, X, X, X,
    1.92 +    /* 0xE0 - 0xEF */
    1.93 +    X, X, X, X, X, X, X, X,
    1.94 +    X, X, X, O|1, X, X, X, X,
    1.95 +    /* 0xF0 - 0xFF */
    1.96 +    P, X, P, P, O, O, O|M|1, O|M|4, 
    1.97 +    O, O, O, O, O, O, O|M, O|M
    1.98 +};
    1.99 +
   1.100 +static unsigned int get_insn_len(unsigned char *insn, unsigned char *p_opcode)
   1.101 +{ /* Returns the byte length of the instruction at insn, or 0 if it cannot be decoded; the opcode byte is stored in *p_opcode. */
   1.102 +    unsigned char b, d, *pb, mod, rm;
   1.103 +
   1.104 +    /* 1. Step over the prefix bytes (at most four bytes are examined). */
   1.105 +    for ( pb = insn; (pb - insn) < 4; pb++ )
   1.106 +    {
   1.107 +        b = *pb;
   1.108 +        d = insn_decode[b];
   1.109 +        if ( !(d & PREFIX_BYTE) )
   1.110 +            break;
   1.111 +    }
   1.112 +
   1.113 +    *p_opcode = b; /* written even when the byte turns out not to be a valid opcode */
   1.114 +
   1.115 +    /* 2. Ensure we have a valid opcode byte. */
   1.116 +    if ( !(d & OPCODE_BYTE) )
   1.117 +    {
   1.118 +        printk(KERN_ALERT " !!! 0x%02x 0x%02x\n", b, *(pb+1));
   1.119 +        return 0;
   1.120 +    }
   1.121 +
   1.122 +    /* 3. Process Mod/RM if there is one. */
   1.123 +    if ( d & HAS_MODRM )
   1.124 +    {
   1.125 +        b = *(++pb);
   1.126 +        if ( (mod = (b >> 6) & 3) != 3 ) /* mod == 3 is register-direct: no SIB or displacement */
   1.127 +        {           
   1.128 +            if ( (rm = (b >> 0) & 7) == 4 )
   1.129 +                pb += 1; /* SIB byte */
   1.130 +            switch ( mod )
   1.131 +            {
   1.132 +            case 0:
   1.133 +                if ( rm == 5 )
   1.134 +                    pb += 4; /* disp32 */
   1.135 +                break;
   1.136 +            case 1:
   1.137 +                pb += 1; /* disp8 */
   1.138 +                break;
   1.139 +            case 2:
   1.140 +                pb += 4; /* disp32 */
   1.141 +                break;
   1.142 +            }
   1.143 +        }
   1.144 +    }
   1.145 +
   1.146 +    /* 4. All done. Result is all bytes stepped over, plus any immediates. */
   1.147 +    return ((pb - insn) + 1 + (d & INSN_SUFFIX_BYTES));
   1.148 +}
   1.149  
   1.150  asmlinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
   1.151  {
   1.152 +    static unsigned int fixup_idx = 0; /* next free byte offset in the fixup page */
   1.153 +    int relbyte_idx = -1; /* index in b[] of the one tracked rel8 operand, or -1 if none */
   1.154 +    unsigned int insn_len = (unsigned int)error_code, new_insn_len; /* error_code is used as the faulting insn's length */
   1.155 +    unsigned char b[20], modrm, mod, reg, rm, *fixup_buf, patch[5], opcode;
   1.156 +    unsigned long fixup_buf_user, eip = regs->eip - insn_len;
   1.157 +    struct fixup_entry *fe;
   1.158 +    pte_t *pte;
   1.159 +    pmd_t *pmd;
   1.160 +    pgd_t *pgd;
   1.161 +    void *veip;
   1.162 +
   1.163 +    return; /* XXX: handler is disabled here; everything below is currently unreachable. */
   1.164 +
   1.165 +    /* Easy check that code segment has base 0, max limit. */
   1.166 +    if ( unlikely(regs->xcs != __USER_CS) )
   1.167 +    {
   1.168 +        DPRINTK("Unexpected CS value.");
   1.169 +        return;
   1.170 +    }
   1.171 +
   1.172 +    if ( unlikely(eip >= (PAGE_OFFSET-32)) )
   1.173 +    {
   1.174 +        DPRINTK("User executing out of kernel space?!");
   1.175 +        return;
   1.176 +    }
   1.177 +
   1.178 +    if ( unlikely(((eip ^ (eip+5)) & PAGE_MASK) != 0) )
   1.179 +    {
   1.180 +        DPRINTK("Patch instruction would straddle a page boundary.");
   1.181 +        return;
   1.182 +    }
   1.183 +
   1.184 +    /* Guaranteed enough room to patch? */
   1.185 +    if ( unlikely(fixup_idx > (PAGE_SIZE-32)) )
   1.186 +    {
   1.187 +        DPRINTK("Out of room in fixup page.");
   1.188 +        return;
   1.189 +    }
   1.190 +
   1.191 +    if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
   1.192 +    {
   1.193 +        DPRINTK("Could not read instruction bytes from user space.");
   1.194 +        return;
   1.195 +    }
   1.196 +
   1.197 +    /* Must be 'mov %gs:m32,r32' or 'mov r32,%gs:m32'. */
   1.198 +    if ( (b[0] != 0x65) || ((b[1] != 0x89) && (b[1] != 0x8b)) )
   1.199 +    {
   1.200 +        DPRINTK("No GS override, or not a MOV (%02x %02x).", b[0], b[1]);
   1.201 +        return;
   1.202 +    }
   1.203 +
   1.204 +    modrm = b[2];
   1.205 +    mod   = (modrm >> 6) & 3;
   1.206 +    reg   = (modrm >> 3) & 7;
   1.207 +    rm    = (modrm >> 0) & 7;
   1.208 +
   1.209 +    /* We don't grok SIB bytes. */
   1.210 +    if ( rm == 4 )
   1.211 +    {
   1.212 +        DPRINTK("We don't grok SIB bytes.");
   1.213 +        return;
   1.214 +    }
   1.215 +
   1.216 +    /* Ensure Mod/RM specifies (r32) or disp8(r32). */
   1.217 +    switch ( mod )
   1.218 +    {
   1.219 +    case 0:
   1.220 +        if ( (rm == 5) || unlikely(insn_len != 3) )
   1.221 +        {
   1.222 +            DPRINTK("Unhandleable disp32 EA, or bad insn_len (%d, %d).",
   1.223 +                    rm, insn_len);
   1.224 +            return;
   1.225 +        }
   1.226 +        break;            /* m32 == (r32) */
   1.227 +    case 1:
   1.228 +        if ( unlikely(insn_len != 4) )
   1.229 +        {
   1.230 +            DPRINTK("Bad insn_len (%d).", insn_len);
   1.231 +            return;
   1.232 +        }
   1.233 +        break;            /* m32 == disp8(r32) */
   1.234 +    default:
   1.235 +        DPRINTK("Unhandleable Mod value %d.", mod);
   1.236 +        return;
   1.237 +    }
   1.238 +
   1.239 +    for ( ; ; ) /* absorb following instructions until the sequence can hold a 5-byte jmp */
   1.240 +    {
   1.241 +        /* Bail if can't decode the following instruction. */
   1.242 +        if ( unlikely((new_insn_len =
   1.243 +                       get_insn_len(&b[insn_len], &opcode)) == 0) )
   1.244 +        {
   1.245 +            DPRINTK("Could not decode following instruction.");
   1.246 +            return;
   1.247 +        }
   1.248 +
   1.249 +        /* We track one 8-bit relative offset for patching later. */
   1.250 +        if ( ((opcode >= 0x70) && (opcode <= 0x7f)) || (opcode == 0xeb) )
   1.251 +        {
   1.252 +            if ( relbyte_idx != -1 )
   1.253 +            {
   1.254 +                printk(KERN_ALERT "Multiple relative offsets in patch seq!");
   1.255 +                return;
   1.256 +            }
   1.257 +            relbyte_idx = insn_len;
   1.258 +            while ( b[relbyte_idx] != opcode ) /* skip any prefix bytes before the opcode */
   1.259 +                relbyte_idx++;
   1.260 +            relbyte_idx++;
   1.261 +        }
   1.262 +
   1.263 +        if ( (insn_len += new_insn_len) > 20 )
   1.264 +        {
   1.265 +            DPRINTK("Code to patch is too long!");
   1.266 +            return;
   1.267 +        }
   1.268 +
   1.269 +        /* The instructions together must be no smaller than 'jmp <disp32>'. */
   1.270 +        if ( insn_len >= 5 )
   1.271 +            break;
   1.272 +
   1.273 +        /* Can't have a RET (0xc3) or a relative branch in the middle of a patch sequence. */
   1.274 +        if ( (opcode == 0xc3) || (relbyte_idx != -1) )
   1.275 +        {
   1.276 +            printk(KERN_ALERT "RET or rel. off. in middle of patch seq!\n");
   1.277 +            return;
   1.278 +        }
   1.279 +    }
   1.280 +
   1.281 +    fixup_buf = (unsigned char *)fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RW);
   1.282 +    fixup_buf_user = fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO);
   1.283 +
   1.284 +    /* Already created a fixup for this address and code sequence? */
   1.285 +    for ( fe = fixup_hash[FIXUP_HASH(eip)];
   1.286 +          fe != NULL; fe = fe->next )
   1.287 +    {
   1.288 +        if ( (fe->patch_addr == eip) &&
   1.289 +             (fe->patched_code_len == insn_len) &&
   1.290 +             (memcmp(fe->patched_code, b, insn_len) == 0) )
   1.291 +        {
   1.292 +#if 0
   1.293 +            if ( fe->fixup_idx == 10000 )
   1.294 +                return;
   1.295 +#endif
   1.296 +            goto do_the_patch;
   1.297 +        }
   1.298 +    }
   1.299 +
   1.300 +    /* No existing patch -- create an entry for one. */
   1.301 +    fe = kmalloc(sizeof(struct fixup_entry), GFP_KERNEL);
   1.302 +    if ( unlikely(fe == NULL) )
   1.303 +    {
   1.304 +        DPRINTK("Not enough memory to allocate a fixup_entry.");
   1.305 +        return;
   1.306 +    }
   1.307 +    fe->patch_addr = eip;
   1.308 +    fe->patched_code_len = insn_len;
   1.309 +    memcpy(fe->patched_code, b, insn_len);
   1.310 +    fe->fixup_idx = fixup_idx;
   1.311 +    fe->next = fixup_hash[FIXUP_HASH(eip)];
   1.312 +    fixup_hash[FIXUP_HASH(eip)] = fe;
   1.313 +    
   1.314 +#if 0
   1.315 +    if ( (eip & 0x3f) == 0x38 )
   1.316 +    {
   1.317 +        int i;
   1.318 +        static int ii = 0;
   1.319 +        printk(KERN_ALERT " !!!!!!! %d'th reject\n"KERN_ALERT" .byte ", ++ii);
   1.320 +        for ( i = 0; i < insn_len; i++ )
   1.321 +            printk("0x%02x,", b[i]);
   1.322 +        printk("\n");
   1.323 +        fe->fixup_idx = 10000;
   1.324 +        return;
   1.325 +    }
   1.326 +#endif
   1.327 +
   1.328 +    /* push <r32> */
   1.329 +    if ( reg != rm )
   1.330 +        fixup_buf[fixup_idx++] = 0x50 + rm;
   1.331 +
   1.332 +    /* add %gs:0,<r32> */
   1.333 +    fixup_buf[fixup_idx++] = 0x65;
   1.334 +    fixup_buf[fixup_idx++] = 0x03;
   1.335 +    fixup_buf[fixup_idx++] = 0x05 | (rm << 3);
   1.336 +    *(unsigned long *)&fixup_buf[fixup_idx] = 0;
   1.337 +    fixup_idx += 4;
   1.338 +
   1.339 +    /* First relocated instruction, minus the GS override. */
   1.340 +    memcpy(&fixup_buf[fixup_idx], &b[1], error_code - 1);
   1.341 +    fixup_idx += error_code - 1;
   1.342 +
   1.343 +    /* pop <r32> */
   1.344 +    if ( reg != rm )
   1.345 +        fixup_buf[fixup_idx++] = 0x58 + rm;
   1.346 +
   1.347 +    /* Relocated instructions, minus the initial GS override. */
   1.348 +    memcpy(&fixup_buf[fixup_idx], &b[error_code], insn_len - error_code);
   1.349 +    fixup_idx += insn_len - error_code;
   1.350 +
   1.351 +    /* jmp <rel32> */
   1.352 +    fixup_buf[fixup_idx++] = 0xe9;
   1.353 +    fixup_idx += 4;
   1.354 +    *(unsigned long *)&fixup_buf[fixup_idx-4] = 
   1.355 +        (eip + insn_len) - (fixup_buf_user + fixup_idx);
   1.356 +
   1.357 +    if ( relbyte_idx != -1 )
   1.358 +    {
   1.359 +        /* Patch the 8-bit relative offset; idx is an absolute offset into the fixup page. */
   1.360 +        int idx = fe->fixup_idx + relbyte_idx + 6;
   1.361 +        if ( reg != rm )
   1.362 +            idx += 2;
   1.363 +        fixup_buf[idx] = fixup_idx - (idx + 1);
   1.364 +        
   1.365 +        /* jmp <rel32> to the original branch target; the rel8 displacement is sign-extended. */
   1.366 +        fixup_buf[fixup_idx++] = 0xe9;
   1.367 +        fixup_idx += 4;
   1.368 +        *(unsigned long *)&fixup_buf[fixup_idx-4] = 
   1.369 +            (eip + relbyte_idx + 1 + (signed char)b[relbyte_idx]) - 
   1.370 +            (fixup_buf_user + fixup_idx);
   1.371 +
   1.372 +    }
   1.373 +
   1.374 + do_the_patch:
   1.375 +    /* Create the patching instruction in a temporary buffer. */
   1.376 +    patch[0] = 0xe9;
   1.377 +    *(unsigned long *)&patch[1] = 
   1.378 +        (fixup_buf_user + fe->fixup_idx) - (eip + 5);
   1.379 +
   1.380 +    pgd = pgd_offset(current->mm, eip);
   1.381 +    pmd = pmd_offset(pgd, eip);
   1.382 +    pte = pte_offset_kernel(pmd, eip);
   1.383 +    veip = kmap(pte_page(*pte));
   1.384 +    memcpy((char *)veip + (eip & ~PAGE_MASK), patch, 5);
   1.385 +    kunmap(pte_page(*pte));
   1.386 +
   1.387 +    /* Success! Return to user land to execute 2nd insn of the pair. */
   1.388 +    regs->eip = fixup_buf_user + fe->fixup_idx + error_code + 6;
   1.389 +    if ( reg != rm )
   1.390 +        regs->eip += 2; /* account for push/pop pair */
   1.391 +    return;
   1.392  }
   1.393 +
   1.394 +static int __init fixup_init(void)
   1.395 +{ /* Maps one zeroed page at both fixup fixmap slots (read-only and kernel read-write) and clears the hash table. */
   1.396 +    unsigned long page = get_zeroed_page(GFP_ATOMIC); /* NOTE(review): the result is not checked for failure */
   1.397 +    __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RO, __pa(page), PAGE_READONLY);
   1.398 +    __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RW, __pa(page), PAGE_KERNEL);
   1.399 +    memset(fixup_hash, 0, sizeof(fixup_hash));
   1.400 +    return 0;
   1.401 +}
   1.402 +
   1.403 +__initcall(fixup_init);
     2.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Thu Aug 05 22:25:17 2004 +0000
     2.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Thu Aug 05 22:27:27 2004 +0000
     2.3 @@ -44,6 +44,8 @@
     2.4  enum fixed_addresses {
     2.5  	FIX_HOLE,
     2.6  	FIX_VSYSCALL,
     2.7 +	FIX_4GB_SEGMENT_FIXUP_RO,
     2.8 +	FIX_4GB_SEGMENT_FIXUP_RW,
     2.9  #ifdef CONFIG_X86_LOCAL_APIC
    2.10  	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
    2.11  #endif
    2.12 @@ -124,8 +126,8 @@ extern void __set_fixmap_ma (enum fixed_
    2.13   * This is the range that is readable by user mode, and things
    2.14   * acting like user mode such as get_user_pages.
    2.15   */
    2.16 -#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
    2.17 -#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
    2.18 +#define FIXADDR_USER_START	(__fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO))
    2.19 +#define FIXADDR_USER_END	(FIXADDR_USER_START + (2*PAGE_SIZE))
    2.20  
    2.21  
    2.22  extern void __this_fixmap_does_not_exist(void);