ia64/xen-unstable

changeset 2103:47f04385bb81

bitkeeper revision 1.1158 (41137071d88TgpVO07tfpWOGGvxo8A)

Dynamic binary rewriting of negative-offset segment accesses is now
enabled by default in Linux 2.6. This removes the approximately 100-percent
overhead incurred by pure emulation on library-intensive benchmarks.
author kaf24@scramble.cl.cam.ac.uk
date Fri Aug 06 11:50:09 2004 +0000 (2004-08-06)
parents 1d27d2477ab7
children 4c3d4467e243
files linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c
line diff
     1.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c	Fri Aug 06 10:53:33 2004 +0000
     1.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c	Fri Aug 06 11:50:09 2004 +0000
     1.3 @@ -1100,8 +1100,6 @@ void __init setup_arch(char **cmdline_p)
     1.4  
     1.5  	HYPERVISOR_vm_assist(VMASST_CMD_enable,
     1.6  			     VMASST_TYPE_4gb_segments);
     1.7 -	HYPERVISOR_vm_assist(VMASST_CMD_enable,
     1.8 -			     VMASST_TYPE_4gb_segments_notify);
     1.9  #if 0
    1.10  	HYPERVISOR_vm_assist(VMASST_CMD_enable,
    1.11  			     VMASST_TYPE_writeable_pagetables);
     2.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Fri Aug 06 10:53:33 2004 +0000
     2.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Fri Aug 06 11:50:09 2004 +0000
     2.3 @@ -110,7 +110,7 @@ static unsigned char insn_decode[256] = 
     2.4      X, X, X, X, X, X, X, X,
     2.5      /* 0xE0 - 0xEF */
     2.6      X, X, X, X, X, X, X, X,
     2.7 -    X, X, X, O|1, X, X, X, X,
     2.8 +    X, O|4, X, O|1, X, X, X, X,
     2.9      /* 0xF0 - 0xFF */
    2.10      P, X, P, P, O, O, O|M|1, O|M|4, 
    2.11      O, O, O, O, O, O, O|M, O|M
    2.12 @@ -133,10 +133,7 @@ static unsigned int get_insn_len(unsigne
    2.13  
    2.14      /* 2. Ensure we have a valid opcode byte. */
    2.15      if ( !(d & OPCODE_BYTE) )
    2.16 -    {
    2.17 -        printk(KERN_ALERT " !!! 0x%02x 0x%02x\n", b, *(pb+1));
    2.18          return 0;
    2.19 -    }
    2.20  
    2.21      /* 3. Process Mod/RM if there is one. */
    2.22      if ( d & HAS_MODRM )
    2.23 @@ -166,21 +163,32 @@ static unsigned int get_insn_len(unsigne
    2.24      return ((pb - insn) + 1 + (d & INSN_SUFFIX_BYTES));
    2.25  }
    2.26  
    2.27 +static unsigned char handleable_code[32] = {
    2.28 +    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    2.29 +    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    2.30 +    /* 0x80-0x83, 0x89, 0x8B */
    2.31 +    0x0F, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    2.32 +    /* 0xC7 */
    2.33 +    0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    2.34 +};
    2.35 +
    2.36  asmlinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
    2.37  {
    2.38      static unsigned int fixup_idx = 0;
    2.39 -    int relbyte_idx = -1;
    2.40 +    int relbyte_idx = -1, relword_idx = -1, save_indirect_reg;
    2.41      unsigned int insn_len = (unsigned int)error_code, new_insn_len;
    2.42 -    unsigned char b[20], modrm, mod, reg, rm, *fixup_buf, patch[5], opcode;
    2.43 -    unsigned long fixup_buf_user, eip = regs->eip - insn_len;
    2.44 +    unsigned char b[20], modrm, mod, reg, rm, patch[5], opcode;
    2.45 +    unsigned char *fixup_buf = 
    2.46 +        (unsigned char *)fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RW);
    2.47 +    unsigned long fixup_buf_user = 
    2.48 +        fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO);
    2.49 +    unsigned long eip = regs->eip - insn_len;
    2.50      struct fixup_entry *fe;
    2.51      pte_t *pte;
    2.52      pmd_t *pmd;
    2.53      pgd_t *pgd;
    2.54      void *veip;
    2.55  
    2.56 -    return; /* XXX */
    2.57 -
    2.58      /* Easy check that code segment has base 0, max limit. */
    2.59      if ( unlikely(regs->xcs != __USER_CS) )
    2.60      {
    2.61 @@ -190,7 +198,17 @@ asmlinkage void do_fixup_4gb_segment(str
    2.62  
    2.63      if ( unlikely(eip >= (PAGE_OFFSET-32)) )
    2.64      {
    2.65 -        DPRINTK("User executing out of kernel space?!");
    2.66 +        if ( (eip < fixup_buf_user) || (eip >= (fixup_buf_user+PAGE_SIZE-32)) )
    2.67 +        {
    2.68 +            DPRINTK("User executing out of kernel space?!");
    2.69 +            return;
    2.70 +        }
    2.71 +        /* We know it's safe to directly copy the bytes into our buffer. */
    2.72 +        memcpy(b, (void *)eip, sizeof(b));
    2.73 +    }
    2.74 +    else if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
    2.75 +    {
    2.76 +        DPRINTK("Could not read instruction bytes from user space.");
    2.77          return;
    2.78      }
    2.79  
    2.80 @@ -203,18 +221,16 @@ asmlinkage void do_fixup_4gb_segment(str
    2.81      /* Guaranteed enough room to patch? */
    2.82      if ( unlikely(fixup_idx > (PAGE_SIZE-32)) )
    2.83      {
    2.84 -        DPRINTK("Out of room in fixup page.");
    2.85 +        static int printed = 0;
    2.86 +        if ( !printed )
    2.87 +            printk(KERN_ALERT "WARNING: Out of room in segment-fixup page.\n");
    2.88 +        printed = 1;
    2.89          return;
    2.90      }
    2.91  
    2.92 -    if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
    2.93 -    {
    2.94 -        DPRINTK("Could not read instruction bytes from user space.");
    2.95 -        return;
    2.96 -    }
    2.97 -
    2.98 -    /* Must be 'mov %gs:m32,r32' or 'mov r32,%gs:m32'. */
    2.99 -    if ( (b[0] != 0x65) || ((b[1] != 0x89) && (b[1] != 0x8b)) )
   2.100 +    /* Must be a handleable opcode with GS override. */
   2.101 +    if ( (b[0] != 0x65) || 
   2.102 +         !test_bit((unsigned int)b[1], (unsigned long *)handleable_code) )
   2.103      {
   2.104          DPRINTK("No GS override, or not a MOV (%02x %02x).", b[0], b[1]);
   2.105          return;
   2.106 @@ -225,6 +241,9 @@ asmlinkage void do_fixup_4gb_segment(str
   2.107      reg   = (modrm >> 3) & 7;
   2.108      rm    = (modrm >> 0) & 7;
   2.109  
   2.110 +    /* If indirect register isn't clobbered then we must push/pop it. */
   2.111 +    save_indirect_reg = !((b[1] == 0x8b) && (reg == rm));
   2.112 +
   2.113      /* We don't grok SIB bytes. */
   2.114      if ( rm == 4 )
   2.115      {
   2.116 @@ -236,26 +255,20 @@ asmlinkage void do_fixup_4gb_segment(str
   2.117      switch ( mod )
   2.118      {
   2.119      case 0:
   2.120 -        if ( (rm == 5) || unlikely(insn_len != 3) )
   2.121 +        if ( rm == 5 )
   2.122          {
   2.123 -            DPRINTK("Unhandleable disp32 EA, or bad insn_len (%d, %d).",
   2.124 -                    rm, insn_len);
   2.125 +            DPRINTK("Unhandleable disp32 EA %d.", rm);
   2.126              return;
   2.127          }
   2.128          break;            /* m32 == (r32) */
   2.129      case 1:
   2.130 -        if ( unlikely(insn_len != 4) )
   2.131 -        {
   2.132 -            DPRINTK("Bad insn_len (%d).", insn_len);
   2.133 -            return;
   2.134 -        }
   2.135          break;            /* m32 == disp8(r32) */
   2.136      default:
   2.137          DPRINTK("Unhandleable Mod value %d.", mod);
   2.138          return;
   2.139      }
   2.140  
   2.141 -    for ( ; ; )
   2.142 +    while ( insn_len < 5 )
   2.143      {
   2.144          /* Bail if can't decode the following instruction. */
   2.145          if ( unlikely((new_insn_len =
   2.146 @@ -270,7 +283,7 @@ asmlinkage void do_fixup_4gb_segment(str
   2.147          {
   2.148              if ( relbyte_idx != -1 )
   2.149              {
   2.150 -                printk(KERN_ALERT "Multiple relative offsets in patch seq!");
   2.151 +                DPRINTK("Multiple relative offsets in patch seq!");
   2.152                  return;
   2.153              }
   2.154              relbyte_idx = insn_len;
   2.155 @@ -278,6 +291,18 @@ asmlinkage void do_fixup_4gb_segment(str
   2.156                  relbyte_idx++;
   2.157              relbyte_idx++;
   2.158          }
   2.159 +        else if ( opcode == 0xe9 )
   2.160 +        {
   2.161 +            if ( relword_idx != -1 )
   2.162 +            {
   2.163 +                DPRINTK("Multiple relative offsets in patch seq!");
   2.164 +                return;
   2.165 +            }
   2.166 +            relword_idx = insn_len;
   2.167 +            while ( b[relword_idx] != opcode )
   2.168 +                relword_idx++;
   2.169 +            relword_idx++;
   2.170 +        }
   2.171  
   2.172          if ( (insn_len += new_insn_len) > 20 )
   2.173          {
   2.174 @@ -290,16 +315,13 @@ asmlinkage void do_fixup_4gb_segment(str
   2.175              break;
   2.176  
   2.177          /* Can't have a RET in the middle of a patch sequence. */
   2.178 -        if ( (opcode == 0xc4) || (relbyte_idx != -1) )
   2.179 +        if ( opcode == 0xc4 )
   2.180          {
   2.181 -            printk(KERN_ALERT "RET or rel. off. in middle of patch seq!\n");
   2.182 +            DPRINTK("RET in middle of patch seq!\n");
   2.183              return;
   2.184          }
   2.185      }
   2.186  
   2.187 -    fixup_buf = (unsigned char *)fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RW);
   2.188 -    fixup_buf_user = fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO);
   2.189 -
   2.190      /* Already created a fixup for this address and code sequence? */
   2.191      for ( fe = fixup_hash[FIXUP_HASH(eip)];
   2.192            fe != NULL; fe = fe->next )
   2.193 @@ -307,13 +329,7 @@ asmlinkage void do_fixup_4gb_segment(str
   2.194          if ( (fe->patch_addr == eip) &&
   2.195               (fe->patched_code_len == insn_len) &&
   2.196               (memcmp(fe->patched_code, b, insn_len) == 0) )
   2.197 -        {
   2.198 -#if 0
   2.199 -            if ( fe->fixup_idx == 10000 )
   2.200 -                return;
   2.201 -#endif
   2.202              goto do_the_patch;
   2.203 -        }
   2.204      }
   2.205  
   2.206      /* No existing patch -- create an entry for one. */
   2.207 @@ -330,22 +346,8 @@ asmlinkage void do_fixup_4gb_segment(str
   2.208      fe->next = fixup_hash[FIXUP_HASH(eip)];
   2.209      fixup_hash[FIXUP_HASH(eip)] = fe;
   2.210      
   2.211 -#if 0
   2.212 -    if ( (eip & 0x3f) == 0x38 )
   2.213 -    {
   2.214 -        int i;
   2.215 -        static int ii = 0;
   2.216 -        printk(KERN_ALERT " !!!!!!! %d'th reject\n"KERN_ALERT" .byte ", ++ii);
   2.217 -        for ( i = 0; i < insn_len; i++ )
   2.218 -            printk("0x%02x,", b[i]);
   2.219 -        printk("\n");
   2.220 -        fe->fixup_idx = 10000;
   2.221 -        return;
   2.222 -    }
   2.223 -#endif
   2.224 -
   2.225      /* push <r32> */
   2.226 -    if ( reg != rm )
   2.227 +    if ( save_indirect_reg )
   2.228          fixup_buf[fixup_idx++] = 0x50 + rm;
   2.229  
   2.230      /* add %gs:0,<r32> */
   2.231 @@ -360,12 +362,15 @@ asmlinkage void do_fixup_4gb_segment(str
   2.232      fixup_idx += error_code - 1;
   2.233  
   2.234      /* pop <r32> */
   2.235 -    if ( reg != rm )
   2.236 +    if ( save_indirect_reg )
   2.237          fixup_buf[fixup_idx++] = 0x58 + rm;
   2.238  
   2.239 -    /* Relocated instructions, minus the initial GS override. */
   2.240 -    memcpy(&fixup_buf[fixup_idx], &b[error_code], insn_len - error_code);
   2.241 -    fixup_idx += insn_len - error_code;
   2.242 +    if ( insn_len != error_code )
   2.243 +    {
   2.244 +        /* Relocated instructions. */
   2.245 +        memcpy(&fixup_buf[fixup_idx], &b[error_code], insn_len - error_code);
   2.246 +        fixup_idx += insn_len - error_code;
   2.247 +    }
   2.248  
   2.249      /* jmp <rel32> */
   2.250      fixup_buf[fixup_idx++] = 0xe9;
   2.251 @@ -376,8 +381,8 @@ asmlinkage void do_fixup_4gb_segment(str
   2.252      if ( relbyte_idx != -1 )
   2.253      {
   2.254          /* Patch the 8-bit relative offset. */
   2.255 -        int idx = relbyte_idx + 6;
   2.256 -        if ( reg != rm )
   2.257 +        int idx = fe->fixup_idx + relbyte_idx + 6;
   2.258 +        if ( save_indirect_reg )
   2.259              idx += 2;
   2.260          fixup_buf[idx] = fixup_idx - (idx + 1);
   2.261          
   2.262 @@ -385,9 +390,17 @@ asmlinkage void do_fixup_4gb_segment(str
   2.263          fixup_buf[fixup_idx++] = 0xe9;
   2.264          fixup_idx += 4;
   2.265          *(unsigned long *)&fixup_buf[fixup_idx-4] = 
   2.266 -            (eip + relbyte_idx + 1 + b[relbyte_idx]) - 
   2.267 +            (eip + relbyte_idx + 1 + (long)(char)b[relbyte_idx]) - 
   2.268              (fixup_buf_user + fixup_idx);
   2.269 -
   2.270 +    }
   2.271 +    else if ( relword_idx != -1 )
   2.272 +    {
   2.273 +        /* Patch the 32-bit relative offset by subtracting the code disp. */
   2.274 +        int idx = fe->fixup_idx + relword_idx + 6;
   2.275 +        if ( save_indirect_reg )
   2.276 +            idx += 2;
   2.277 +        *(unsigned long *)&fixup_buf[idx] +=
   2.278 +            (eip + relword_idx) - (fixup_buf_user + idx);
   2.279      }
   2.280  
   2.281   do_the_patch:
   2.282 @@ -405,18 +418,36 @@ asmlinkage void do_fixup_4gb_segment(str
   2.283  
   2.284      /* Success! Return to user land to execute 2nd insn of the pair. */
   2.285      regs->eip = fixup_buf_user + fe->fixup_idx + error_code + 6;
   2.286 -    if ( reg != rm )
   2.287 -        regs->eip += 2; /* account for push/pop pair */
   2.288 +    if ( save_indirect_reg )
   2.289 +        regs->eip += 2;
   2.290      return;
   2.291  }
   2.292  
   2.293 +static int nosegfixup = 0;
   2.294 +
   2.295  static int __init fixup_init(void)
   2.296  {
   2.297 -    unsigned long page = get_zeroed_page(GFP_ATOMIC);
   2.298 +    unsigned long page;
   2.299 +
   2.300 +    if ( nosegfixup )
   2.301 +        return 0;
   2.302 +
   2.303 +    HYPERVISOR_vm_assist(VMASST_CMD_enable,
   2.304 +                         VMASST_TYPE_4gb_segments_notify);
   2.305 +
   2.306 +    page = get_zeroed_page(GFP_ATOMIC);
   2.307      __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RO, __pa(page), PAGE_READONLY);
   2.308      __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RW, __pa(page), PAGE_KERNEL);
   2.309 +
   2.310      memset(fixup_hash, 0, sizeof(fixup_hash));
   2.311 +
   2.312      return 0;
   2.313  }
   2.314 +__initcall(fixup_init);
   2.315  
   2.316 -__initcall(fixup_init);
   2.317 +static int __init fixup_setup(char *str)
   2.318 +{
   2.319 +    nosegfixup = 1;
   2.320 +    return 0;
   2.321 +}
   2.322 +__setup("nosegfixup", fixup_setup);