direct-io.hg
changeset 2093:e8ef06e458e1
bitkeeper revision 1.1145.1.2 (4112b44fQiWPPD5sUdsW9Yhi90ujCg)
Binary-rewrite patches.
Binary-rewrite patches.
| field | value |
|---|---|
| author | kaf24@scramble.cl.cam.ac.uk |
| date | Thu Aug 05 22:27:27 2004 +0000 (2004-08-05) |
| parents | 91b951040040 |
| children | 8e0d9e45c5f7 |
| files | linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h |
line diff
1.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c Thu Aug 05 22:25:17 2004 +0000
1.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c Thu Aug 05 22:27:27 2004 +0000
1.3 @@ -23,9 +23,400 @@
1.4   */
1.5
1.6  #include <linux/config.h>
1.7 +#include <linux/init.h>
1.8  #include <linux/sched.h>
1.9  #include <linux/kernel.h>
1.10 +#include <linux/highmem.h>
1.11 +#include <asm/fixmap.h>
1.12 +#include <asm/pgtable.h>
1.13 +#include <asm/uaccess.h>
1.14 +
1.15 +#if 0
1.16 +#define ASSERT(_p) \
1.17 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
1.18 +    __LINE__, __FILE__); *(int*)0=0; }
1.19 +#define DPRINTK(_f, _a...) printk(KERN_ALERT \
1.20 +                           "(file=%s, line=%d, eip=%08lx) " _f "\n", \
1.21 +                           __FILE__ , __LINE__ , eip, ## _a )
1.22 +#else
1.23 +#define ASSERT(_p) ((void)0)
1.24 +#define DPRINTK(_f, _a...) ((void)0)
1.25 +#endif
1.26 +
1.27 +struct fixup_entry {
1.28 +    unsigned long patch_addr;
1.29 +    unsigned char patched_code[20];
1.30 +    unsigned short patched_code_len;
1.31 +    unsigned short fixup_idx;
1.32 +    struct fixup_entry *next;
1.33 +};
1.34 +
1.35 +#define FIXUP_HASHSZ 128
1.36 +static struct fixup_entry *fixup_hash[FIXUP_HASHSZ];
1.37 +#define FIXUP_HASH(_a) ((unsigned int)(_a) & (FIXUP_HASHSZ-1))
1.38 +
1.39 +#define INSN_SUFFIX_BYTES (7)
1.40 +#define PREFIX_BYTE (1<<3)
1.41 +#define OPCODE_BYTE (1<<4)
1.42 +#define HAS_MODRM (1<<5)
1.43 +
1.44 +#define X 0 /* invalid */
1.45 +#define P PREFIX_BYTE
1.46 +#define O OPCODE_BYTE
1.47 +#define M HAS_MODRM
1.48 +
1.49 +static unsigned char insn_decode[256] = {
1.50 +    /* 0x00 - 0x0F */
1.51 +    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
1.52 +    O|M, O|M, O|M, O|M, O|1, O|4, O, X,
1.53 +    /* 0x10 - 0x1F */
1.54 +    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
1.55 +    O|M, O|M, O|M, O|M, O|1, O|4, O, O,
1.56 +    /* 0x20 - 0x2F */
1.57 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
1.58 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
1.59 +    /* 0x30 - 0x3F */
1.60 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
1.61 +    O|M, O|M, O|M, O|M, O|1, O|4, P, O,
1.62 +    /* 0x40 - 0x4F */
1.63 +    O, O, O, O, O, O, O, O,
1.64 +    O, O, O, O, O, O, O, O,
1.65 +    /* 0x50 - 0x5F */
1.66 +    O, O, O, O, O, O, O, O,
1.67 +    O, O, O, O, O, O, O, O,
1.68 +    /* 0x60 - 0x6F */
1.69 +    O, O, O|M, O|M, P, P, X, X,
1.70 +    O|4, O|M|4, O|1, O|M|1, O, O, O, O,
1.71 +    /* 0x70 - 0x7F */
1.72 +    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
1.73 +    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
1.74 +    /* 0x80 - 0x8F */
1.75 +    O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
1.76 +    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
1.77 +    /* 0x90 - 0x9F */
1.78 +    O, O, O, O, O, O, O, O,
1.79 +    O, O, X, O, O, O, O, O,
1.80 +    /* 0xA0 - 0xAF */
1.81 +    O|1, O|4, O|1, O|4, O, O, O, O,
1.82 +    O|1, O|4, O, O, O, O, O, O,
1.83 +    /* 0xB0 - 0xBF */
1.84 +    O|1, O|1, O|1, O|1, O|1, O|1, O|1, O|1,
1.85 +    O|4, O|4, O|4, O|4, O|4, O|4, O|4, O|4,
1.86 +    /* 0xC0 - 0xCF */
1.87 +    O|M|1, O|M|1, X, O, X, X, O|M|1, O|M|4,
1.88 +    X, X, X, X, X, X, X, X,
1.89 +    /* 0xD0 - 0xDF */
1.90 +    O|M, O|M, O|M, O|M, O|1, O|1, X, X,
1.91 +    X, X, X, X, X, X, X, X,
1.92 +    /* 0xE0 - 0xEF */
1.93 +    X, X, X, X, X, X, X, X,
1.94 +    X, X, X, O|1, X, X, X, X,
1.95 +    /* 0xF0 - 0xFF */
1.96 +    P, X, P, P, O, O, O|M|1, O|M|4,
1.97 +    O, O, O, O, O, O, O|M, O|M
1.98 +};
1.99 +
1.100 +static unsigned int get_insn_len(unsigned char *insn, unsigned char *p_opcode)
1.101 +{
1.102 +    unsigned char b, d, *pb, mod, rm;
1.103 +
1.104 +    /* 1. Step over the prefix bytes. */
1.105 +    for ( pb = insn; (pb - insn) < 4; pb++ )
1.106 +    {
1.107 +        b = *pb;
1.108 +        d = insn_decode[b];
1.109 +        if ( !(d & PREFIX_BYTE) )
1.110 +            break;
1.111 +    }
1.112 +
1.113 +    *p_opcode = b;
1.114 +
1.115 +    /* 2. Ensure we have a valid opcode byte. */
1.116 +    if ( !(d & OPCODE_BYTE) )
1.117 +    {
1.118 +        printk(KERN_ALERT " !!! 0x%02x 0x%02x\n", b, *(pb+1));
1.119 +        return 0;
1.120 +    }
1.121 +
1.122 +    /* 3. Process Mod/RM if there is one. */
1.123 +    if ( d & HAS_MODRM )
1.124 +    {
1.125 +        b = *(++pb);
1.126 +        if ( (mod = (b >> 6) & 3) != 3 )
1.127 +        {
1.128 +            if ( (rm = (b >> 0) & 7) == 4 )
1.129 +                pb += 1; /* SIB byte */
1.130 +            switch ( mod )
1.131 +            {
1.132 +            case 0:
1.133 +                if ( rm == 5 )
1.134 +                    pb += 4; /* disp32 */
1.135 +                break;
1.136 +            case 1:
1.137 +                pb += 1; /* disp8 */
1.138 +                break;
1.139 +            case 2:
1.140 +                pb += 4; /* disp32 */
1.141 +                break;
1.142 +            }
1.143 +        }
1.144 +    }
1.145 +
1.146 +    /* 4. All done. Result is all byte sstepped over, plus any immediates. */
1.147 +    return ((pb - insn) + 1 + (d & INSN_SUFFIX_BYTES));
1.148 +}
1.149
1.150  asmlinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
1.151  {
1.152 +    static unsigned int fixup_idx = 0;
1.153 +    int relbyte_idx = -1;
1.154 +    unsigned int insn_len = (unsigned int)error_code, new_insn_len;
1.155 +    unsigned char b[20], modrm, mod, reg, rm, *fixup_buf, patch[5], opcode;
1.156 +    unsigned long fixup_buf_user, eip = regs->eip - insn_len;
1.157 +    struct fixup_entry *fe;
1.158 +    pte_t *pte;
1.159 +    pmd_t *pmd;
1.160 +    pgd_t *pgd;
1.161 +    void *veip;
1.162 +
1.163 +    return; /* XXX */
1.164 +
1.165 +    /* Easy check that code segment has base 0, max limit. */
1.166 +    if ( unlikely(regs->xcs != __USER_CS) )
1.167 +    {
1.168 +        DPRINTK("Unexpected CS value.");
1.169 +        return;
1.170 +    }
1.171 +
1.172 +    if ( unlikely(eip >= (PAGE_OFFSET-32)) )
1.173 +    {
1.174 +        DPRINTK("User executing out of kernel space?!");
1.175 +        return;
1.176 +    }
1.177 +
1.178 +    if ( unlikely(((eip ^ (eip+5)) & PAGE_MASK) != 0) )
1.179 +    {
1.180 +        DPRINTK("Patch instruction would straddle a page boundary.");
1.181 +        return;
1.182 +    }
1.183 +
1.184 +    /* Guaranteed enough room to patch? */
1.185 +    if ( unlikely(fixup_idx > (PAGE_SIZE-32)) )
1.186 +    {
1.187 +        DPRINTK("Out of room in fixup page.");
1.188 +        return;
1.189 +    }
1.190 +
1.191 +    if ( unlikely(copy_from_user(b, (void *)eip, sizeof(b)) != 0) )
1.192 +    {
1.193 +        DPRINTK("Could not read instruction bytes from user space.");
1.194 +        return;
1.195 +    }
1.196 +
1.197 +    /* Must be 'mov %gs:m32,r32' or 'mov r32,%gs:m32'. */
1.198 +    if ( (b[0] != 0x65) || ((b[1] != 0x89) && (b[1] != 0x8b)) )
1.199 +    {
1.200 +        DPRINTK("No GS override, or not a MOV (%02x %02x).", b[0], b[1]);
1.201 +        return;
1.202 +    }
1.203 +
1.204 +    modrm = b[2];
1.205 +    mod = (modrm >> 6) & 3;
1.206 +    reg = (modrm >> 3) & 7;
1.207 +    rm = (modrm >> 0) & 7;
1.208 +
1.209 +    /* We don't grok SIB bytes. */
1.210 +    if ( rm == 4 )
1.211 +    {
1.212 +        DPRINTK("We don't grok SIB bytes.");
1.213 +        return;
1.214 +    }
1.215 +
1.216 +    /* Ensure Mod/RM specifies (r32) or disp8(r32). */
1.217 +    switch ( mod )
1.218 +    {
1.219 +    case 0:
1.220 +        if ( (rm == 5) || unlikely(insn_len != 3) )
1.221 +        {
1.222 +            DPRINTK("Unhandleable disp32 EA, or bad insn_len (%d, %d).",
1.223 +                    rm, insn_len);
1.224 +            return;
1.225 +        }
1.226 +        break; /* m32 == (r32) */
1.227 +    case 1:
1.228 +        if ( unlikely(insn_len != 4) )
1.229 +        {
1.230 +            DPRINTK("Bad insn_len (%d).", insn_len);
1.231 +            return;
1.232 +        }
1.233 +        break; /* m32 == disp8(r32) */
1.234 +    default:
1.235 +        DPRINTK("Unhandleable Mod value %d.", mod);
1.236 +        return;
1.237 +    }
1.238 +
1.239 +    for ( ; ; )
1.240 +    {
1.241 +        /* Bail if can't decode the following instruction. */
1.242 +        if ( unlikely((new_insn_len =
1.243 +                       get_insn_len(&b[insn_len], &opcode)) == 0) )
1.244 +        {
1.245 +            DPRINTK("Could not decode following instruction.");
1.246 +            return;
1.247 +        }
1.248 +
1.249 +        /* We track one 8-bit relative offset for patching later. */
1.250 +        if ( ((opcode >= 0x70) && (opcode <= 0x7f)) || (opcode == 0xeb) )
1.251 +        {
1.252 +            if ( relbyte_idx != -1 )
1.253 +            {
1.254 +                printk(KERN_ALERT "Multiple relative offsets in patch seq!");
1.255 +                return;
1.256 +            }
1.257 +            relbyte_idx = insn_len;
1.258 +            while ( b[relbyte_idx] != opcode )
1.259 +                relbyte_idx++;
1.260 +            relbyte_idx++;
1.261 +        }
1.262 +
1.263 +        if ( (insn_len += new_insn_len) > 20 )
1.264 +        {
1.265 +            DPRINTK("Code to patch is too long!");
1.266 +            return;
1.267 +        }
1.268 +
1.269 +        /* The instructions together must be no smaller than 'jmp <disp32>'. */
1.270 +        if ( insn_len >= 5 )
1.271 +            break;
1.272 +
1.273 +        /* Can't have a RET in the middle of a patch sequence. */
1.274 +        if ( (opcode == 0xc4) || (relbyte_idx != -1) )
1.275 +        {
1.276 +            printk(KERN_ALERT "RET or rel. off. in middle of patch seq!\n");
1.277 +            return;
1.278 +        }
1.279 +    }
1.280 +
1.281 +    fixup_buf = (unsigned char *)fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RW);
1.282 +    fixup_buf_user = fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO);
1.283 +
1.284 +    /* Already created a fixup for this address and code sequence? */
1.285 +    for ( fe = fixup_hash[FIXUP_HASH(eip)];
1.286 +          fe != NULL; fe = fe->next )
1.287 +    {
1.288 +        if ( (fe->patch_addr == eip) &&
1.289 +             (fe->patched_code_len == insn_len) &&
1.290 +             (memcmp(fe->patched_code, b, insn_len) == 0) )
1.291 +        {
1.292 +#if 0
1.293 +            if ( fe->fixup_idx == 10000 )
1.294 +                return;
1.295 +#endif
1.296 +            goto do_the_patch;
1.297 +        }
1.298 +    }
1.299 +
1.300 +    /* No existing patch -- create an entry for one. */
1.301 +    fe = kmalloc(sizeof(struct fixup_entry), GFP_KERNEL);
1.302 +    if ( unlikely(fe == NULL) )
1.303 +    {
1.304 +        DPRINTK("Not enough memory to allocate a fixup_entry.");
1.305 +        return;
1.306 +    }
1.307 +    fe->patch_addr = eip;
1.308 +    fe->patched_code_len = insn_len;
1.309 +    memcpy(fe->patched_code, b, insn_len);
1.310 +    fe->fixup_idx = fixup_idx;
1.311 +    fe->next = fixup_hash[FIXUP_HASH(eip)];
1.312 +    fixup_hash[FIXUP_HASH(eip)] = fe;
1.313 +
1.314 +#if 0
1.315 +    if ( (eip & 0x3f) == 0x38 )
1.316 +    {
1.317 +        int i;
1.318 +        static int ii = 0;
1.319 +        printk(KERN_ALERT " !!!!!!! %d'th reject\n"KERN_ALERT" .byte ", ++ii);
1.320 +        for ( i = 0; i < insn_len; i++ )
1.321 +            printk("0x%02x,", b[i]);
1.322 +        printk("\n");
1.323 +        fe->fixup_idx = 10000;
1.324 +        return;
1.325 +    }
1.326 +#endif
1.327 +
1.328 +    /* push <r32> */
1.329 +    if ( reg != rm )
1.330 +        fixup_buf[fixup_idx++] = 0x50 + rm;
1.331 +
1.332 +    /* add %gs:0,<r32> */
1.333 +    fixup_buf[fixup_idx++] = 0x65;
1.334 +    fixup_buf[fixup_idx++] = 0x03;
1.335 +    fixup_buf[fixup_idx++] = 0x05 | (rm << 3);
1.336 +    *(unsigned long *)&fixup_buf[fixup_idx] = 0;
1.337 +    fixup_idx += 4;
1.338 +
1.339 +    /* First relocated instruction, minus the GS override. */
1.340 +    memcpy(&fixup_buf[fixup_idx], &b[1], error_code - 1);
1.341 +    fixup_idx += error_code - 1;
1.342 +
1.343 +    /* pop <r32> */
1.344 +    if ( reg != rm )
1.345 +        fixup_buf[fixup_idx++] = 0x58 + rm;
1.346 +
1.347 +    /* Relocated instructions, minus the initial GS override. */
1.348 +    memcpy(&fixup_buf[fixup_idx], &b[error_code], insn_len - error_code);
1.349 +    fixup_idx += insn_len - error_code;
1.350 +
1.351 +    /* jmp <rel32> */
1.352 +    fixup_buf[fixup_idx++] = 0xe9;
1.353 +    fixup_idx += 4;
1.354 +    *(unsigned long *)&fixup_buf[fixup_idx-4] =
1.355 +        (eip + insn_len) - (fixup_buf_user + fixup_idx);
1.356 +
1.357 +    if ( relbyte_idx != -1 )
1.358 +    {
1.359 +        /* Patch the 8-bit relative offset. */
1.360 +        int idx = relbyte_idx + 6;
1.361 +        if ( reg != rm )
1.362 +            idx += 2;
1.363 +        fixup_buf[idx] = fixup_idx - (idx + 1);
1.364 +
1.365 +        /* jmp <rel32> */
1.366 +        fixup_buf[fixup_idx++] = 0xe9;
1.367 +        fixup_idx += 4;
1.368 +        *(unsigned long *)&fixup_buf[fixup_idx-4] =
1.369 +            (eip + relbyte_idx + 1 + b[relbyte_idx]) -
1.370 +            (fixup_buf_user + fixup_idx);
1.371 +
1.372 +    }
1.373 +
1.374 + do_the_patch:
1.375 +    /* Create the patching instruction in a temporary buffer. */
1.376 +    patch[0] = 0xe9;
1.377 +    *(unsigned long *)&patch[1] =
1.378 +        (fixup_buf_user + fe->fixup_idx) - (eip + 5);
1.379 +
1.380 +    pgd = pgd_offset(current->mm, eip);
1.381 +    pmd = pmd_offset(pgd, eip);
1.382 +    pte = pte_offset_kernel(pmd, eip);
1.383 +    veip = kmap(pte_page(*pte));
1.384 +    memcpy((char *)veip + (eip & ~PAGE_MASK), patch, 5);
1.385 +    kunmap(pte_page(*pte));
1.386 +
1.387 +    /* Success! Return to user land to execute 2nd insn of the pair. */
1.388 +    regs->eip = fixup_buf_user + fe->fixup_idx + error_code + 6;
1.389 +    if ( reg != rm )
1.390 +        regs->eip += 2; /* account for push/pop pair */
1.391 +    return;
1.392  }
1.393 +
1.394 +static int __init fixup_init(void)
1.395 +{
1.396 +    unsigned long page = get_zeroed_page(GFP_ATOMIC);
1.397 +    __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RO, __pa(page), PAGE_READONLY);
1.398 +    __set_fixmap(FIX_4GB_SEGMENT_FIXUP_RW, __pa(page), PAGE_KERNEL);
1.399 +    memset(fixup_hash, 0, sizeof(fixup_hash));
1.400 +    return 0;
1.401 +}
1.402 +
1.403 +__initcall(fixup_init);
2.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h Thu Aug 05 22:25:17 2004 +0000
2.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/fixmap.h Thu Aug 05 22:27:27 2004 +0000
2.3 @@ -44,6 +44,8 @@
2.4  enum fixed_addresses {
2.5      FIX_HOLE,
2.6      FIX_VSYSCALL,
2.7 +    FIX_4GB_SEGMENT_FIXUP_RO,
2.8 +    FIX_4GB_SEGMENT_FIXUP_RW,
2.9  #ifdef CONFIG_X86_LOCAL_APIC
2.10      FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
2.11  #endif
2.12 @@ -124,8 +126,8 @@ extern void __set_fixmap_ma (enum fixed_
2.13   * This is the range that is readable by user mode, and things
2.14   * acting like user mode such as get_user_pages.
2.15   */
2.16 -#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL))
2.17 -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
2.18 +#define FIXADDR_USER_START (__fix_to_virt(FIX_4GB_SEGMENT_FIXUP_RO))
2.19 +#define FIXADDR_USER_END (FIXADDR_USER_START + (2*PAGE_SIZE))
2.20
2.21
2.22  extern void __this_fixmap_does_not_exist(void);