direct-io.hg
changeset 3899:74583df8e6c6
bitkeeper revision 1.1236.1.3 (421bc44363Gqj5L6SBcgLUYMYTmG8A)
More FPU cleanups. We emulate CLTS and direct mov to CR0 properly
now, so Linux execution should be correct now, I hope!
Signed-off-by: Keir Fraser <keir@xensource.com>
author | kaf24@scramble.cl.cam.ac.uk |
---|---|
date | Tue Feb 22 23:46:11 2005 +0000 (2005-02-22) |
parents | 3d81ec6a75cd |
children | 53af0ad97d93 1cbbd12a4918 |
files | .rootkeys linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h xen/arch/x86/traps.c |
line diff
1.1 --- a/.rootkeys Tue Feb 22 23:31:53 2005 +0000 1.2 +++ b/.rootkeys Tue Feb 22 23:46:11 2005 +0000 1.3 @@ -258,7 +258,6 @@ 40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6 1.4 40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h 1.5 40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h 1.6 41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/vga.h 1.7 -40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h 1.8 41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.10-xen-sparse/include/asm-xen/balloon.h 1.9 40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.10-xen-sparse/include/asm-xen/ctrl_if.h 1.10 40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.10-xen-sparse/include/asm-xen/evtchn.h
2.1 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c Tue Feb 22 23:31:53 2005 +0000 2.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c Tue Feb 22 23:46:11 2005 +0000 2.3 @@ -910,7 +910,7 @@ asmlinkage void math_state_restore(struc 2.4 if ((regs.xcs & 2) == 0) 2.5 return; 2.6 2.7 - clts(); /* Allow maths ops (or we recurse) */ 2.8 + /* NB. 'clts' is done for us by Xen during virtual trap. */ 2.9 if (!tsk->used_math) 2.10 init_fpu(tsk); 2.11 restore_fpu(tsk);
3.1 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h Tue Feb 22 23:31:53 2005 +0000 3.2 +++ b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h Tue Feb 22 23:46:11 2005 +0000 3.3 @@ -106,8 +106,7 @@ static inline unsigned long _get_base(ch 3.4 /* 3.5 * Clear and set 'TS' bit respectively 3.6 */ 3.7 -/* NB. 'clts' is done for us by Xen during virtual trap. */ 3.8 -#define clts() ((void)0) 3.9 +#define clts() __asm__ __volatile__ ("clts") 3.10 #define read_cr0() \ 3.11 BUG(); 3.12 #define write_cr0(x) \
4.1 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h Tue Feb 22 23:31:53 2005 +0000 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,884 +0,0 @@ 4.4 -/* 4.5 - * include/asm-i386/xor.h 4.6 - * 4.7 - * Optimized RAID-5 checksumming functions for MMX and SSE. 4.8 - * 4.9 - * This program is free software; you can redistribute it and/or modify 4.10 - * it under the terms of the GNU General Public License as published by 4.11 - * the Free Software Foundation; either version 2, or (at your option) 4.12 - * any later version. 4.13 - * 4.14 - * You should have received a copy of the GNU General Public License 4.15 - * (for example /usr/src/linux/COPYING); if not, write to the Free 4.16 - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 4.17 - */ 4.18 - 4.19 -/* 4.20 - * High-speed RAID5 checksumming functions utilizing MMX instructions. 4.21 - * Copyright (C) 1998 Ingo Molnar. 4.22 - */ 4.23 - 4.24 -#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" 4.25 -#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" 4.26 -#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" 4.27 -#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" 4.28 -#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" 4.29 -#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" 4.30 - 4.31 -#include <asm/i387.h> 4.32 - 4.33 -static void 4.34 -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 4.35 -{ 4.36 - unsigned long lines = bytes >> 7; 4.37 - 4.38 - kernel_fpu_begin(); 4.39 - 4.40 - __asm__ __volatile__ ( 4.41 -#undef BLOCK 4.42 -#define BLOCK(i) \ 4.43 - LD(i,0) \ 4.44 - LD(i+1,1) \ 4.45 - LD(i+2,2) \ 4.46 - LD(i+3,3) \ 4.47 - XO1(i,0) \ 4.48 - ST(i,0) \ 4.49 - XO1(i+1,1) \ 4.50 - ST(i+1,1) \ 4.51 - XO1(i+2,2) \ 4.52 - ST(i+2,2) \ 4.53 - XO1(i+3,3) \ 4.54 - ST(i+3,3) 4.55 - 4.56 - " .align 32 ;\n" 4.57 - " 1: ;\n" 4.58 - 4.59 - BLOCK(0) 4.60 - BLOCK(4) 4.61 - BLOCK(8) 4.62 - BLOCK(12) 4.63 - 4.64 - " addl $128, %1 ;\n" 4.65 - " addl $128, %2 ;\n" 
4.66 - " decl %0 ;\n" 4.67 - " jnz 1b ;\n" 4.68 - : "+r" (lines), 4.69 - "+r" (p1), "+r" (p2) 4.70 - : 4.71 - : "memory"); 4.72 - 4.73 - kernel_fpu_end(); 4.74 -} 4.75 - 4.76 -static void 4.77 -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.78 - unsigned long *p3) 4.79 -{ 4.80 - unsigned long lines = bytes >> 7; 4.81 - 4.82 - kernel_fpu_begin(); 4.83 - 4.84 - __asm__ __volatile__ ( 4.85 -#undef BLOCK 4.86 -#define BLOCK(i) \ 4.87 - LD(i,0) \ 4.88 - LD(i+1,1) \ 4.89 - LD(i+2,2) \ 4.90 - LD(i+3,3) \ 4.91 - XO1(i,0) \ 4.92 - XO1(i+1,1) \ 4.93 - XO1(i+2,2) \ 4.94 - XO1(i+3,3) \ 4.95 - XO2(i,0) \ 4.96 - ST(i,0) \ 4.97 - XO2(i+1,1) \ 4.98 - ST(i+1,1) \ 4.99 - XO2(i+2,2) \ 4.100 - ST(i+2,2) \ 4.101 - XO2(i+3,3) \ 4.102 - ST(i+3,3) 4.103 - 4.104 - " .align 32 ;\n" 4.105 - " 1: ;\n" 4.106 - 4.107 - BLOCK(0) 4.108 - BLOCK(4) 4.109 - BLOCK(8) 4.110 - BLOCK(12) 4.111 - 4.112 - " addl $128, %1 ;\n" 4.113 - " addl $128, %2 ;\n" 4.114 - " addl $128, %3 ;\n" 4.115 - " decl %0 ;\n" 4.116 - " jnz 1b ;\n" 4.117 - : "+r" (lines), 4.118 - "+r" (p1), "+r" (p2), "+r" (p3) 4.119 - : 4.120 - : "memory"); 4.121 - 4.122 - kernel_fpu_end(); 4.123 -} 4.124 - 4.125 -static void 4.126 -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.127 - unsigned long *p3, unsigned long *p4) 4.128 -{ 4.129 - unsigned long lines = bytes >> 7; 4.130 - 4.131 - kernel_fpu_begin(); 4.132 - 4.133 - __asm__ __volatile__ ( 4.134 -#undef BLOCK 4.135 -#define BLOCK(i) \ 4.136 - LD(i,0) \ 4.137 - LD(i+1,1) \ 4.138 - LD(i+2,2) \ 4.139 - LD(i+3,3) \ 4.140 - XO1(i,0) \ 4.141 - XO1(i+1,1) \ 4.142 - XO1(i+2,2) \ 4.143 - XO1(i+3,3) \ 4.144 - XO2(i,0) \ 4.145 - XO2(i+1,1) \ 4.146 - XO2(i+2,2) \ 4.147 - XO2(i+3,3) \ 4.148 - XO3(i,0) \ 4.149 - ST(i,0) \ 4.150 - XO3(i+1,1) \ 4.151 - ST(i+1,1) \ 4.152 - XO3(i+2,2) \ 4.153 - ST(i+2,2) \ 4.154 - XO3(i+3,3) \ 4.155 - ST(i+3,3) 4.156 - 4.157 - " .align 32 ;\n" 4.158 - " 1: ;\n" 4.159 - 4.160 - BLOCK(0) 4.161 - BLOCK(4) 4.162 - 
BLOCK(8) 4.163 - BLOCK(12) 4.164 - 4.165 - " addl $128, %1 ;\n" 4.166 - " addl $128, %2 ;\n" 4.167 - " addl $128, %3 ;\n" 4.168 - " addl $128, %4 ;\n" 4.169 - " decl %0 ;\n" 4.170 - " jnz 1b ;\n" 4.171 - : "+r" (lines), 4.172 - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 4.173 - : 4.174 - : "memory"); 4.175 - 4.176 - kernel_fpu_end(); 4.177 -} 4.178 - 4.179 - 4.180 -static void 4.181 -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.182 - unsigned long *p3, unsigned long *p4, unsigned long *p5) 4.183 -{ 4.184 - unsigned long lines = bytes >> 7; 4.185 - 4.186 - kernel_fpu_begin(); 4.187 - 4.188 - /* Make sure GCC forgets anything it knows about p4 or p5, 4.189 - such that it won't pass to the asm volatile below a 4.190 - register that is shared with any other variable. That's 4.191 - because we modify p4 and p5 there, but we can't mark them 4.192 - as read/write, otherwise we'd overflow the 10-asm-operands 4.193 - limit of GCC < 3.1. */ 4.194 - __asm__ ("" : "+r" (p4), "+r" (p5)); 4.195 - 4.196 - __asm__ __volatile__ ( 4.197 -#undef BLOCK 4.198 -#define BLOCK(i) \ 4.199 - LD(i,0) \ 4.200 - LD(i+1,1) \ 4.201 - LD(i+2,2) \ 4.202 - LD(i+3,3) \ 4.203 - XO1(i,0) \ 4.204 - XO1(i+1,1) \ 4.205 - XO1(i+2,2) \ 4.206 - XO1(i+3,3) \ 4.207 - XO2(i,0) \ 4.208 - XO2(i+1,1) \ 4.209 - XO2(i+2,2) \ 4.210 - XO2(i+3,3) \ 4.211 - XO3(i,0) \ 4.212 - XO3(i+1,1) \ 4.213 - XO3(i+2,2) \ 4.214 - XO3(i+3,3) \ 4.215 - XO4(i,0) \ 4.216 - ST(i,0) \ 4.217 - XO4(i+1,1) \ 4.218 - ST(i+1,1) \ 4.219 - XO4(i+2,2) \ 4.220 - ST(i+2,2) \ 4.221 - XO4(i+3,3) \ 4.222 - ST(i+3,3) 4.223 - 4.224 - " .align 32 ;\n" 4.225 - " 1: ;\n" 4.226 - 4.227 - BLOCK(0) 4.228 - BLOCK(4) 4.229 - BLOCK(8) 4.230 - BLOCK(12) 4.231 - 4.232 - " addl $128, %1 ;\n" 4.233 - " addl $128, %2 ;\n" 4.234 - " addl $128, %3 ;\n" 4.235 - " addl $128, %4 ;\n" 4.236 - " addl $128, %5 ;\n" 4.237 - " decl %0 ;\n" 4.238 - " jnz 1b ;\n" 4.239 - : "+r" (lines), 4.240 - "+r" (p1), "+r" (p2), "+r" (p3) 4.241 - : "r" (p4), 
"r" (p5) 4.242 - : "memory"); 4.243 - 4.244 - /* p4 and p5 were modified, and now the variables are dead. 4.245 - Clobber them just to be sure nobody does something stupid 4.246 - like assuming they have some legal value. */ 4.247 - __asm__ ("" : "=r" (p4), "=r" (p5)); 4.248 - 4.249 - kernel_fpu_end(); 4.250 -} 4.251 - 4.252 -#undef LD 4.253 -#undef XO1 4.254 -#undef XO2 4.255 -#undef XO3 4.256 -#undef XO4 4.257 -#undef ST 4.258 -#undef BLOCK 4.259 - 4.260 -static void 4.261 -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 4.262 -{ 4.263 - unsigned long lines = bytes >> 6; 4.264 - 4.265 - kernel_fpu_begin(); 4.266 - 4.267 - __asm__ __volatile__ ( 4.268 - " .align 32 ;\n" 4.269 - " 1: ;\n" 4.270 - " movq (%1), %%mm0 ;\n" 4.271 - " movq 8(%1), %%mm1 ;\n" 4.272 - " pxor (%2), %%mm0 ;\n" 4.273 - " movq 16(%1), %%mm2 ;\n" 4.274 - " movq %%mm0, (%1) ;\n" 4.275 - " pxor 8(%2), %%mm1 ;\n" 4.276 - " movq 24(%1), %%mm3 ;\n" 4.277 - " movq %%mm1, 8(%1) ;\n" 4.278 - " pxor 16(%2), %%mm2 ;\n" 4.279 - " movq 32(%1), %%mm4 ;\n" 4.280 - " movq %%mm2, 16(%1) ;\n" 4.281 - " pxor 24(%2), %%mm3 ;\n" 4.282 - " movq 40(%1), %%mm5 ;\n" 4.283 - " movq %%mm3, 24(%1) ;\n" 4.284 - " pxor 32(%2), %%mm4 ;\n" 4.285 - " movq 48(%1), %%mm6 ;\n" 4.286 - " movq %%mm4, 32(%1) ;\n" 4.287 - " pxor 40(%2), %%mm5 ;\n" 4.288 - " movq 56(%1), %%mm7 ;\n" 4.289 - " movq %%mm5, 40(%1) ;\n" 4.290 - " pxor 48(%2), %%mm6 ;\n" 4.291 - " pxor 56(%2), %%mm7 ;\n" 4.292 - " movq %%mm6, 48(%1) ;\n" 4.293 - " movq %%mm7, 56(%1) ;\n" 4.294 - 4.295 - " addl $64, %1 ;\n" 4.296 - " addl $64, %2 ;\n" 4.297 - " decl %0 ;\n" 4.298 - " jnz 1b ;\n" 4.299 - : "+r" (lines), 4.300 - "+r" (p1), "+r" (p2) 4.301 - : 4.302 - : "memory"); 4.303 - 4.304 - kernel_fpu_end(); 4.305 -} 4.306 - 4.307 -static void 4.308 -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.309 - unsigned long *p3) 4.310 -{ 4.311 - unsigned long lines = bytes >> 6; 4.312 - 4.313 - kernel_fpu_begin(); 4.314 - 4.315 
- __asm__ __volatile__ ( 4.316 - " .align 32,0x90 ;\n" 4.317 - " 1: ;\n" 4.318 - " movq (%1), %%mm0 ;\n" 4.319 - " movq 8(%1), %%mm1 ;\n" 4.320 - " pxor (%2), %%mm0 ;\n" 4.321 - " movq 16(%1), %%mm2 ;\n" 4.322 - " pxor 8(%2), %%mm1 ;\n" 4.323 - " pxor (%3), %%mm0 ;\n" 4.324 - " pxor 16(%2), %%mm2 ;\n" 4.325 - " movq %%mm0, (%1) ;\n" 4.326 - " pxor 8(%3), %%mm1 ;\n" 4.327 - " pxor 16(%3), %%mm2 ;\n" 4.328 - " movq 24(%1), %%mm3 ;\n" 4.329 - " movq %%mm1, 8(%1) ;\n" 4.330 - " movq 32(%1), %%mm4 ;\n" 4.331 - " movq 40(%1), %%mm5 ;\n" 4.332 - " pxor 24(%2), %%mm3 ;\n" 4.333 - " movq %%mm2, 16(%1) ;\n" 4.334 - " pxor 32(%2), %%mm4 ;\n" 4.335 - " pxor 24(%3), %%mm3 ;\n" 4.336 - " pxor 40(%2), %%mm5 ;\n" 4.337 - " movq %%mm3, 24(%1) ;\n" 4.338 - " pxor 32(%3), %%mm4 ;\n" 4.339 - " pxor 40(%3), %%mm5 ;\n" 4.340 - " movq 48(%1), %%mm6 ;\n" 4.341 - " movq %%mm4, 32(%1) ;\n" 4.342 - " movq 56(%1), %%mm7 ;\n" 4.343 - " pxor 48(%2), %%mm6 ;\n" 4.344 - " movq %%mm5, 40(%1) ;\n" 4.345 - " pxor 56(%2), %%mm7 ;\n" 4.346 - " pxor 48(%3), %%mm6 ;\n" 4.347 - " pxor 56(%3), %%mm7 ;\n" 4.348 - " movq %%mm6, 48(%1) ;\n" 4.349 - " movq %%mm7, 56(%1) ;\n" 4.350 - 4.351 - " addl $64, %1 ;\n" 4.352 - " addl $64, %2 ;\n" 4.353 - " addl $64, %3 ;\n" 4.354 - " decl %0 ;\n" 4.355 - " jnz 1b ;\n" 4.356 - : "+r" (lines), 4.357 - "+r" (p1), "+r" (p2), "+r" (p3) 4.358 - : 4.359 - : "memory" ); 4.360 - 4.361 - kernel_fpu_end(); 4.362 -} 4.363 - 4.364 -static void 4.365 -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.366 - unsigned long *p3, unsigned long *p4) 4.367 -{ 4.368 - unsigned long lines = bytes >> 6; 4.369 - 4.370 - kernel_fpu_begin(); 4.371 - 4.372 - __asm__ __volatile__ ( 4.373 - " .align 32,0x90 ;\n" 4.374 - " 1: ;\n" 4.375 - " movq (%1), %%mm0 ;\n" 4.376 - " movq 8(%1), %%mm1 ;\n" 4.377 - " pxor (%2), %%mm0 ;\n" 4.378 - " movq 16(%1), %%mm2 ;\n" 4.379 - " pxor 8(%2), %%mm1 ;\n" 4.380 - " pxor (%3), %%mm0 ;\n" 4.381 - " pxor 16(%2), %%mm2 ;\n" 4.382 - " pxor 
8(%3), %%mm1 ;\n" 4.383 - " pxor (%4), %%mm0 ;\n" 4.384 - " movq 24(%1), %%mm3 ;\n" 4.385 - " pxor 16(%3), %%mm2 ;\n" 4.386 - " pxor 8(%4), %%mm1 ;\n" 4.387 - " movq %%mm0, (%1) ;\n" 4.388 - " movq 32(%1), %%mm4 ;\n" 4.389 - " pxor 24(%2), %%mm3 ;\n" 4.390 - " pxor 16(%4), %%mm2 ;\n" 4.391 - " movq %%mm1, 8(%1) ;\n" 4.392 - " movq 40(%1), %%mm5 ;\n" 4.393 - " pxor 32(%2), %%mm4 ;\n" 4.394 - " pxor 24(%3), %%mm3 ;\n" 4.395 - " movq %%mm2, 16(%1) ;\n" 4.396 - " pxor 40(%2), %%mm5 ;\n" 4.397 - " pxor 32(%3), %%mm4 ;\n" 4.398 - " pxor 24(%4), %%mm3 ;\n" 4.399 - " movq %%mm3, 24(%1) ;\n" 4.400 - " movq 56(%1), %%mm7 ;\n" 4.401 - " movq 48(%1), %%mm6 ;\n" 4.402 - " pxor 40(%3), %%mm5 ;\n" 4.403 - " pxor 32(%4), %%mm4 ;\n" 4.404 - " pxor 48(%2), %%mm6 ;\n" 4.405 - " movq %%mm4, 32(%1) ;\n" 4.406 - " pxor 56(%2), %%mm7 ;\n" 4.407 - " pxor 40(%4), %%mm5 ;\n" 4.408 - " pxor 48(%3), %%mm6 ;\n" 4.409 - " pxor 56(%3), %%mm7 ;\n" 4.410 - " movq %%mm5, 40(%1) ;\n" 4.411 - " pxor 48(%4), %%mm6 ;\n" 4.412 - " pxor 56(%4), %%mm7 ;\n" 4.413 - " movq %%mm6, 48(%1) ;\n" 4.414 - " movq %%mm7, 56(%1) ;\n" 4.415 - 4.416 - " addl $64, %1 ;\n" 4.417 - " addl $64, %2 ;\n" 4.418 - " addl $64, %3 ;\n" 4.419 - " addl $64, %4 ;\n" 4.420 - " decl %0 ;\n" 4.421 - " jnz 1b ;\n" 4.422 - : "+r" (lines), 4.423 - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 4.424 - : 4.425 - : "memory"); 4.426 - 4.427 - kernel_fpu_end(); 4.428 -} 4.429 - 4.430 -static void 4.431 -xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.432 - unsigned long *p3, unsigned long *p4, unsigned long *p5) 4.433 -{ 4.434 - unsigned long lines = bytes >> 6; 4.435 - 4.436 - kernel_fpu_begin(); 4.437 - 4.438 - /* Make sure GCC forgets anything it knows about p4 or p5, 4.439 - such that it won't pass to the asm volatile below a 4.440 - register that is shared with any other variable. 
That's 4.441 - because we modify p4 and p5 there, but we can't mark them 4.442 - as read/write, otherwise we'd overflow the 10-asm-operands 4.443 - limit of GCC < 3.1. */ 4.444 - __asm__ ("" : "+r" (p4), "+r" (p5)); 4.445 - 4.446 - __asm__ __volatile__ ( 4.447 - " .align 32,0x90 ;\n" 4.448 - " 1: ;\n" 4.449 - " movq (%1), %%mm0 ;\n" 4.450 - " movq 8(%1), %%mm1 ;\n" 4.451 - " pxor (%2), %%mm0 ;\n" 4.452 - " pxor 8(%2), %%mm1 ;\n" 4.453 - " movq 16(%1), %%mm2 ;\n" 4.454 - " pxor (%3), %%mm0 ;\n" 4.455 - " pxor 8(%3), %%mm1 ;\n" 4.456 - " pxor 16(%2), %%mm2 ;\n" 4.457 - " pxor (%4), %%mm0 ;\n" 4.458 - " pxor 8(%4), %%mm1 ;\n" 4.459 - " pxor 16(%3), %%mm2 ;\n" 4.460 - " movq 24(%1), %%mm3 ;\n" 4.461 - " pxor (%5), %%mm0 ;\n" 4.462 - " pxor 8(%5), %%mm1 ;\n" 4.463 - " movq %%mm0, (%1) ;\n" 4.464 - " pxor 16(%4), %%mm2 ;\n" 4.465 - " pxor 24(%2), %%mm3 ;\n" 4.466 - " movq %%mm1, 8(%1) ;\n" 4.467 - " pxor 16(%5), %%mm2 ;\n" 4.468 - " pxor 24(%3), %%mm3 ;\n" 4.469 - " movq 32(%1), %%mm4 ;\n" 4.470 - " movq %%mm2, 16(%1) ;\n" 4.471 - " pxor 24(%4), %%mm3 ;\n" 4.472 - " pxor 32(%2), %%mm4 ;\n" 4.473 - " movq 40(%1), %%mm5 ;\n" 4.474 - " pxor 24(%5), %%mm3 ;\n" 4.475 - " pxor 32(%3), %%mm4 ;\n" 4.476 - " pxor 40(%2), %%mm5 ;\n" 4.477 - " movq %%mm3, 24(%1) ;\n" 4.478 - " pxor 32(%4), %%mm4 ;\n" 4.479 - " pxor 40(%3), %%mm5 ;\n" 4.480 - " movq 48(%1), %%mm6 ;\n" 4.481 - " movq 56(%1), %%mm7 ;\n" 4.482 - " pxor 32(%5), %%mm4 ;\n" 4.483 - " pxor 40(%4), %%mm5 ;\n" 4.484 - " pxor 48(%2), %%mm6 ;\n" 4.485 - " pxor 56(%2), %%mm7 ;\n" 4.486 - " movq %%mm4, 32(%1) ;\n" 4.487 - " pxor 48(%3), %%mm6 ;\n" 4.488 - " pxor 56(%3), %%mm7 ;\n" 4.489 - " pxor 40(%5), %%mm5 ;\n" 4.490 - " pxor 48(%4), %%mm6 ;\n" 4.491 - " pxor 56(%4), %%mm7 ;\n" 4.492 - " movq %%mm5, 40(%1) ;\n" 4.493 - " pxor 48(%5), %%mm6 ;\n" 4.494 - " pxor 56(%5), %%mm7 ;\n" 4.495 - " movq %%mm6, 48(%1) ;\n" 4.496 - " movq %%mm7, 56(%1) ;\n" 4.497 - 4.498 - " addl $64, %1 ;\n" 4.499 - " addl $64, %2 ;\n" 4.500 - " addl 
$64, %3 ;\n" 4.501 - " addl $64, %4 ;\n" 4.502 - " addl $64, %5 ;\n" 4.503 - " decl %0 ;\n" 4.504 - " jnz 1b ;\n" 4.505 - : "+r" (lines), 4.506 - "+r" (p1), "+r" (p2), "+r" (p3) 4.507 - : "r" (p4), "r" (p5) 4.508 - : "memory"); 4.509 - 4.510 - /* p4 and p5 were modified, and now the variables are dead. 4.511 - Clobber them just to be sure nobody does something stupid 4.512 - like assuming they have some legal value. */ 4.513 - __asm__ ("" : "=r" (p4), "=r" (p5)); 4.514 - 4.515 - kernel_fpu_end(); 4.516 -} 4.517 - 4.518 -static struct xor_block_template xor_block_pII_mmx = { 4.519 - .name = "pII_mmx", 4.520 - .do_2 = xor_pII_mmx_2, 4.521 - .do_3 = xor_pII_mmx_3, 4.522 - .do_4 = xor_pII_mmx_4, 4.523 - .do_5 = xor_pII_mmx_5, 4.524 -}; 4.525 - 4.526 -static struct xor_block_template xor_block_p5_mmx = { 4.527 - .name = "p5_mmx", 4.528 - .do_2 = xor_p5_mmx_2, 4.529 - .do_3 = xor_p5_mmx_3, 4.530 - .do_4 = xor_p5_mmx_4, 4.531 - .do_5 = xor_p5_mmx_5, 4.532 -}; 4.533 - 4.534 -/* 4.535 - * Cache avoiding checksumming functions utilizing KNI instructions 4.536 - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) 4.537 - */ 4.538 - 4.539 -#define XMMS_SAVE do { \ 4.540 - preempt_disable(); \ 4.541 - if (!(current_thread_info()->status & TS_USEDFPU)) \ 4.542 - clts(); \ 4.543 - __asm__ __volatile__ ( \ 4.544 - "movups %%xmm0,(%1) ;\n\t" \ 4.545 - "movups %%xmm1,0x10(%1) ;\n\t" \ 4.546 - "movups %%xmm2,0x20(%1) ;\n\t" \ 4.547 - "movups %%xmm3,0x30(%1) ;\n\t" \ 4.548 - : "=&r" (cr0) \ 4.549 - : "r" (xmm_save) \ 4.550 - : "memory"); \ 4.551 -} while(0) 4.552 - 4.553 -#define XMMS_RESTORE do { \ 4.554 - __asm__ __volatile__ ( \ 4.555 - "sfence ;\n\t" \ 4.556 - "movups (%1),%%xmm0 ;\n\t" \ 4.557 - "movups 0x10(%1),%%xmm1 ;\n\t" \ 4.558 - "movups 0x20(%1),%%xmm2 ;\n\t" \ 4.559 - "movups 0x30(%1),%%xmm3 ;\n\t" \ 4.560 - : \ 4.561 - : "r" (cr0), "r" (xmm_save) \ 4.562 - : "memory"); \ 4.563 - if (!(current_thread_info()->status & TS_USEDFPU)) \ 4.564 - stts(); \ 4.565 - 
preempt_enable(); \ 4.566 -} while(0) 4.567 - 4.568 -#define ALIGN16 __attribute__((aligned(16))) 4.569 - 4.570 -#define OFFS(x) "16*("#x")" 4.571 -#define PF_OFFS(x) "256+16*("#x")" 4.572 -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" 4.573 -#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" 4.574 -#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" 4.575 -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" 4.576 -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" 4.577 -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" 4.578 -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" 4.579 -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" 4.580 -#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" 4.581 -#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" 4.582 -#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" 4.583 -#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" 4.584 -#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" 4.585 - 4.586 - 4.587 -static void 4.588 -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 4.589 -{ 4.590 - unsigned long lines = bytes >> 8; 4.591 - char xmm_save[16*4] ALIGN16; 4.592 - int cr0; 4.593 - 4.594 - XMMS_SAVE; 4.595 - 4.596 - __asm__ __volatile__ ( 4.597 -#undef BLOCK 4.598 -#define BLOCK(i) \ 4.599 - LD(i,0) \ 4.600 - LD(i+1,1) \ 4.601 - PF1(i) \ 4.602 - PF1(i+2) \ 4.603 - LD(i+2,2) \ 4.604 - LD(i+3,3) \ 4.605 - PF0(i+4) \ 4.606 - PF0(i+6) \ 4.607 - XO1(i,0) \ 4.608 - XO1(i+1,1) \ 4.609 - XO1(i+2,2) \ 4.610 - XO1(i+3,3) \ 4.611 - ST(i,0) \ 4.612 - ST(i+1,1) \ 4.613 - ST(i+2,2) \ 4.614 - ST(i+3,3) \ 4.615 - 4.616 - 4.617 - PF0(0) 4.618 - PF0(2) 4.619 - 4.620 - " .align 32 ;\n" 4.621 - " 1: ;\n" 4.622 - 4.623 - BLOCK(0) 4.624 - BLOCK(4) 4.625 - BLOCK(8) 4.626 - BLOCK(12) 4.627 - 4.628 - " addl $256, %1 ;\n" 4.629 - " addl $256, %2 ;\n" 4.630 - " decl %0 ;\n" 4.631 - " jnz 1b ;\n" 4.632 - : "+r" (lines), 4.633 - "+r" (p1), "+r" (p2) 4.634 - : 4.635 - : "memory"); 4.636 - 4.637 - 
XMMS_RESTORE; 4.638 -} 4.639 - 4.640 -static void 4.641 -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.642 - unsigned long *p3) 4.643 -{ 4.644 - unsigned long lines = bytes >> 8; 4.645 - char xmm_save[16*4] ALIGN16; 4.646 - int cr0; 4.647 - 4.648 - XMMS_SAVE; 4.649 - 4.650 - __asm__ __volatile__ ( 4.651 -#undef BLOCK 4.652 -#define BLOCK(i) \ 4.653 - PF1(i) \ 4.654 - PF1(i+2) \ 4.655 - LD(i,0) \ 4.656 - LD(i+1,1) \ 4.657 - LD(i+2,2) \ 4.658 - LD(i+3,3) \ 4.659 - PF2(i) \ 4.660 - PF2(i+2) \ 4.661 - PF0(i+4) \ 4.662 - PF0(i+6) \ 4.663 - XO1(i,0) \ 4.664 - XO1(i+1,1) \ 4.665 - XO1(i+2,2) \ 4.666 - XO1(i+3,3) \ 4.667 - XO2(i,0) \ 4.668 - XO2(i+1,1) \ 4.669 - XO2(i+2,2) \ 4.670 - XO2(i+3,3) \ 4.671 - ST(i,0) \ 4.672 - ST(i+1,1) \ 4.673 - ST(i+2,2) \ 4.674 - ST(i+3,3) \ 4.675 - 4.676 - 4.677 - PF0(0) 4.678 - PF0(2) 4.679 - 4.680 - " .align 32 ;\n" 4.681 - " 1: ;\n" 4.682 - 4.683 - BLOCK(0) 4.684 - BLOCK(4) 4.685 - BLOCK(8) 4.686 - BLOCK(12) 4.687 - 4.688 - " addl $256, %1 ;\n" 4.689 - " addl $256, %2 ;\n" 4.690 - " addl $256, %3 ;\n" 4.691 - " decl %0 ;\n" 4.692 - " jnz 1b ;\n" 4.693 - : "+r" (lines), 4.694 - "+r" (p1), "+r"(p2), "+r"(p3) 4.695 - : 4.696 - : "memory" ); 4.697 - 4.698 - XMMS_RESTORE; 4.699 -} 4.700 - 4.701 -static void 4.702 -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.703 - unsigned long *p3, unsigned long *p4) 4.704 -{ 4.705 - unsigned long lines = bytes >> 8; 4.706 - char xmm_save[16*4] ALIGN16; 4.707 - int cr0; 4.708 - 4.709 - XMMS_SAVE; 4.710 - 4.711 - __asm__ __volatile__ ( 4.712 -#undef BLOCK 4.713 -#define BLOCK(i) \ 4.714 - PF1(i) \ 4.715 - PF1(i+2) \ 4.716 - LD(i,0) \ 4.717 - LD(i+1,1) \ 4.718 - LD(i+2,2) \ 4.719 - LD(i+3,3) \ 4.720 - PF2(i) \ 4.721 - PF2(i+2) \ 4.722 - XO1(i,0) \ 4.723 - XO1(i+1,1) \ 4.724 - XO1(i+2,2) \ 4.725 - XO1(i+3,3) \ 4.726 - PF3(i) \ 4.727 - PF3(i+2) \ 4.728 - PF0(i+4) \ 4.729 - PF0(i+6) \ 4.730 - XO2(i,0) \ 4.731 - XO2(i+1,1) \ 4.732 - XO2(i+2,2) \ 4.733 - XO2(i+3,3) 
\ 4.734 - XO3(i,0) \ 4.735 - XO3(i+1,1) \ 4.736 - XO3(i+2,2) \ 4.737 - XO3(i+3,3) \ 4.738 - ST(i,0) \ 4.739 - ST(i+1,1) \ 4.740 - ST(i+2,2) \ 4.741 - ST(i+3,3) \ 4.742 - 4.743 - 4.744 - PF0(0) 4.745 - PF0(2) 4.746 - 4.747 - " .align 32 ;\n" 4.748 - " 1: ;\n" 4.749 - 4.750 - BLOCK(0) 4.751 - BLOCK(4) 4.752 - BLOCK(8) 4.753 - BLOCK(12) 4.754 - 4.755 - " addl $256, %1 ;\n" 4.756 - " addl $256, %2 ;\n" 4.757 - " addl $256, %3 ;\n" 4.758 - " addl $256, %4 ;\n" 4.759 - " decl %0 ;\n" 4.760 - " jnz 1b ;\n" 4.761 - : "+r" (lines), 4.762 - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 4.763 - : 4.764 - : "memory" ); 4.765 - 4.766 - XMMS_RESTORE; 4.767 -} 4.768 - 4.769 -static void 4.770 -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4.771 - unsigned long *p3, unsigned long *p4, unsigned long *p5) 4.772 -{ 4.773 - unsigned long lines = bytes >> 8; 4.774 - char xmm_save[16*4] ALIGN16; 4.775 - int cr0; 4.776 - 4.777 - XMMS_SAVE; 4.778 - 4.779 - /* Make sure GCC forgets anything it knows about p4 or p5, 4.780 - such that it won't pass to the asm volatile below a 4.781 - register that is shared with any other variable. That's 4.782 - because we modify p4 and p5 there, but we can't mark them 4.783 - as read/write, otherwise we'd overflow the 10-asm-operands 4.784 - limit of GCC < 3.1. 
*/ 4.785 - __asm__ ("" : "+r" (p4), "+r" (p5)); 4.786 - 4.787 - __asm__ __volatile__ ( 4.788 -#undef BLOCK 4.789 -#define BLOCK(i) \ 4.790 - PF1(i) \ 4.791 - PF1(i+2) \ 4.792 - LD(i,0) \ 4.793 - LD(i+1,1) \ 4.794 - LD(i+2,2) \ 4.795 - LD(i+3,3) \ 4.796 - PF2(i) \ 4.797 - PF2(i+2) \ 4.798 - XO1(i,0) \ 4.799 - XO1(i+1,1) \ 4.800 - XO1(i+2,2) \ 4.801 - XO1(i+3,3) \ 4.802 - PF3(i) \ 4.803 - PF3(i+2) \ 4.804 - XO2(i,0) \ 4.805 - XO2(i+1,1) \ 4.806 - XO2(i+2,2) \ 4.807 - XO2(i+3,3) \ 4.808 - PF4(i) \ 4.809 - PF4(i+2) \ 4.810 - PF0(i+4) \ 4.811 - PF0(i+6) \ 4.812 - XO3(i,0) \ 4.813 - XO3(i+1,1) \ 4.814 - XO3(i+2,2) \ 4.815 - XO3(i+3,3) \ 4.816 - XO4(i,0) \ 4.817 - XO4(i+1,1) \ 4.818 - XO4(i+2,2) \ 4.819 - XO4(i+3,3) \ 4.820 - ST(i,0) \ 4.821 - ST(i+1,1) \ 4.822 - ST(i+2,2) \ 4.823 - ST(i+3,3) \ 4.824 - 4.825 - 4.826 - PF0(0) 4.827 - PF0(2) 4.828 - 4.829 - " .align 32 ;\n" 4.830 - " 1: ;\n" 4.831 - 4.832 - BLOCK(0) 4.833 - BLOCK(4) 4.834 - BLOCK(8) 4.835 - BLOCK(12) 4.836 - 4.837 - " addl $256, %1 ;\n" 4.838 - " addl $256, %2 ;\n" 4.839 - " addl $256, %3 ;\n" 4.840 - " addl $256, %4 ;\n" 4.841 - " addl $256, %5 ;\n" 4.842 - " decl %0 ;\n" 4.843 - " jnz 1b ;\n" 4.844 - : "+r" (lines), 4.845 - "+r" (p1), "+r" (p2), "+r" (p3) 4.846 - : "r" (p4), "r" (p5) 4.847 - : "memory"); 4.848 - 4.849 - /* p4 and p5 were modified, and now the variables are dead. 4.850 - Clobber them just to be sure nobody does something stupid 4.851 - like assuming they have some legal value. */ 4.852 - __asm__ ("" : "=r" (p4), "=r" (p5)); 4.853 - 4.854 - XMMS_RESTORE; 4.855 -} 4.856 - 4.857 -static struct xor_block_template xor_block_pIII_sse = { 4.858 - .name = "pIII_sse", 4.859 - .do_2 = xor_sse_2, 4.860 - .do_3 = xor_sse_3, 4.861 - .do_4 = xor_sse_4, 4.862 - .do_5 = xor_sse_5, 4.863 -}; 4.864 - 4.865 -/* Also try the generic routines. 
*/ 4.866 -#include <asm-generic/xor.h> 4.867 - 4.868 -#undef XOR_TRY_TEMPLATES 4.869 -#define XOR_TRY_TEMPLATES \ 4.870 - do { \ 4.871 - xor_speed(&xor_block_8regs); \ 4.872 - xor_speed(&xor_block_8regs_p); \ 4.873 - xor_speed(&xor_block_32regs); \ 4.874 - xor_speed(&xor_block_32regs_p); \ 4.875 - if (cpu_has_xmm) \ 4.876 - xor_speed(&xor_block_pIII_sse); \ 4.877 - if (cpu_has_mmx) { \ 4.878 - xor_speed(&xor_block_pII_mmx); \ 4.879 - xor_speed(&xor_block_p5_mmx); \ 4.880 - } \ 4.881 - } while (0) 4.882 - 4.883 -/* We force the use of the SSE xor block because it can write around L2. 4.884 - We may also be able to load into the L1 only depending on how the cpu 4.885 - deals with a load to a line that is being prefetched. */ 4.886 -#define XOR_SELECT_TEMPLATE(FASTEST) \ 4.887 - (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
5.1 --- a/xen/arch/x86/traps.c Tue Feb 22 23:31:53 2005 +0000 5.2 +++ b/xen/arch/x86/traps.c Tue Feb 22 23:46:11 2005 +0000 5.3 @@ -351,7 +351,6 @@ asmlinkage int do_page_fault(struct xen_ 5.4 5.5 static int emulate_privileged_op(struct xen_regs *regs) 5.6 { 5.7 - extern long do_fpu_taskswitch(void); 5.8 extern void *decode_reg(struct xen_regs *regs, u8 b); 5.9 5.10 struct exec_domain *ed = current; 5.11 @@ -423,7 +422,16 @@ static int emulate_privileged_op(struct 5.12 { 5.13 case 0: /* Write CR0 */ 5.14 if ( *reg & X86_CR0_TS ) 5.15 - (void)do_fpu_taskswitch(); 5.16 + { 5.17 + set_bit(EDF_GUEST_STTS, &ed->ed_flags); 5.18 + stts(); 5.19 + } 5.20 + else 5.21 + { 5.22 + clear_bit(EDF_GUEST_STTS, &ed->ed_flags); 5.23 + if ( test_bit(EDF_USEDFPU, &ed->ed_flags) ) 5.24 + clts(); 5.25 + } 5.26 break; 5.27 5.28 case 2: /* Write CR2 */