win-pvdrivers
changeset 531:f966a0d692bc
added missing file
author | James Harper <james.harper@bendigoit.com.au> |
---|---|
date | Thu Jan 22 08:17:31 2009 +1100 (2009-01-22) |
parents | db0ea4b20c3c |
children | b7491d4ebb3c |
files | xenpci/xenpci_patch_kernel.c |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xenpci/xenpci_patch_kernel.c Thu Jan 22 08:17:31 2009 +1100 1.3 @@ -0,0 +1,413 @@ 1.4 +/* 1.5 +PV Drivers for Windows Xen HVM Domains 1.6 +Copyright (C) 2007 James Harper 1.7 +Inspired by amdvopt by Travis Betak 1.8 + 1.9 +This program is free software; you can redistribute it and/or 1.10 +modify it under the terms of the GNU General Public License 1.11 +as published by the Free Software Foundation; either version 2 1.12 +of the License, or (at your option) any later version. 1.13 + 1.14 +This program is distributed in the hope that it will be useful, 1.15 +but WITHOUT ANY WARRANTY; without even the implied warranty of 1.16 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1.17 +GNU General Public License for more details. 1.18 + 1.19 +You should have received a copy of the GNU General Public License 1.20 +along with this program; if not, write to the Free Software 1.21 +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 1.22 +*/ 1.23 + 1.24 +#include "xenpci.h" 1.25 + 1.26 +#if defined(_X86_) 1.27 + 1.28 +/* Is LOCK MOV CR0 available? */ 1.29 +#define CPUID_ALTMOVCR8 (1UL << 4) 1.30 +/* Task priority register address */ 1.31 +#define LAPIC_TASKPRI 0xFFFE0080 1.32 +#define TPR_BYTES 0x80, 0x00, 0xfe, 0xff 1.33 + 1.34 +extern VOID MoveTprToEax(VOID); 1.35 +extern VOID MoveTprToEcx(VOID); 1.36 +extern VOID MoveTprToEdx(VOID); 1.37 +extern VOID MoveTprToEsi(VOID); 1.38 +extern VOID PushTpr(VOID); 1.39 +extern VOID MoveEaxToTpr(VOID); 1.40 +extern VOID MoveEbxToTpr(VOID); 1.41 +extern VOID MoveEcxToTpr(VOID); 1.42 +extern VOID MoveEdxToTpr(VOID); 1.43 +extern VOID MoveEsiToTpr(VOID); 1.44 +extern VOID MoveConstToTpr(ULONG new_tpr_value); 1.45 +extern VOID MoveZeroToTpr(VOID); 1.46 + 1.47 +static PHYSICAL_ADDRESS lapic_page[MAX_VIRT_CPUS]; 1.48 +static volatile PVOID lapic[MAX_VIRT_CPUS]; 1.49 +static ULONG tpr_cache[MAX_VIRT_CPUS]; 1.50 +#define PATCH_METHOD_LOCK_MOVE_CR0 1 1.51 +#define PATCH_METHOD_MAPPED_VLAPIC 2 1.52 +#define PATCH_METHOD_CACHED_TPR 3 1.53 +static ULONG patch_method; 1.54 + 1.55 +static ULONG 1.56 +SaveTprProcValue(ULONG cpu, ULONG value) 1.57 +{ 1.58 + switch (patch_method) 1.59 + { 1.60 + case PATCH_METHOD_LOCK_MOVE_CR0: 1.61 + case PATCH_METHOD_CACHED_TPR: 1.62 + tpr_cache[cpu] = value; 1.63 + break; 1.64 + case PATCH_METHOD_MAPPED_VLAPIC: 1.65 + /* no need to save here */ 1.66 + break; 1.67 + } 1.68 + return value; 1.69 +} 1.70 + 1.71 +static ULONG 1.72 +SaveTpr() 1.73 +{ 1.74 + switch (patch_method) 1.75 + { 1.76 + case PATCH_METHOD_LOCK_MOVE_CR0: 1.77 + case PATCH_METHOD_CACHED_TPR: 1.78 + return SaveTprProcValue(KeGetCurrentProcessorNumber(), *(PULONG)LAPIC_TASKPRI); 1.79 + case PATCH_METHOD_MAPPED_VLAPIC: 1.80 + /* no need to save here */ 1.81 + break; 1.82 + } 1.83 + return 0; 1.84 +} 1.85 + 1.86 +/* called with interrupts disabled (via CLI) from an arbitrary location inside HAL.DLL */ 1.87 +static __inline LONG 1.88 +ApicHighestVector(PULONG bitmap) 1.89 +{ 1.90 + int i; 1.91 + ULONG bit; 1.92 + ULONG value; 1.93 + for (i = 0; i < 8; i++) 1.94 + { 1.95 + value = bitmap[(7 - i) * 4]; 1.96 + if (value) 1.97 + { 1.98 + _BitScanReverse(&bit, value); 1.99 + return ((7 - i) << 5) | bit; 1.100 + } 1.101 + } 1.102 + return -1; 1.103 +} 1.104 + 1.105 +/* called with interrupts disabled (via CLI) from an arbitrary location inside HAL.DLL */ 1.106 +VOID 1.107 +WriteTpr(ULONG new_tpr_value) 1.108 +{ 1.109 + LONG ISR; 1.110 + LONG IRR; 1.111 + ULONG cpu = KeGetCurrentProcessorNumber(); 1.112 + 1.113 + switch (patch_method) 1.114 + { 1.115 + case PATCH_METHOD_LOCK_MOVE_CR0: 1.116 + tpr_cache[cpu] = new_tpr_value; 1.117 + __asm { 1.118 + mov eax, new_tpr_value; 1.119 + shr eax, 4; 1.120 + lock mov cr0, eax; /* this is actually mov cr8, eax */ 1.121 + } 1.122 + break; 1.123 + case PATCH_METHOD_CACHED_TPR: 1.124 + if (new_tpr_value != tpr_cache[cpu]) 1.125 + { 1.126 + *(PULONG)LAPIC_TASKPRI = new_tpr_value; 1.127 + tpr_cache[cpu] = new_tpr_value; 1.128 + } 1.129 + break; 1.130 + case PATCH_METHOD_MAPPED_VLAPIC: 1.131 + /* need to set the new tpr value and then check for pending interrupts to avoid a race */ 1.132 + *(PULONG)((PUCHAR)lapic[cpu] + 0x80) = new_tpr_value & 0xff; 1.133 + KeMemoryBarrier(); 1.134 + IRR = ApicHighestVector((PULONG)((PUCHAR)lapic[cpu] + 0x200)); 1.135 + if (IRR == -1) 1.136 + return; 1.137 + ISR = ApicHighestVector((PULONG)((PUCHAR)lapic[cpu] + 0x100)); 1.138 + if (ISR == -1) 1.139 + ISR = 0; 1.140 + if ((ULONG)(IRR >> 4) > max((ULONG)(ISR >> 4), ((new_tpr_value & 0xf0) >> 4))) 1.141 + *(PULONG)LAPIC_TASKPRI = new_tpr_value; 1.142 + break; 1.143 + } 1.144 +} 1.145 + 1.146 +/* called with interrupts disabled (via CLI) from an arbitrary location inside HAL.DLL */ 1.147 +ULONG 1.148 +ReadTpr() 1.149 +{ 1.150 + switch (patch_method) 1.151 + { 1.152 + case PATCH_METHOD_LOCK_MOVE_CR0: 1.153 + case PATCH_METHOD_CACHED_TPR: 1.154 + return tpr_cache[KeGetCurrentProcessorNumber()]; 1.155 + case PATCH_METHOD_MAPPED_VLAPIC: 1.156 + return *(PULONG)((PUCHAR)lapic[KeGetCurrentProcessorNumber()] + 0x80); 1.157 + default: 1.158 + return 0; 1.159 + } 1.160 +} 1.161 + 1.162 +static __inline VOID 1.163 +InsertCallRel32(PUCHAR address, ULONG target) 1.164 +{ 1.165 + *address = 0xE8; /* call near */ 1.166 + *(PULONG)(address + 1) = (ULONG)target - ((ULONG)address + 5); 1.167 +} 1.168 + 1.169 +#define PATCH_SIZE 10 1.170 + 1.171 +typedef struct { 1.172 + ULONG patch_type; 1.173 + ULONG match_size; 1.174 + ULONG function; 1.175 + UCHAR bytes[PATCH_SIZE]; 1.176 +} patch_t; 1.177 + 1.178 +#define PATCH_NONE 0 1.179 +#define PATCH_1B4 1 /* 1 byte opcode with 4 bytes of data - replace with call function */ 1.180 +#define PATCH_2B4 2 /* 2 byte opcode with 4 bytes of data - replace with nop + call function*/ 1.181 +#define PATCH_2B5 3 /* 2 byte opcode with 1 + 4 bytes of data - replace with nop + nop + call function */ 1.182 +#define PATCH_2B8 4 /* 2 byte opcode with 4 + 4 bytes of data - replace with push const + call function*/ 1.183 + 1.184 +static patch_t patches[] = 1.185 +{ 1.186 + { PATCH_1B4, 5, (ULONG)MoveTprToEax, { 0xa1, TPR_BYTES } }, 1.187 + { PATCH_2B4, 6, (ULONG)MoveTprToEcx, { 0x8b, 0x0d, TPR_BYTES } }, 1.188 + { PATCH_2B4, 6, (ULONG)MoveTprToEdx, { 0x8b, 0x15, TPR_BYTES } }, 1.189 + { PATCH_2B4, 6, (ULONG)MoveTprToEsi, { 0x8b, 0x35, TPR_BYTES } }, 1.190 + { PATCH_2B4, 6, (ULONG)PushTpr, { 0xff, 0x35, TPR_BYTES } }, 1.191 + { PATCH_1B4, 5, (ULONG)MoveEaxToTpr, { 0xa3, TPR_BYTES } }, 1.192 + { PATCH_2B4, 6, (ULONG)MoveEbxToTpr, { 0x89, 0x1D, TPR_BYTES } }, 1.193 + { PATCH_2B4, 6, (ULONG)MoveEcxToTpr, { 0x89, 0x0D, TPR_BYTES } }, 1.194 + { PATCH_2B4, 6, (ULONG)MoveEdxToTpr, { 0x89, 0x15, TPR_BYTES } }, 1.195 + { PATCH_2B4, 6, (ULONG)MoveEsiToTpr, { 0x89, 0x35, TPR_BYTES } }, 1.196 + { PATCH_2B8, 6, (ULONG)MoveConstToTpr, { 0xC7, 0x05, TPR_BYTES } }, /* + another 4 bytes of const */ 1.197 + { PATCH_2B5, 7, (ULONG)MoveZeroToTpr, { 0x83, 0x25, TPR_BYTES, 0 } }, 1.198 + { PATCH_NONE, 0, 0, { 0 } } 1.199 +}; 1.200 + 1.201 +static BOOLEAN 1.202 +XenPci_TestAndPatchInstruction(PVOID address) 1.203 +{ 1.204 + PUCHAR instruction = address; 1.205 + ULONG i; 1.206 + /* don't declare patches[] on the stack - windows gets grumpy if we allocate too much space on the stack at HIGH_LEVEL */ 1.207 + 1.208 + for (i = 0; patches[i].patch_type != PATCH_NONE; i++) 1.209 + { 1.210 + if (memcmp(address, patches[i].bytes, patches[i].match_size) == 0) 1.211 + break; 1.212 + } 1.213 + if (patches[i].patch_type == PATCH_NONE) 1.214 + return FALSE; 1.215 + 1.216 + switch (patches[i].patch_type) 1.217 + { 1.218 + case PATCH_1B4: 1.219 + InsertCallRel32(instruction + 0, patches[i].function); 1.220 + break; 1.221 + case PATCH_2B4: 1.222 + *(instruction + 0) = 0x90; /* nop */ 1.223 + InsertCallRel32(instruction + 1, patches[i].function); 1.224 + break; 1.225 + case PATCH_2B8: 1.226 + *(instruction + 0) = 0x68; /* push value */ 1.227 + *(PULONG)(instruction + 1) = *(PULONG)(instruction + 6); 1.228 + InsertCallRel32(instruction + 5, patches[i].function); 1.229 + break; 1.230 + case PATCH_2B5: 1.231 + *(instruction + 0) = 0x90; /* nop */ 1.232 + *(instruction + 1) = 0x90; /* nop */ 1.233 + InsertCallRel32(instruction + 2, patches[i].function); 1.234 + break; 1.235 + default: 1.236 + /* wtf? */ 1.237 + break; 1.238 + } 1.239 + return TRUE; 1.240 +} 1.241 + 1.242 +typedef struct { 1.243 + PVOID base; 1.244 + ULONG length; 1.245 +} patch_info_t; 1.246 + 1.247 +static PVOID patch_positions[256]; 1.248 +static PVOID potential_patch_positions[256]; 1.249 + 1.250 +static VOID 1.251 +XenPci_DoPatchKernel0(PVOID context) 1.252 +{ 1.253 + patch_info_t *pi = context; 1.254 + ULONG i; 1.255 + ULONG high_level_tpr; 1.256 + ULONG patch_position_index = 0; 1.257 + ULONG potential_patch_position_index = 0; 1.258 + 1.259 + FUNCTION_ENTER(); 1.260 + 1.261 + high_level_tpr = SaveTpr(); 1.262 + 1.263 + /* we know all the other CPUs are at HIGH_LEVEL so set them all to the same as cpu 0 */ 1.264 + for (i = 1; i < MAX_VIRT_CPUS; i++) 1.265 + SaveTprProcValue(i, high_level_tpr); 1.266 + 1.267 + /* we can't use KdPrint while patching as it may involve the TPR while we are patching it */ 1.268 + for (i = 0; i < pi->length; i++) 1.269 + { 1.270 + if (XenPci_TestAndPatchInstruction((PUCHAR)pi->base + i)) 1.271 + { 1.272 + patch_positions[patch_position_index++] = (PUCHAR)pi->base + i; 1.273 + } 1.274 + else if (*(PULONG)((PUCHAR)pi->base + i) == LAPIC_TASKPRI) 1.275 + { 1.276 + potential_patch_positions[potential_patch_position_index++] = (PUCHAR)pi->base + i; 1.277 + } 1.278 + } 1.279 + for (i = 0; i < patch_position_index; i++) 1.280 + KdPrint((__DRIVER_NAME " Patch added at %p\n", patch_positions[i])); 1.281 + 1.282 + for (i = 0; i < potential_patch_position_index; i++) 1.283 + KdPrint((__DRIVER_NAME " Unpatch TPR address found at %p\n", potential_patch_positions[i])); 1.284 + 1.285 + FUNCTION_EXIT(); 1.286 +} 1.287 + 1.288 +static VOID 1.289 +XenPci_DoPatchKernelN(PVOID context) 1.290 +{ 1.291 + UNREFERENCED_PARAMETER(context); 1.292 + 1.293 + FUNCTION_ENTER(); 1.294 + 1.295 + FUNCTION_EXIT(); 1.296 +} 1.297 + 1.298 +static BOOLEAN 1.299 +IsMoveCr8Supported() 1.300 +{ 1.301 + DWORD32 cpuid_output[4]; 1.302 + 1.303 + __cpuid(cpuid_output, 0x80000001UL); 1.304 + if (cpuid_output[2] & CPUID_ALTMOVCR8) 1.305 + return TRUE; 1.306 + else 1.307 + return FALSE; 1.308 +} 1.309 + 1.310 +static ULONG 1.311 +MapVlapic(PXENPCI_DEVICE_DATA xpdd) 1.312 +{ 1.313 + struct xen_add_to_physmap xatp; 1.314 + ULONG rc = EINVAL; 1.315 + int i; 1.316 + 1.317 + FUNCTION_ENTER(); 1.318 + 1.319 + for (i = 0; i < KeNumberProcessors; i++) 1.320 + { 1.321 + KdPrint((__DRIVER_NAME " mapping lapic for cpu = %d\n", i)); 1.322 + 1.323 + lapic_page[i] = XenPci_AllocMMIO(xpdd, PAGE_SIZE); 1.324 + lapic[i] = MmMapIoSpace(lapic_page[i], PAGE_SIZE, MmCached); 1.325 + 1.326 + xatp.domid = DOMID_SELF; 1.327 + xatp.idx = i; 1.328 + xatp.space = XENMAPSPACE_vlapic; 1.329 + xatp.gpfn = (xen_pfn_t)(lapic_page[i].QuadPart >> PAGE_SHIFT); 1.330 + KdPrint((__DRIVER_NAME " gpfn = %x\n", xatp.gpfn)); 1.331 + rc = HYPERVISOR_memory_op(xpdd, XENMEM_add_to_physmap, &xatp); 1.332 + KdPrint((__DRIVER_NAME " hypervisor memory op (XENMAPSPACE_vlapic_regs) ret = %d\n", rc)); 1.333 + if (rc != 0) 1.334 + { 1.335 + FUNCTION_EXIT(); 1.336 + return rc; 1.337 + } 1.338 + } 1.339 + FUNCTION_EXIT(); 1.340 + 1.341 + return rc; 1.342 +} 1.343 + 1.344 +VOID 1.345 +XenPci_PatchKernel(PXENPCI_DEVICE_DATA xpdd, PVOID base, ULONG length) 1.346 +{ 1.347 + patch_info_t patch_info; 1.348 + ULONG rc; 1.349 + RTL_OSVERSIONINFOEXW version_info; 1.350 + 1.351 + FUNCTION_ENTER(); 1.352 + 1.353 + version_info.dwOSVersionInfoSize = sizeof(RTL_OSVERSIONINFOEXW); 1.354 + RtlGetVersion((PRTL_OSVERSIONINFOW)&version_info); 1.355 + if (version_info.dwMajorVersion >= 6) 1.356 + { 1.357 + KdPrint((__DRIVER_NAME " Vista or newer - no need for patch\n")); 1.358 + return; 1.359 + } 1.360 + if (version_info.dwMajorVersion == 5 1.361 + && version_info.dwMinorVersion > 2) 1.362 + { 1.363 + KdPrint((__DRIVER_NAME " Windows 2003 sp2 or newer - no need for patch\n")); 1.364 + return; 1.365 + } 1.366 + if (version_info.dwMajorVersion == 5 1.367 + && version_info.dwMinorVersion == 2 1.368 + && version_info.wServicePackMajor >= 2) 1.369 + { 1.370 + KdPrint((__DRIVER_NAME " Windows 2003 sp2 or newer - no need for patch\n")); 1.371 + return; 1.372 + } 1.373 + 1.374 + if (IsMoveCr8Supported()) 1.375 + { 1.376 + KdPrint((__DRIVER_NAME " Using LOCK MOVE CR0 TPR patch\n")); 1.377 + patch_method = PATCH_METHOD_LOCK_MOVE_CR0; 1.378 + } 1.379 + else 1.380 + { 1.381 + rc = MapVlapic(xpdd); 1.382 + if (rc == EACCES) 1.383 + { 1.384 + KdPrint((__DRIVER_NAME " Xen already using VMX LAPIC acceleration. No patch required\n")); 1.385 + return; 1.386 + } 1.387 + if (!rc) 1.388 + { 1.389 + KdPrint((__DRIVER_NAME " Using mapped vLAPIC TPR patch\n")); 1.390 + patch_method = PATCH_METHOD_MAPPED_VLAPIC; 1.391 + } 1.392 + else 1.393 + { 1.394 + KdPrint((__DRIVER_NAME " Using cached TPR patch\n")); 1.395 + patch_method = PATCH_METHOD_CACHED_TPR; 1.396 + } 1.397 + } 1.398 + patch_info.base = base; 1.399 + patch_info.length = length; 1.400 + 1.401 + XenPci_HighSync(XenPci_DoPatchKernel0, XenPci_DoPatchKernelN, &patch_info); 1.402 + 1.403 + FUNCTION_EXIT(); 1.404 +} 1.405 + 1.406 +#else 1.407 + 1.408 +VOID 1.409 +XenPci_PatchKernel(PXENPCI_DEVICE_DATA xpdd, PVOID base, ULONG length) 1.410 +{ 1.411 + UNREFERENCED_PARAMETER(xpdd); 1.412 + UNREFERENCED_PARAMETER(base); 1.413 + UNREFERENCED_PARAMETER(length); 1.414 +} 1.415 + 1.416 +#endif