win-pvdrivers

view xenpci/xenpci_patch_kernel.c @ 1059:6f69b45af0fb

Mask CPU with 0xff as high bits can contain info. Fix vlapic detection. Tidy up.
author James Harper <james.harper@bendigoit.com.au>
date Tue Oct 01 11:42:11 2013 +1000 (2013-10-01)
parents 471c94d04d8a
children 1d6eae98e552
line source
1 /*
2 PV Drivers for Windows Xen HVM Domains
3 Copyright (C) 2007 James Harper
4 Inspired by amdvopt by Travis Betak
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
21 #include "xenpci.h"
23 #if defined(_X86_)
25 /* Is LOCK MOV CR0 available? */
26 #define CPUID_ALTMOVCR8 (1UL << 4)
27 /* Task priority register address */
28 #define LAPIC_TASKPRI 0xFFFE0080
29 #define TPR_BYTES 0x80, 0x00, 0xfe, 0xff
31 extern VOID MoveTprToEax(VOID);
32 extern VOID MoveTprToEcx(VOID);
33 extern VOID MoveTprToEdx(VOID);
34 extern VOID MoveTprToEsi(VOID);
35 extern VOID PushTpr(VOID);
36 extern VOID MoveEaxToTpr(VOID);
37 extern VOID MoveEbxToTpr(VOID);
38 extern VOID MoveEcxToTpr(VOID);
39 extern VOID MoveEdxToTpr(VOID);
40 extern VOID MoveEsiToTpr(VOID);
41 extern VOID MoveConstToTpr(ULONG new_tpr_value);
42 extern VOID MoveZeroToTpr(VOID);
44 static PHYSICAL_ADDRESS lapic_page[MAX_VIRT_CPUS];
45 static volatile PVOID lapic[MAX_VIRT_CPUS];
46 static ULONG tpr_cache[MAX_VIRT_CPUS];
47 #define PATCH_METHOD_LOCK_MOVE_CR0 1
48 #define PATCH_METHOD_MAPPED_VLAPIC 2
49 #define PATCH_METHOD_CACHED_TPR 3
50 static ULONG patch_method;
52 static ULONG
53 SaveTprProcValue(ULONG cpu, ULONG value) {
54 switch (patch_method) {
55 case PATCH_METHOD_LOCK_MOVE_CR0:
56 case PATCH_METHOD_CACHED_TPR:
57 tpr_cache[cpu] = value;
58 break;
59 case PATCH_METHOD_MAPPED_VLAPIC:
60 /* no need to save here */
61 break;
62 }
63 return value;
64 }
66 static ULONG
67 SaveTpr() {
68 ULONG cpu = KeGetCurrentProcessorNumber() & 0xff;
70 switch (patch_method) {
71 case PATCH_METHOD_LOCK_MOVE_CR0:
72 case PATCH_METHOD_CACHED_TPR:
73 return SaveTprProcValue(cpu, *(PULONG)LAPIC_TASKPRI);
74 case PATCH_METHOD_MAPPED_VLAPIC:
75 /* no need to save here */
76 break;
77 }
78 return 0;
79 }
81 /* called with interrupts disabled (via CLI) from an arbitrary location inside HAL.DLL */
82 static __inline LONG
83 ApicHighestVector(PULONG bitmap) {
84 int i;
85 ULONG bit;
86 ULONG value;
87 for (i = 0; i < 8; i++) {
88 value = bitmap[(7 - i) * 4];
89 if (value) {
90 _BitScanReverse(&bit, value);
91 return ((7 - i) << 5) | bit;
92 }
93 }
94 return -1;
95 }
97 /* called with interrupts disabled (via CLI) from an arbitrary location inside HAL.DLL */
98 VOID
99 WriteTpr(ULONG new_tpr_value) {
100 LONG ISR;
101 LONG IRR;
102 ULONG cpu = KeGetCurrentProcessorNumber() & 0xff;
104 switch (patch_method) {
105 case PATCH_METHOD_LOCK_MOVE_CR0:
106 tpr_cache[cpu] = new_tpr_value;
107 __asm {
108 mov eax, new_tpr_value;
109 shr eax, 4;
110 lock mov cr0, eax; /* this is actually mov cr8, eax */
111 }
112 break;
113 case PATCH_METHOD_CACHED_TPR:
114 if (new_tpr_value != tpr_cache[cpu]) {
115 *(PULONG)LAPIC_TASKPRI = new_tpr_value;
116 tpr_cache[cpu] = new_tpr_value;
117 }
118 break;
119 case PATCH_METHOD_MAPPED_VLAPIC:
120 /* need to set the new tpr value and then check for pending interrupts to avoid a race */
121 *(PULONG)((PUCHAR)lapic[cpu] + 0x80) = new_tpr_value & 0xff;
122 KeMemoryBarrier();
123 IRR = ApicHighestVector((PULONG)((PUCHAR)lapic[cpu] + 0x200));
124 if (IRR == -1)
125 return;
126 ISR = ApicHighestVector((PULONG)((PUCHAR)lapic[cpu] + 0x100));
127 if (ISR == -1)
128 ISR = 0;
129 if ((ULONG)(IRR >> 4) > max((ULONG)(ISR >> 4), ((new_tpr_value & 0xf0) >> 4)))
130 *(PULONG)LAPIC_TASKPRI = new_tpr_value;
131 break;
132 }
133 }
135 /* called with interrupts disabled (via CLI) from an arbitrary location inside HAL.DLL */
136 ULONG
137 ReadTpr() {
138 ULONG cpu = KeGetCurrentProcessorNumber() & 0xff;
140 switch (patch_method) {
141 case PATCH_METHOD_LOCK_MOVE_CR0:
142 case PATCH_METHOD_CACHED_TPR:
143 return tpr_cache[cpu];
144 case PATCH_METHOD_MAPPED_VLAPIC:
145 return *(PULONG)((PUCHAR)lapic[cpu] + 0x80);
146 default:
147 return 0;
148 }
149 }
151 static __inline VOID
152 InsertCallRel32(PUCHAR address, ULONG target) {
153 *address = 0xE8; /* call near */
154 *(PULONG)(address + 1) = (ULONG)target - ((ULONG)address + 5);
155 }
157 #define PATCH_SIZE 10
159 typedef struct {
160 ULONG patch_type;
161 ULONG match_size;
162 ULONG function;
163 UCHAR bytes[PATCH_SIZE];
164 } patch_t;
166 #define PATCH_NONE 0
167 #define PATCH_1B4 1 /* 1 byte opcode with 4 bytes of data - replace with call function */
168 #define PATCH_2B4 2 /* 2 byte opcode with 4 bytes of data - replace with nop + call function*/
169 #define PATCH_2B5 3 /* 2 byte opcode with 1 + 4 bytes of data - replace with nop + nop + call function */
170 #define PATCH_2B8 4 /* 2 byte opcode with 4 + 4 bytes of data - replace with push const + call function*/
172 static patch_t patches[] = {
173 { PATCH_1B4, 5, (ULONG)MoveTprToEax, { 0xa1, TPR_BYTES } },
174 { PATCH_2B4, 6, (ULONG)MoveTprToEcx, { 0x8b, 0x0d, TPR_BYTES } },
175 { PATCH_2B4, 6, (ULONG)MoveTprToEdx, { 0x8b, 0x15, TPR_BYTES } },
176 { PATCH_2B4, 6, (ULONG)MoveTprToEsi, { 0x8b, 0x35, TPR_BYTES } },
177 { PATCH_2B4, 6, (ULONG)PushTpr, { 0xff, 0x35, TPR_BYTES } },
178 { PATCH_1B4, 5, (ULONG)MoveEaxToTpr, { 0xa3, TPR_BYTES } },
179 { PATCH_2B4, 6, (ULONG)MoveEbxToTpr, { 0x89, 0x1D, TPR_BYTES } },
180 { PATCH_2B4, 6, (ULONG)MoveEcxToTpr, { 0x89, 0x0D, TPR_BYTES } },
181 { PATCH_2B4, 6, (ULONG)MoveEdxToTpr, { 0x89, 0x15, TPR_BYTES } },
182 { PATCH_2B4, 6, (ULONG)MoveEsiToTpr, { 0x89, 0x35, TPR_BYTES } },
183 { PATCH_2B8, 6, (ULONG)MoveConstToTpr, { 0xC7, 0x05, TPR_BYTES } }, /* + another 4 bytes of const */
184 { PATCH_2B5, 7, (ULONG)MoveZeroToTpr, { 0x83, 0x25, TPR_BYTES, 0 } },
185 { PATCH_NONE, 0, 0, { 0 } }
186 };
188 static BOOLEAN
189 XenPci_TestAndPatchInstruction(PVOID address) {
190 PUCHAR instruction = address;
191 ULONG i;
192 /* don't declare patches[] on the stack - windows gets grumpy if we allocate too much space on the stack at HIGH_LEVEL */
194 for (i = 0; patches[i].patch_type != PATCH_NONE; i++) {
195 if (memcmp(address, patches[i].bytes, patches[i].match_size) == 0)
196 break;
197 }
199 switch (patches[i].patch_type) {
200 case PATCH_1B4:
201 InsertCallRel32(instruction + 0, patches[i].function);
202 break;
203 case PATCH_2B4:
204 *(instruction + 0) = 0x90; /* nop */
205 InsertCallRel32(instruction + 1, patches[i].function);
206 break;
207 case PATCH_2B8:
208 *(instruction + 0) = 0x68; /* push value */
209 *(PULONG)(instruction + 1) = *(PULONG)(instruction + 6);
210 InsertCallRel32(instruction + 5, patches[i].function);
211 break;
212 case PATCH_2B5:
213 *(instruction + 0) = 0x90; /* nop */
214 *(instruction + 1) = 0x90; /* nop */
215 InsertCallRel32(instruction + 2, patches[i].function);
216 break;
217 default:
218 return FALSE;
219 }
220 return TRUE;
221 }
223 typedef struct {
224 PVOID base;
225 ULONG length;
226 } patch_info_t;
228 static PVOID patch_positions[256];
229 static PVOID potential_patch_positions[256];
231 static VOID
232 XenPci_DoPatchKernel0(PVOID context) {
233 patch_info_t *pi = context;
234 ULONG i;
235 ULONG high_level_tpr;
236 ULONG patch_position_index = 0;
237 ULONG potential_patch_position_index = 0;
239 FUNCTION_ENTER();
241 high_level_tpr = SaveTpr();
242 /* we know all the other CPUs are at HIGH_LEVEL so set them all to the same as cpu 0 */
243 for (i = 1; i < MAX_VIRT_CPUS; i++)
244 SaveTprProcValue(i, high_level_tpr);
246 /* we can't use KdPrint while patching as it may involve the TPR while we are patching it */
247 for (i = 0; i < pi->length; i++) {
248 if (XenPci_TestAndPatchInstruction((PUCHAR)pi->base + i)) {
249 patch_positions[patch_position_index++] = (PUCHAR)pi->base + i;
250 } else if (*(PULONG)((PUCHAR)pi->base + i) == LAPIC_TASKPRI) {
251 potential_patch_positions[potential_patch_position_index++] = (PUCHAR)pi->base + i;
252 }
253 }
255 for (i = 0; i < patch_position_index; i++)
256 FUNCTION_MSG("Patch added at %p\n", patch_positions[i]);
258 for (i = 0; i < potential_patch_position_index; i++)
259 FUNCTION_MSG("Unpatch TPR address found at %p\n", potential_patch_positions[i]);
261 FUNCTION_EXIT();
262 }
264 static VOID
265 XenPci_DoPatchKernelN(PVOID context) {
266 UNREFERENCED_PARAMETER(context);
268 FUNCTION_ENTER();
270 FUNCTION_EXIT();
271 }
273 static BOOLEAN
274 IsMoveCr8Supported() {
275 DWORD32 cpuid_output[4];
277 __cpuid(cpuid_output, 0x80000001UL);
278 if (cpuid_output[2] & CPUID_ALTMOVCR8)
279 return TRUE;
280 else
281 return FALSE;
282 }
284 static ULONG
285 MapVlapic(PXENPCI_DEVICE_DATA xpdd) {
286 struct xen_add_to_physmap xatp;
287 ULONG rc = EINVAL;
288 ULONG ActiveProcessorCount;
289 int i;
291 FUNCTION_ENTER();
293 #if (NTDDI_VERSION >= NTDDI_WINXP)
294 ActiveProcessorCount = (ULONG)KeNumberProcessors;
295 #else
296 ActiveProcessorCount = (ULONG)*KeNumberProcessors;
297 #endif
299 for (i = 0; i < (int)ActiveProcessorCount; i++) {
300 FUNCTION_MSG("mapping lapic for cpu = %d\n", i);
302 lapic_page[i] = XenPci_AllocMMIO(xpdd, PAGE_SIZE);
303 lapic[i] = MmMapIoSpace(lapic_page[i], PAGE_SIZE, MmCached);
305 xatp.domid = DOMID_SELF;
306 xatp.idx = i;
307 xatp.space = XENMAPSPACE_vlapic;
308 xatp.gpfn = (xen_pfn_t)(lapic_page[i].QuadPart >> PAGE_SHIFT);
309 FUNCTION_MSG("gpfn = %x\n", xatp.gpfn);
310 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
311 FUNCTION_MSG("hypervisor memory op (XENMAPSPACE_vlapic_regs) ret = %d\n", rc);
312 if (rc != 0) {
313 FUNCTION_EXIT();
314 return rc;
315 }
316 }
317 FUNCTION_EXIT();
319 return rc;
320 }
322 VOID
323 XenPci_PatchKernel(PXENPCI_DEVICE_DATA xpdd, PVOID base, ULONG length) {
324 patch_info_t patch_info;
325 ULONG rc;
326 #if (NTDDI_VERSION >= NTDDI_WINXP)
327 RTL_OSVERSIONINFOEXW version_info;
328 #endif
330 FUNCTION_ENTER();
332 /* if we're compiled for 2000 then assume we need patching */
333 #if (NTDDI_VERSION >= NTDDI_WINXP)
334 version_info.dwOSVersionInfoSize = sizeof(RTL_OSVERSIONINFOEXW);
336 RtlGetVersion((PRTL_OSVERSIONINFOW)&version_info);
337 if (version_info.dwMajorVersion >= 6) {
338 FUNCTION_MSG("Vista or newer - no need for patch\n");
339 return;
340 }
341 if (version_info.dwMajorVersion == 5
342 && version_info.dwMinorVersion > 2) {
343 FUNCTION_MSG("Windows 2003 sp2 or newer - no need for patch\n");
344 return;
345 }
346 if (version_info.dwMajorVersion == 5
347 && version_info.dwMinorVersion == 2
348 && version_info.wServicePackMajor >= 2) {
349 FUNCTION_MSG("Windows 2003 sp2 or newer - no need for patch\n");
350 return;
351 }
352 #endif
353 if (IsMoveCr8Supported()) {
354 FUNCTION_MSG("Using LOCK MOVE CR0 TPR patch\n");
355 patch_method = PATCH_METHOD_LOCK_MOVE_CR0;
356 } else {
357 rc = MapVlapic(xpdd);
358 if (rc == -EACCES) {
359 FUNCTION_MSG("Xen already using VMX LAPIC acceleration. No patch required\n");
360 return;
361 }
362 if (!rc) {
363 FUNCTION_MSG("Using mapped vLAPIC TPR patch\n");
364 patch_method = PATCH_METHOD_MAPPED_VLAPIC;
365 } else {
366 FUNCTION_MSG("Using cached TPR patch\n");
367 patch_method = PATCH_METHOD_CACHED_TPR;
368 }
369 }
370 patch_info.base = base;
371 patch_info.length = length;
373 XenPci_HighSync(XenPci_DoPatchKernel0, XenPci_DoPatchKernelN, &patch_info);
375 xpdd->removable = FALSE;
377 FUNCTION_EXIT();
378 }
380 #else
382 VOID
383 XenPci_PatchKernel(PXENPCI_DEVICE_DATA xpdd, PVOID base, ULONG length) {
384 UNREFERENCED_PARAMETER(xpdd);
385 UNREFERENCED_PARAMETER(base);
386 UNREFERENCED_PARAMETER(length);
387 }
389 #endif