ia64/xen-unstable

view xen/arch/x86/vmx_io.c @ 6538:84ee014ebd41

Merge xen-vtx-unstable.hg
author adsharma@los-vmm.sc.intel.com
date Wed Aug 17 12:34:38 2005 -0800 (2005-08-17)
parents 23979fb12c49 f294acb25858
children 99914b54f7bf
line source
1 /*
2 * vmx_io.c: handling I/O, interrupts related VMX entry/exit
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
19 #include <xen/config.h>
20 #include <xen/init.h>
21 #include <xen/mm.h>
22 #include <xen/lib.h>
23 #include <xen/errno.h>
24 #include <xen/trace.h>
25 #include <xen/event.h>
27 #include <asm/current.h>
28 #include <asm/cpufeature.h>
29 #include <asm/processor.h>
30 #include <asm/msr.h>
31 #include <asm/vmx.h>
32 #include <asm/vmx_vmcs.h>
33 #include <asm/vmx_platform.h>
34 #include <asm/vmx_virpit.h>
35 #include <asm/apic.h>
37 #include <public/io/ioreq.h>
38 #include <public/io/vmx_vlapic.h>
40 #ifdef CONFIG_VMX
41 #if defined (__i386__)
/*
 * Copy the guest register state held in 'regs' (i386 layout) into the
 * VMCS guest-state area before resuming the guest.  Only fields that
 * VMX loads from the VMCS are written here: SS/CS selectors, RSP, RIP
 * and RFLAGS.
 */
void load_cpu_user_regs(struct cpu_user_regs *regs)
{
    /*
     * Write the guest register value into VMCS
     */
    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
    __vmwrite(GUEST_RSP, regs->esp);

    __vmwrite(GUEST_RFLAGS, regs->eflags);
    /*
     * While the guest single-steps (TF set), intercept #DB via the
     * exception bitmap; otherwise deliver it directly to the guest.
     */
    if (regs->eflags & EF_TF)
        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
    else
        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);

    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
    __vmwrite(GUEST_RIP, regs->eip);
}
60 static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
61 {
62 switch (size) {
63 case BYTE:
64 switch (index) {
65 case 0:
66 regs->eax &= 0xFFFFFF00;
67 regs->eax |= (value & 0xFF);
68 break;
69 case 1:
70 regs->ecx &= 0xFFFFFF00;
71 regs->ecx |= (value & 0xFF);
72 break;
73 case 2:
74 regs->edx &= 0xFFFFFF00;
75 regs->edx |= (value & 0xFF);
76 break;
77 case 3:
78 regs->ebx &= 0xFFFFFF00;
79 regs->ebx |= (value & 0xFF);
80 break;
81 case 4:
82 regs->eax &= 0xFFFF00FF;
83 regs->eax |= ((value & 0xFF) << 8);
84 break;
85 case 5:
86 regs->ecx &= 0xFFFF00FF;
87 regs->ecx |= ((value & 0xFF) << 8);
88 break;
89 case 6:
90 regs->edx &= 0xFFFF00FF;
91 regs->edx |= ((value & 0xFF) << 8);
92 break;
93 case 7:
94 regs->ebx &= 0xFFFF00FF;
95 regs->ebx |= ((value & 0xFF) << 8);
96 break;
97 default:
98 printk("Error: size:%x, index:%x are invalid!\n", size, index);
99 domain_crash_synchronous();
100 break;
102 }
103 break;
104 case WORD:
105 switch (index) {
106 case 0:
107 regs->eax &= 0xFFFF0000;
108 regs->eax |= (value & 0xFFFF);
109 break;
110 case 1:
111 regs->ecx &= 0xFFFF0000;
112 regs->ecx |= (value & 0xFFFF);
113 break;
114 case 2:
115 regs->edx &= 0xFFFF0000;
116 regs->edx |= (value & 0xFFFF);
117 break;
118 case 3:
119 regs->ebx &= 0xFFFF0000;
120 regs->ebx |= (value & 0xFFFF);
121 break;
122 case 4:
123 regs->esp &= 0xFFFF0000;
124 regs->esp |= (value & 0xFFFF);
125 break;
127 case 5:
128 regs->ebp &= 0xFFFF0000;
129 regs->ebp |= (value & 0xFFFF);
130 break;
131 case 6:
132 regs->esi &= 0xFFFF0000;
133 regs->esi |= (value & 0xFFFF);
134 break;
135 case 7:
136 regs->edi &= 0xFFFF0000;
137 regs->edi |= (value & 0xFFFF);
138 break;
139 default:
140 printk("Error: size:%x, index:%x are invalid!\n", size, index);
141 domain_crash_synchronous();
142 break;
143 }
144 break;
145 case LONG:
146 switch (index) {
147 case 0:
148 regs->eax = value;
149 break;
150 case 1:
151 regs->ecx = value;
152 break;
153 case 2:
154 regs->edx = value;
155 break;
156 case 3:
157 regs->ebx = value;
158 break;
159 case 4:
160 regs->esp = value;
161 break;
162 case 5:
163 regs->ebp = value;
164 break;
165 case 6:
166 regs->esi = value;
167 break;
168 case 7:
169 regs->edi = value;
170 break;
171 default:
172 printk("Error: size:%x, index:%x are invalid!\n", size, index);
173 domain_crash_synchronous();
174 break;
175 }
176 break;
177 default:
178 printk("Error: size:%x, index:%x are invalid!\n", size, index);
179 domain_crash_synchronous();
180 break;
181 }
182 }
183 #else
/*
 * x86_64 variant: copy the guest register state held in 'regs' into the
 * VMCS guest-state area (SS/CS selectors, RSP, RIP, RFLAGS) before
 * resuming the guest.
 */
void load_cpu_user_regs(struct cpu_user_regs *regs)
{
    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
    __vmwrite(GUEST_RSP, regs->rsp);

    __vmwrite(GUEST_RFLAGS, regs->rflags);
    /* Intercept #DB while the guest single-steps (TF set). */
    if (regs->rflags & EF_TF)
        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
    else
        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);

    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
    __vmwrite(GUEST_RIP, regs->rip);
}
199 static inline void __set_reg_value(unsigned long *reg, int size, long value)
200 {
201 switch (size) {
202 case BYTE_64:
203 *reg &= ~0xFF;
204 *reg |= (value & 0xFF);
205 break;
206 case WORD:
207 *reg &= ~0xFFFF;
208 *reg |= (value & 0xFFFF);
209 break;
211 case LONG:
212 *reg &= ~0xFFFFFFFF;
213 *reg |= (value & 0xFFFFFFFF);
214 break;
215 case QUAD:
216 *reg = value;
217 break;
218 default:
219 printk("Error: <__set_reg_value> : Unknown size for register\n");
220 domain_crash_synchronous();
221 }
222 }
224 static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
225 {
226 if (size == BYTE) {
227 switch (index) {
228 case 0:
229 regs->rax &= ~0xFF;
230 regs->rax |= (value & 0xFF);
231 break;
232 case 1:
233 regs->rcx &= ~0xFF;
234 regs->rcx |= (value & 0xFF);
235 break;
236 case 2:
237 regs->rdx &= ~0xFF;
238 regs->rdx |= (value & 0xFF);
239 break;
240 case 3:
241 regs->rbx &= ~0xFF;
242 regs->rbx |= (value & 0xFF);
243 break;
244 case 4:
245 regs->rax &= 0xFFFFFFFFFFFF00FF;
246 regs->rax |= ((value & 0xFF) << 8);
247 break;
248 case 5:
249 regs->rcx &= 0xFFFFFFFFFFFF00FF;
250 regs->rcx |= ((value & 0xFF) << 8);
251 break;
252 case 6:
253 regs->rdx &= 0xFFFFFFFFFFFF00FF;
254 regs->rdx |= ((value & 0xFF) << 8);
255 break;
256 case 7:
257 regs->rbx &= 0xFFFFFFFFFFFF00FF;
258 regs->rbx |= ((value & 0xFF) << 8);
259 break;
260 default:
261 printk("Error: size:%x, index:%x are invalid!\n", size, index);
262 domain_crash_synchronous();
263 break;
264 }
266 }
268 switch (index) {
269 case 0:
270 __set_reg_value(&regs->rax, size, value);
271 break;
272 case 1:
273 __set_reg_value(&regs->rcx, size, value);
274 break;
275 case 2:
276 __set_reg_value(&regs->rdx, size, value);
277 break;
278 case 3:
279 __set_reg_value(&regs->rbx, size, value);
280 break;
281 case 4:
282 __set_reg_value(&regs->rsp, size, value);
283 break;
284 case 5:
285 __set_reg_value(&regs->rbp, size, value);
286 break;
287 case 6:
288 __set_reg_value(&regs->rsi, size, value);
289 break;
290 case 7:
291 __set_reg_value(&regs->rdi, size, value);
292 break;
293 case 8:
294 __set_reg_value(&regs->r8, size, value);
295 break;
296 case 9:
297 __set_reg_value(&regs->r9, size, value);
298 break;
299 case 10:
300 __set_reg_value(&regs->r10, size, value);
301 break;
302 case 11:
303 __set_reg_value(&regs->r11, size, value);
304 break;
305 case 12:
306 __set_reg_value(&regs->r12, size, value);
307 break;
308 case 13:
309 __set_reg_value(&regs->r13, size, value);
310 break;
311 case 14:
312 __set_reg_value(&regs->r14, size, value);
313 break;
314 case 15:
315 __set_reg_value(&regs->r15, size, value);
316 break;
317 default:
318 printk("Error: <set_reg_value> Invalid index\n");
319 domain_crash_synchronous();
320 }
321 return;
322 }
323 #endif
325 void vmx_io_assist(struct vcpu *v)
326 {
327 vcpu_iodata_t *vio;
328 ioreq_t *p;
329 struct cpu_user_regs *regs = guest_cpu_user_regs();
330 unsigned long old_eax;
331 int sign;
332 struct mi_per_cpu_info *mpci_p;
333 struct cpu_user_regs *inst_decoder_regs;
335 mpci_p = &v->domain->arch.vmx_platform.mpci;
336 inst_decoder_regs = mpci_p->inst_decoder_regs;
338 vio = get_vio(v->domain, v->vcpu_id);
340 if (vio == 0) {
341 VMX_DBG_LOG(DBG_LEVEL_1,
342 "bad shared page: %lx", (unsigned long) vio);
343 domain_crash_synchronous();
344 }
345 p = &vio->vp_ioreq;
347 if (p->state == STATE_IORESP_HOOK){
348 vmx_hooks_assist(v);
349 }
351 /* clear IO wait VMX flag */
352 if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
353 if (p->state != STATE_IORESP_READY) {
354 /* An interrupt send event raced us */
355 return;
356 } else {
357 p->state = STATE_INVALID;
358 }
359 clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
360 } else {
361 return;
362 }
364 sign = (p->df) ? -1 : 1;
365 if (p->port_mm) {
366 if (p->pdata_valid) {
367 regs->esi += sign * p->count * p->size;
368 regs->edi += sign * p->count * p->size;
369 } else {
370 if (p->dir == IOREQ_WRITE) {
371 return;
372 }
373 int size = -1, index = -1;
375 size = operand_size(v->domain->arch.vmx_platform.mpci.mmio_target);
376 index = operand_index(v->domain->arch.vmx_platform.mpci.mmio_target);
378 if (v->domain->arch.vmx_platform.mpci.mmio_target & WZEROEXTEND) {
379 p->u.data = p->u.data & 0xffff;
380 }
381 set_reg_value(size, index, 0, regs, p->u.data);
383 }
384 load_cpu_user_regs(regs);
385 return;
386 }
388 if (p->dir == IOREQ_WRITE) {
389 if (p->pdata_valid) {
390 regs->esi += sign * p->count * p->size;
391 regs->ecx -= p->count;
392 }
393 return;
394 } else {
395 if (p->pdata_valid) {
396 regs->edi += sign * p->count * p->size;
397 regs->ecx -= p->count;
398 return;
399 }
400 }
402 old_eax = regs->eax;
404 switch(p->size) {
405 case 1:
406 regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
407 break;
408 case 2:
409 regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
410 break;
411 case 4:
412 regs->eax = (p->u.data & 0xffffffff);
413 break;
414 default:
415 printk("Error: %s unknwon port size\n", __FUNCTION__);
416 domain_crash_synchronous();
417 }
418 }
420 int vmx_clear_pending_io_event(struct vcpu *v)
421 {
422 struct domain *d = v->domain;
423 int port = iopacket_port(d);
425 /* evtchn_pending is shared by other event channels in 0-31 range */
426 if (!d->shared_info->evtchn_pending[port>>5])
427 clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
429 /* Note: VMX domains may need upcalls as well */
430 if (!v->vcpu_info->evtchn_pending_sel)
431 clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
433 /* clear the pending bit for port */
434 return test_and_clear_bit(port, &d->shared_info->evtchn_pending[0]);
435 }
437 /* Because we've cleared the pending events first, we need to guarantee that
438 * all events to be handled by xen for VMX domains are taken care of here.
439 *
440 * interrupts are guaranteed to be checked before resuming guest.
441 * VMX upcalls have been already arranged for if necessary.
442 */
443 void vmx_check_events(struct vcpu *d)
444 {
445 /* clear the event *before* checking for work. This should avoid
446 the set-and-check races */
447 if (vmx_clear_pending_io_event(current))
448 vmx_io_assist(d);
449 }
451 /* On exit from vmx_wait_io, we're guaranteed to have a I/O response from
452 the device model */
453 void vmx_wait_io()
454 {
455 extern void do_block();
456 int port = iopacket_port(current->domain);
458 do {
459 if(!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
460 do_block();
461 vmx_check_events(current);
462 if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
463 break;
464 /* Events other than IOPACKET_PORT might have woken us up. In that
465 case, safely go back to sleep. */
466 clear_bit(port>>5, &current->vcpu_info->evtchn_pending_sel);
467 clear_bit(0, &current->vcpu_info->evtchn_upcall_pending);
468 } while(1);
469 }
471 #if defined(__i386__) || defined(__x86_64__)
472 static inline int __fls(u32 word)
473 {
474 int bit;
476 __asm__("bsrl %1,%0"
477 :"=r" (bit)
478 :"rm" (word));
479 return word ? bit : -1;
480 }
481 #else
482 #define __fls(x) generic_fls(x)
483 static __inline__ int generic_fls(u32 x)
484 {
485 int r = 31;
487 if (!x)
488 return -1;
489 if (!(x & 0xffff0000u)) {
490 x <<= 16;
491 r -= 16;
492 }
493 if (!(x & 0xff000000u)) {
494 x <<= 8;
495 r -= 8;
496 }
497 if (!(x & 0xf0000000u)) {
498 x <<= 4;
499 r -= 4;
500 }
501 if (!(x & 0xc0000000u)) {
502 x <<= 2;
503 r -= 2;
504 }
505 if (!(x & 0x80000000u)) {
506 x <<= 1;
507 r -= 1;
508 }
509 return r;
510 }
511 #endif
513 /* Simple minded Local APIC priority implementation. Fix later */
/* Simple minded Local APIC priority implementation. Fix later */
/*
 * Scan the 256-bit pending-interrupt bitmap (eight 32-bit words,
 * word 7 = vectors 224-255) from the highest word down and return the
 * highest pending vector, or -1 if the whole bitmap is empty.
 * Each `256 - 32*k` term equals 32 * (word index).
 */
static __inline__ int find_highest_irq(u32 *pintr)
{
    if (pintr[7])
        return __fls(pintr[7]) + (256-32*1);
    if (pintr[6])
        return __fls(pintr[6]) + (256-32*2);
    if (pintr[5])
        return __fls(pintr[5]) + (256-32*3);
    if (pintr[4])
        return __fls(pintr[4]) + (256-32*4);
    if (pintr[3])
        return __fls(pintr[3]) + (256-32*5);
    if (pintr[2])
        return __fls(pintr[2]) + (256-32*6);
    if (pintr[1])
        return __fls(pintr[1]) + (256-32*7);
    return __fls(pintr[0]);
}
533 #define BSP_CPU(d) (!(d->vcpu_id))
534 static inline void clear_extint(struct vcpu *v)
535 {
536 global_iodata_t *spg;
537 int i;
538 spg = &get_sp(v->domain)->sp_global;
540 for(i = 0; i < INTR_LEN; i++)
541 spg->pic_intr[i] = 0;
542 }
544 static inline void clear_highest_bit(struct vcpu *v, int vector)
545 {
546 global_iodata_t *spg;
548 spg = &get_sp(v->domain)->sp_global;
550 clear_bit(vector, &spg->pic_intr[0]);
551 }
553 static inline int find_highest_pic_irq(struct vcpu *v)
554 {
555 u64 intr[INTR_LEN];
556 global_iodata_t *spg;
557 int i;
559 if(!BSP_CPU(v))
560 return -1;
562 spg = &get_sp(v->domain)->sp_global;
564 for(i = 0; i < INTR_LEN; i++){
565 intr[i] = spg->pic_intr[i] & ~spg->pic_mask[i];
566 }
568 return find_highest_irq((u32 *)&intr[0]);
569 }
571 /*
572 * Return 0-255 for pending irq.
573 * -1 when no pending.
574 */
575 static inline int find_highest_pending_irq(struct vcpu *v, int *type)
576 {
577 int result = -1;
578 if ((result = find_highest_pic_irq(v)) != -1){
579 *type = VLAPIC_DELIV_MODE_EXT;
580 return result;
581 }
582 return result;
583 }
/*
 * Book-keeping performed after an interrupt has been injected into the
 * guest.  For external (PIC) interrupts this reconciles the virtual PIT
 * tick accounting: a delivered PIT vector consumes one queued tick,
 * anything else is removed from the shared pending bitmap.
 */
static inline void
interrupt_post_injection(struct vcpu * v, int vector, int type)
{
    struct vmx_virpit_t *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
    switch(type)
    {
    case VLAPIC_DELIV_MODE_EXT:
        /* A queued PIT tick was just delivered: consume it.  Otherwise
           clear the vector's bit from the shared pending bitmap. */
        if (vpit->pending_intr_nr && vector == vpit->vector)
            vpit->pending_intr_nr--;
        else
            clear_highest_bit(v, vector);

        /* First ever PIT injection: reset the queued-tick count. */
        if (vector == vpit->vector && !vpit->first_injected){
            vpit->first_injected = 1;
            vpit->pending_intr_nr = 0;
        }
        /* Record injection time for PIT pacing. */
        if (vector == vpit->vector)
            vpit->inject_point = NOW();
        break;

    default:
        printk("Not support interrupt type\n");
        break;
    }
}
611 static inline void
612 enable_irq_window(unsigned long cpu_exec_control)
613 {
614 if (!(cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING)) {
615 cpu_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
616 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control);
617 }
618 }
620 static inline void
621 disable_irq_window(unsigned long cpu_exec_control)
622 {
623 if ( cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING ) {
624 cpu_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
625 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control);
626 }
627 }
629 static inline int irq_masked(unsigned long eflags)
630 {
631 return ((eflags & X86_EFLAGS_IF) == 0);
632 }
/*
 * Decide whether a pending virtual interrupt can be injected on this
 * VM entry, and either inject it or arrange an interrupt-window exit
 * for when injection becomes possible.  Called on every resume path.
 */
void vmx_intr_assist(struct vcpu *v)
{
    int intr_type = 0;
    int highest_vector = find_highest_pending_irq(v, &intr_type);
    unsigned long intr_fields, eflags, interruptibility, cpu_exec_control;

    __vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);

    /* Nothing pending: stop asking for interrupt-window exits. */
    if (highest_vector == -1) {
        disable_irq_window(cpu_exec_control);
        return;
    }

    __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields);

    /* An event is already queued for injection on this entry; leave the
       pending interrupt for a later pass. */
    if (intr_fields & INTR_INFO_VALID_MASK) {
        VMX_DBG_LOG(DBG_LEVEL_1, "vmx_intr_assist: intr_fields: %lx",
                    intr_fields);
        return;
    }

    __vmread(GUEST_INTERRUPTIBILITY_INFO, &interruptibility);

    /* Guest is in an interrupt shadow (e.g. after STI/MOV SS): defer via
       an interrupt-window exit. */
    if (interruptibility) {
        enable_irq_window(cpu_exec_control);
        VMX_DBG_LOG(DBG_LEVEL_1, "guesting pending: %x, interruptibility: %lx",
                    highest_vector, interruptibility);
        return;
    }

    __vmread(GUEST_RFLAGS, &eflags);

    switch (intr_type) {
    case VLAPIC_DELIV_MODE_EXT:
        /* IF clear: cannot inject now, request an interrupt window. */
        if (irq_masked(eflags)) {
            enable_irq_window(cpu_exec_control);
            VMX_DBG_LOG(DBG_LEVEL_1, "guesting pending: %x, eflags: %lx",
                        highest_vector, eflags);
            return;
        }

        vmx_inject_extint(v, highest_vector, VMX_INVALID_ERROR_CODE);
        TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0);
        break;
    case VLAPIC_DELIV_MODE_FIXED:
    case VLAPIC_DELIV_MODE_LPRI:
    case VLAPIC_DELIV_MODE_SMI:
    case VLAPIC_DELIV_MODE_NMI:
    case VLAPIC_DELIV_MODE_INIT:
    case VLAPIC_DELIV_MODE_STARTUP:
    default:
        /* Only external (PIC) delivery is implemented so far. */
        printk("Unsupported interrupt type\n");
        BUG();
        break;
    }

    /* Reconcile virtual PIT / pending-bitmap state post-injection. */
    interrupt_post_injection(v, highest_vector, intr_type);
    return;
}
/*
 * Per-resume VMCS fix-up for a VMX vcpu: refresh CR3 values and the
 * host stack pointer, drain any pending device-model events (blocking
 * if an I/O request is still outstanding), then inject interrupts.
 */
void vmx_do_resume(struct vcpu *d)
{
    vmx_stts();
    if ( vmx_paging_enabled(d) )
        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
    else
        // paging is not enabled in the guest
        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->domain->arch.phys_table));

    __vmwrite(HOST_CR3, pagetable_get_paddr(d->arch.monitor_table));
    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());

    if (event_pending(d)) {
        vmx_check_events(d);

        if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags))
            vmx_wait_io();
    }

    /* We can't resume the guest if we're waiting on I/O */
    ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags));

    /* We always check for interrupts before resuming guest */
    vmx_intr_assist(d);
}
720 #endif /* CONFIG_VMX */
722 /*
723 * Local variables:
724 * mode: C
725 * c-set-style: "BSD"
726 * c-basic-offset: 4
727 * tab-width: 4
728 * indent-tabs-mode: nil
729 * End:
730 */