direct-io.hg

view xen/arch/ia64/vmx/vlsapic.c @ 12431:4816a891b3d6

[IA64] Fix SMP Windows boot failure

Sometimes SMP Windows fails to boot; the root cause is that a guest timer
interrupt is lost.

This patch fixes the following issues.
1. Windows uses a different way to sync the itc.
2. Previously, when the guest timer fired while the guest ITV was masked,
   Xen discarded that guest timer interrupt. This is not correct for
   Windows, which may still expect the timer interrupt.
3. Windows may set the timer differently in some situations: it first
   sets itm (which may be smaller than the current itc), and then sets
   itc (which is smaller than itm). Xen now supports this way of setting
   the timer; see the sketch below.
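
As an editorial illustration (not part of the patch), here is a minimal,
self-contained C sketch of the ordering in item 3. host_itc(),
set_host_timer() and struct guest_timer are hypothetical stand-ins for the
host ITC and a host one-shot timer; the clamp in guest_set_itm() mirrors
what vtm_set_itm() below does when the requested itm is already behind the
current guest ITC, so the interrupt is not lost.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the host ITC and a host one-shot timer. */
static uint64_t host_itc_val = 1000000;
static uint64_t host_itc(void) { return host_itc_val; }

static void set_host_timer(uint64_t delta_cycles)
{
    printf("host timer armed to fire in %llu cycles\n",
           (unsigned long long)delta_cycles);
}

/* Minimal guest timer state: offset maps the host ITC to the guest ITC. */
struct guest_timer {
    uint64_t offset;   /* guest_itc = host_itc + offset */
    uint64_t itm;      /* guest interval timer match register */
};

static uint64_t guest_itc(struct guest_timer *t)
{
    return host_itc() + t->offset;
}

static void guest_set_itm(struct guest_timer *t, uint64_t itm)
{
    uint64_t now = guest_itc(t);

    t->itm = itm;
    if (itm < now)       /* itm already in the past: clamp instead of dropping */
        itm = now;
    set_host_timer(itm - now);
}

static void guest_set_itc(struct guest_timer *t, uint64_t itc)
{
    t->offset = itc - host_itc();   /* may wrap; unsigned arithmetic is fine */
    guest_set_itm(t, t->itm);       /* re-evaluate the pending match value */
}

int main(void)
{
    struct guest_timer t = { 0, 0 };

    guest_set_itm(&t, 500);   /* itm written first, already behind guest ITC */
    guest_set_itc(&t, 100);   /* itc written afterwards, below itm: re-arms  */
    return 0;
}

In this sketch the first write arms the timer for immediate expiry instead of
dropping it, and the later itc write re-arms it for the remaining 400 cycles.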

Signed-off-by: Anthony Xu <anthony.xu@intel.com>
author awilliam@xenbuild.aw
date Fri Nov 10 11:19:57 2006 -0700 (2006-11-10)
parents 452010ddef24
children 169687dab774
line source
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vlsapic.c: virtual lsapic model including ITC timer.
 * Copyright (c) 2005, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
 */

#include <linux/sched.h>
#include <public/xen.h>
#include <asm/ia64_int.h>
#include <asm/vcpu.h>
#include <asm/regionreg.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/vmx_vcpu.h>
#include <asm/regs.h>
#include <asm/gcc_intrin.h>
#include <asm/vmx_mm_def.h>
#include <asm/vmx.h>
#include <asm/hw_irq.h>
#include <asm/vmx_pal_vsa.h>
#include <asm/kregs.h>
#include <asm/vmx_platform.h>
#include <asm/hvm/vioapic.h>
#include <asm/linux/jiffies.h>

//u64 fire_itc;
//u64 fire_itc2;
//u64 fire_itm;
//u64 fire_itm2;
/*
 * Update the checked last_itc.
 */

extern void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
                                     u64 vector, REGS *regs);
static void update_last_itc(vtime_t *vtm, uint64_t cur_itc)
{
    vtm->last_itc = cur_itc;
}

/*
 * Next for vLSapic
 */

#define NMI_VECTOR      2
#define ExtINT_VECTOR   0
#define NULL_VECTOR     -1
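
/*
 * Record the given vector in the VPD's vhpi field: 0 means none pending,
 * NMI and ExtINT use the fixed encodings 32 and 16, and an ordinary vector
 * is reduced to its priority class (vec >> 4). If interrupt acceleration
 * (vac.a_int) is enabled, the PAL virtualization service is notified too.
 */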
static void update_vhpi(VCPU *vcpu, int vec)
{
    u64 vhpi;

    if (vec == NULL_VECTOR)
        vhpi = 0;
    else if (vec == NMI_VECTOR)
        vhpi = 32;
    else if (vec == ExtINT_VECTOR)
        vhpi = 16;
    else
        vhpi = vec >> 4;

    VCPU(vcpu, vhpi) = vhpi;
    // TODO: Add support for XENO
    if (VCPU(vcpu, vac).a_int)
        ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
                      (uint64_t)vcpu->arch.privregs, 0, 0, 0, 0, 0, 0);
}

/*
 * May come from virtualization fault or
 * nested host interrupt.
 */
static int vmx_vcpu_unpend_interrupt(VCPU *vcpu, uint8_t vector)
{
    uint64_t spsr;
    int ret;

    if (vector & ~0xff) {
        dprintk(XENLOG_WARNING, "vmx_vcpu_unpend_interrupt: bad vector\n");
        return -1;
    }

    local_irq_save(spsr);
    ret = test_and_clear_bit(vector, &VCPU(vcpu, irr[0]));
    local_irq_restore(spsr);

    if (ret)
        vcpu->arch.irq_new_pending = 1;

    return ret;
}

/*
 * ITC value seen by the guest (host+offset+drift).
 */
static uint64_t now_itc(vtime_t *vtm)
{
    uint64_t guest_itc = vtm->vtm_offset + ia64_get_itc();

    if ( vtm->vtm_local_drift ) {
//        guest_itc -= vtm->vtm_local_drift;
    }
    if (guest_itc >= vtm->last_itc)
        return guest_itc;
    else
        /* guest ITC went backward after an LP switch */
        return vtm->last_itc;
}

/*
 * Interval time components reset.
 */
static void vtm_reset(VCPU *vcpu)
{
    int i;
    u64 vtm_offset;
    VCPU *v;
    struct domain *d = vcpu->domain;
    vtime_t *vtm = &VMX(vcpu, vtm);

    if (vcpu->vcpu_id == 0) {
        vtm_offset = 0UL - ia64_get_itc();
        for (i = MAX_VIRT_CPUS - 1; i >= 0; i--) {
            if ((v = d->vcpu[i]) != NULL) {
                VMX(v, vtm).vtm_offset = vtm_offset;
                VMX(v, vtm).last_itc = 0;
            }
        }
    }
    vtm->vtm_local_drift = 0;
    VCPU(vcpu, itm) = 0;
    VCPU(vcpu, itv) = 0x10000;
    vtm->last_itc = 0;
}

/* callback function when vtm_timer expires */
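/* If the ITV is masked when the timer fires, the expiry is recorded in
 * vtm->pending instead of being dropped; vtm_set_itv() injects the
 * interrupt once the guest unmasks the ITV. */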
static void vtm_timer_fn(void *data)
{
    VCPU *vcpu = data;
    vtime_t *vtm = &VMX(vcpu, vtm);
    u64 vitv;

    vitv = VCPU(vcpu, itv);
    if (!ITV_IRQ_MASK(vitv)) {
        vmx_vcpu_pend_interrupt(vcpu, ITV_VECTOR(vitv));
        vcpu_unblock(vcpu);
    } else
        vtm->pending = 1;

    update_last_itc(vtm, VCPU(vcpu, itm));  // update vITC
}

void vtm_init(VCPU *vcpu)
{
    vtime_t *vtm;
    uint64_t itc_freq;

    vtm = &VMX(vcpu, vtm);

    itc_freq = local_cpu_data->itc_freq;
    vtm->cfg_max_jump = itc_freq * MAX_JUMP_STEP / 1000;
    vtm->cfg_min_grun = itc_freq * MIN_GUEST_RUNNING_TIME / 1000;
    init_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, vcpu->processor);
    vtm_reset(vcpu);
}

/*
 * Action when the guest reads the ITC.
 */
uint64_t vtm_get_itc(VCPU *vcpu)
{
    uint64_t guest_itc;
    vtime_t *vtm = &VMX(vcpu, vtm);

    guest_itc = now_itc(vtm);
    return guest_itc;
}
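
/* The guest writes the ITC: on VCPU0 the per-domain ITC offset is recomputed
 * for every VCPU and last_itc is cleared; the interval timer is then stopped
 * if the current itm does not lie beyond the new ITC, or re-armed via
 * vtm_set_itm() otherwise. */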
void vtm_set_itc(VCPU *vcpu, uint64_t new_itc)
{
    int i;
    uint64_t vitm, vtm_offset;
    vtime_t *vtm;
    VCPU *v;
    struct domain *d = vcpu->domain;

    vitm = VCPU(vcpu, itm);
    vtm = &VMX(vcpu, vtm);
    if (vcpu->vcpu_id == 0) {
        vtm_offset = new_itc - ia64_get_itc();
        for (i = MAX_VIRT_CPUS - 1; i >= 0; i--) {
            if ((v = d->vcpu[i]) != NULL) {
                VMX(v, vtm).vtm_offset = vtm_offset;
                VMX(v, vtm).last_itc = 0;
            }
        }
    }
    vtm->last_itc = 0;
    if (vitm <= new_itc)
        stop_timer(&vtm->vtm_timer);
    else
        vtm_set_itm(vcpu, vitm);
}

#define TIMER_SLOP (50*1000) /* ns */  /* copied from timer.c */
extern u64 cycle_to_ns(u64 cycle);
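
/* The guest writes the itm. If the requested match value is already behind
 * the current guest ITC it is clamped to "now", so the timer still fires
 * instead of being lost (Windows may program itm before itc; see the
 * changeset description above). */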
void vtm_set_itm(VCPU *vcpu, uint64_t val)
{
    vtime_t *vtm;
    uint64_t vitv, cur_itc, expires;

    vitv = VCPU(vcpu, itv);
    vtm = &VMX(vcpu, vtm);
    VCPU(vcpu, itm) = val;
    if (val > vtm->last_itc) {
        cur_itc = now_itc(vtm);
        if (time_before(val, cur_itc))
            val = cur_itc;
        expires = NOW() + cycle_to_ns(val - cur_itc) + TIMER_SLOP;
        vmx_vcpu_unpend_interrupt(vcpu, ITV_VECTOR(vitv));
        set_timer(&vtm->vtm_timer, expires);
    } else {
        stop_timer(&vtm->vtm_timer);
    }
}

void vtm_set_itv(VCPU *vcpu, uint64_t val)
{
    vtime_t *vtm = &VMX(vcpu, vtm);

    VCPU(vcpu, itv) = val;

    if (!ITV_IRQ_MASK(val) && vtm->pending) {
        vmx_vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
        vtm->pending = 0;
    }
}

/*
 * Update the interrupt or arm the vtm timer to fire.
 * At this point the vtm_timer should be removed if the itv is masked.
 */
/* Interrupts must be disabled at this point */
/*
void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
{
    uint64_t cur_itc,vitm,vitv;
    uint64_t expires;
    long diff_now, diff_last;
    uint64_t spsr;

    vitv = VCPU(vcpu, itv);
    if ( ITV_IRQ_MASK(vitv) ) {
        return;
    }

    vitm = VCPU(vcpu, itm);
    local_irq_save(spsr);
    cur_itc = now_itc(vtm);
    diff_last = vtm->last_itc - vitm;
    diff_now = cur_itc - vitm;

    if ( diff_last >= 0 ) {
        // interrupt already fired.
        stop_timer(&vtm->vtm_timer);
    }
    else if ( diff_now >= 0 ) {
        // ITV is fired.
        vmx_vcpu_pend_interrupt(vcpu, vitv&0xff);
    }
*/
    /* Both last_itc & cur_itc < itm, wait for fire condition */
/*  else {
        expires = NOW() + cycle_to_ns(0-diff_now) + TIMER_SLOP;
        set_timer(&vtm->vtm_timer, expires);
    }
    local_irq_restore(spsr);
}
*/

/*
 * Action for vtm when the domain is scheduled out.
 * Remove the timer for vtm.
 */
/*
void vtm_domain_out(VCPU *vcpu)
{
    if(!is_idle_domain(vcpu->domain))
        stop_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
}
*/
/*
 * Action for vtm when the domain is scheduled in.
 * Fire vtm IRQ or add the timer for vtm.
 */
/*
void vtm_domain_in(VCPU *vcpu)
{
    vtime_t *vtm;

    if(!is_idle_domain(vcpu->domain)) {
        vtm = &(vcpu->arch.arch_vmx.vtm);
        vtm_interruption_update(vcpu, vtm);
    }
}
*/

#ifdef V_IOSAPIC_READY
int vlapic_match_logical_addr(struct vlapic *vlapic, uint16_t dest)
{
    return (VLAPIC_ID(vlapic) == dest);
}

struct vlapic* apic_round_robin(struct domain *d,
                                uint8_t vector,
                                uint32_t bitmap)
{
    uint8_t bit = 0;

    if (!bitmap) {
        printk("<apic_round_robin> no bit on bitmap\n");
        return NULL;
    }

    while (!(bitmap & (1 << bit)))
        bit++;

    return vcpu_vlapic(d->vcpu[bit]);
}
#endif

void vlsapic_reset(VCPU *vcpu)
{
    int i;

    VCPU(vcpu, lid) = VCPU_LID(vcpu);
    VCPU(vcpu, ivr) = 0;
    VCPU(vcpu, tpr) = 0x10000;
    VCPU(vcpu, eoi) = 0;
    VCPU(vcpu, irr[0]) = 0;
    VCPU(vcpu, irr[1]) = 0;
    VCPU(vcpu, irr[2]) = 0;
    VCPU(vcpu, irr[3]) = 0;
    VCPU(vcpu, pmv) = 0x10000;
    VCPU(vcpu, cmcv) = 0x10000;
    VCPU(vcpu, lrr0) = 0x10000;   // default reset value?
    VCPU(vcpu, lrr1) = 0x10000;   // default reset value?
    update_vhpi(vcpu, NULL_VECTOR);
    for ( i=0; i<4; i++) {
        VLSAPIC_INSVC(vcpu, i) = 0;
    }

#ifdef V_IOSAPIC_READY
    vcpu->arch.arch_vmx.vlapic.vcpu = vcpu;
#endif
    dprintk(XENLOG_INFO, "VLSAPIC inservice base=%p\n", &VLSAPIC_INSVC(vcpu, 0) );
}

/*
 * Find the highest signaled bit in 4 words (long).
 *
 * return 0-255: highest bit.
 *        -1   : not found.
 */
static __inline__ int highest_bits(uint64_t *dat)
{
    uint64_t bits, bitnum;
    int i;

    /* loop for all 256 bits */
    for ( i=3; i >= 0 ; i -- ) {
        bits = dat[i];
        if ( bits ) {
            bitnum = ia64_fls(bits);
            return i*64+bitnum;
        }
    }
    return NULL_VECTOR;
}

/*
 * Return 0-255 for a pending irq.
 * NULL_VECTOR: when none is pending.
 */
static int highest_pending_irq(VCPU *vcpu)
{
    if ( VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
    if ( VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
    return highest_bits(&VCPU(vcpu, irr[0]));
}

static int highest_inservice_irq(VCPU *vcpu)
{
    if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
    if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
    return highest_bits(&(VLSAPIC_INSVC(vcpu, 0)));
}

/*
 * Is the pending irq higher than the in-service one?
 */
static int is_higher_irq(int pending, int inservice)
{
    return ( (pending > inservice) ||
             ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) );
}

static int is_higher_class(int pending, int mic)
{
    return ( (pending >> 4) > mic );
}

#if 0
static int is_invalid_irq(int vec)
{
    return (vec == 1 || ((vec <= 14 && vec >= 3)));
}
#endif  // disabled since it is currently unused

#define IRQ_NO_MASKED         0
#define IRQ_MASKED_BY_VTPR    1
#define IRQ_MASKED_BY_INSVC   2   // masked by an in-service IRQ

/* See Table 5-8 in SDM vol2 for the definition */
static int
_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice)
{
    tpr_t vtpr;

    vtpr.val = VCPU(vcpu, tpr);

    if ( h_inservice == NMI_VECTOR ) {
        return IRQ_MASKED_BY_INSVC;
    }
    if ( h_pending == NMI_VECTOR ) {
        // Non Maskable Interrupt
        return IRQ_NO_MASKED;
    }
    if ( h_inservice == ExtINT_VECTOR ) {
        return IRQ_MASKED_BY_INSVC;
    }

    if ( h_pending == ExtINT_VECTOR ) {
        if ( vtpr.mmi ) {
            // mask all external IRQ
            return IRQ_MASKED_BY_VTPR;
        }
        else {
            return IRQ_NO_MASKED;
        }
    }

    if ( is_higher_irq(h_pending, h_inservice) ) {
        if ( is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)) ) {
            return IRQ_NO_MASKED;
        }
        else {
            return IRQ_MASKED_BY_VTPR;
        }
    }
    else {
        return IRQ_MASKED_BY_INSVC;
    }
}

static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
{
    int mask;

    mask = _xirq_masked(vcpu, h_pending, h_inservice);
    return mask;
}

/*
 * May come from virtualization fault or
 * nested host interrupt.
 */
int vmx_vcpu_pend_interrupt(VCPU *vcpu, uint8_t vector)
{
    uint64_t spsr;
    int ret;

    if (vector & ~0xff) {
        gdprintk(XENLOG_INFO, "vmx_vcpu_pend_interrupt: bad vector\n");
        return -1;
    }
    local_irq_save(spsr);
    ret = test_and_set_bit(vector, &VCPU(vcpu, irr[0]));
    local_irq_restore(spsr);

    if (!ret)
        vcpu->arch.irq_new_pending = 1;

    return ret;
}

/*
 * Add a batch of pending interrupts.
 * The interrupt sources are contained in pend_irr[0-3],
 * with each bit standing for one interrupt.
 */
void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, u64 *pend_irr)
{
    uint64_t spsr;
    int i;

    local_irq_save(spsr);
    for (i=0 ; i<4; i++ ) {
        VCPU(vcpu, irr[i]) |= pend_irr[i];
    }
    local_irq_restore(spsr);
    vcpu->arch.irq_new_pending = 1;
}

/*
 * If the new pending interrupt is enabled and not masked, we directly inject
 * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when
 * the interrupt becomes unmasked, it gets injected.
 * RETURN:
 *    the highest unmasked interrupt.
 *
 * Optimization: We defer setting the VHPI until the EOI time, if a higher
 *               priority interrupt is in-service. The idea is to reduce the
 *               number of unnecessary calls to inject_vhpi.
 */
int vmx_check_pending_irq(VCPU *vcpu)
{
    int mask, h_pending, h_inservice;
    uint64_t isr;
    IA64_PSR vpsr;
    REGS *regs = vcpu_regs(vcpu);
    h_pending = highest_pending_irq(vcpu);
    if ( h_pending == NULL_VECTOR ) {
        update_vhpi(vcpu, NULL_VECTOR);
        h_pending = SPURIOUS_VECTOR;
        goto chk_irq_exit;
    }
    h_inservice = highest_inservice_irq(vcpu);

    vpsr.val = VCPU(vcpu, vpsr);
    mask = irq_masked(vcpu, h_pending, h_inservice);
    if ( vpsr.i && IRQ_NO_MASKED == mask ) {
        isr = vpsr.val & IA64_PSR_RI;
        if ( !vpsr.ic )
            panic_domain(regs, "Interrupt when IC=0\n");
        update_vhpi(vcpu, h_pending);
        vmx_reflect_interruption(0, isr, 0, 12, regs);  // EXT IRQ
    } else if (mask == IRQ_MASKED_BY_INSVC) {
        if (VCPU(vcpu, vhpi))
            update_vhpi(vcpu, NULL_VECTOR);
    }
    else {
        // masked by vpsr.i or vtpr.
        update_vhpi(vcpu, h_pending);
    }

chk_irq_exit:
    return h_pending;
}

/*
 * Only coming from virtualization fault.
 */
void guest_write_eoi(VCPU *vcpu)
{
    int vec;

    vec = highest_inservice_irq(vcpu);
    if ( vec == NULL_VECTOR )
        panic_domain(vcpu_regs(vcpu), "Wrong vector to EOI\n");
    VLSAPIC_INSVC(vcpu, vec>>6) &= ~(1UL << (vec&63));
    VCPU(vcpu, eoi) = 0;    // overwrite the data
    vcpu->arch.irq_new_pending = 1;
}

int is_unmasked_irq(VCPU *vcpu)
{
    int h_pending, h_inservice;

    h_pending = highest_pending_irq(vcpu);
    h_inservice = highest_inservice_irq(vcpu);
    if ( h_pending == NULL_VECTOR ||
         irq_masked(vcpu, h_pending, h_inservice) != IRQ_NO_MASKED ) {
        return 0;
    }
    else
        return 1;
}

uint64_t guest_read_vivr(VCPU *vcpu)
{
    int vec, h_inservice, mask;
    vec = highest_pending_irq(vcpu);
    h_inservice = highest_inservice_irq(vcpu);
    mask = irq_masked(vcpu, vec, h_inservice);
    if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
        if (VCPU(vcpu, vhpi))
            update_vhpi(vcpu, NULL_VECTOR);
        return IA64_SPURIOUS_INT_VECTOR;
    }
    if (mask == IRQ_MASKED_BY_VTPR) {
        update_vhpi(vcpu, vec);
        return IA64_SPURIOUS_INT_VECTOR;
    }
    VLSAPIC_INSVC(vcpu, vec>>6) |= (1UL << (vec&63));
    vmx_vcpu_unpend_interrupt(vcpu, vec);
    return (uint64_t)vec;
}

static void generate_exirq(VCPU *vcpu)
{
    IA64_PSR vpsr;
    uint64_t isr;
    REGS *regs = vcpu_regs(vcpu);
    vpsr.val = VCPU(vcpu, vpsr);
    isr = vpsr.val & IA64_PSR_RI;
    if ( !vpsr.ic )
        panic_domain(regs, "Interrupt when IC=0\n");
    vmx_reflect_interruption(0, isr, 0, 12, regs);  // EXT IRQ
}

void vhpi_detection(VCPU *vcpu)
{
    uint64_t threshold, vhpi;
    tpr_t vtpr;
    IA64_PSR vpsr;
    vpsr.val = VCPU(vcpu, vpsr);
    vtpr.val = VCPU(vcpu, tpr);

    threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
    vhpi = VCPU(vcpu, vhpi);
    if ( vhpi > threshold ) {
        // interrupt activated
        generate_exirq (vcpu);
    }
}

void vmx_vexirq(VCPU *vcpu)
{
    generate_exirq (vcpu);
}