ia64/xen-unstable

view xen/arch/ia64/xen/hyperprivop.S @ 7728:12d4d2dc06a2

Punt to slow path for itc.i/d for region0 to avoid metaphys problem
Signed-off-by: Dan Magenheimer <dan.magenheimer@hp.com>
author djm@kirby.fc.hp.com
date Fri Nov 11 13:05:38 2005 -0600 (2005-11-11)
parents d51b071bfcfc
children 45c4e735fc8c
1 /*
2 * arch/ia64/kernel/hyperprivop.S
3 *
4 * Copyright (C) 2005 Hewlett-Packard Co
5 * Dan Magenheimer <dan.magenheimer@hp.com>
6 */
8 #include <linux/config.h>
10 #include <asm/asmmacro.h>
11 #include <asm/kregs.h>
12 #include <asm/offsets.h>
13 #include <asm/processor.h>
14 #include <asm/system.h>
15 #include <public/arch-ia64.h>
18 #define _PAGE_PPN_MASK 0x0003fffffffff000 //asm/pgtable.h doesn't do assembly
19 #define PAGE_PHYS 0x0010000000000761 //__pgprot(__DIRTY_BITS|_PAGE_PL_2|_PAGE_AR_RWX)
20 #define _PAGE_PL_2 (2<<7)
22 #if 1 // change to 0 to turn off all fast paths
23 #define FAST_HYPERPRIVOPS
24 #define FAST_HYPERPRIVOP_CNT
25 #define FAST_REFLECT_CNT
26 //#define FAST_TICK // mostly working (unat problems) but default off for now
27 //#define FAST_TLB_MISS_REFLECT // mostly working but default off for now
28 //#define FAST_ITC // working but default off for now
29 #define FAST_BREAK
30 #define FAST_ACCESS_REFLECT
31 #define FAST_RFI
32 #define FAST_SSM_I
33 #define FAST_PTC_GA
34 #undef RFI_TO_INTERRUPT // not working yet
35 #endif
37 #define XEN_HYPER_RFI 0x1
38 #define XEN_HYPER_RSM_DT 0x2
39 #define XEN_HYPER_SSM_DT 0x3
40 #define XEN_HYPER_COVER 0x4
41 #define XEN_HYPER_ITC_D 0x5
42 #define XEN_HYPER_ITC_I 0x6
43 #define XEN_HYPER_SSM_I 0x7
44 #define XEN_HYPER_GET_IVR 0x8
45 #define XEN_HYPER_GET_TPR 0x9
46 #define XEN_HYPER_SET_TPR 0xa
47 #define XEN_HYPER_EOI 0xb
48 #define XEN_HYPER_SET_ITM 0xc
49 #define XEN_HYPER_THASH 0xd
50 #define XEN_HYPER_PTC_GA 0xe
51 #define XEN_HYPER_ITR_D 0xf
52 #define XEN_HYPER_GET_RR 0x10
53 #define XEN_HYPER_SET_RR 0x11
54 #define XEN_HYPER_SET_KR 0x12
56 #ifdef CONFIG_SMP
57 #warning "FIXME: ptc.ga instruction requires spinlock for SMP"
58 #undef FAST_PTC_GA
59 #endif
61 // FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
62 #define HANDLE_AR_UNAT
64 // FIXME: This is defined in include/asm-ia64/hw_irq.h but this
65 // doesn't appear to be include'able from assembly?
66 #define IA64_TIMER_VECTOR 0xef
68 // Should be included from common header file (also in process.c)
69 // NO: PSR_CLR IS DIFFERENT HERE! (CPL)
70 #define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
71 #define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
72 // note: IA64_PSR_PK removed from the following; why is this necessary?
73 #define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
74 IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
75 IA64_PSR_IT | IA64_PSR_BN)
77 #define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
78 IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
79 IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
80 IA64_PSR_MC | IA64_PSR_IS | \
81 IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
82 IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
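// Illustratively, delivery rewrites the guest-visible psr roughly as
// (informal C; cpl is then forced separately):
//   ipsr = (ipsr | DELIVER_PSR_SET) & ~DELIVER_PSR_CLR;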
84 // Note: not hand-scheduled for now
85 // Registers at entry
86 // r16 == cr.isr
87 // r17 == cr.iim
88 // r18 == XSI_PSR_IC_OFS
89 // r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
90 // r31 == pr
91 GLOBAL_ENTRY(fast_hyperprivop)
92 #ifndef FAST_HYPERPRIVOPS // see beginning of file
93 br.sptk.many dispatch_break_fault ;;
94 #endif
95 // HYPERPRIVOP_SSM_I?
96 // assumes domain interrupts pending, so just do it
97 cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
98 (p7) br.sptk.many hyper_ssm_i;;
100 // FIXME. This algorithm gives up (goes to the slow path) if there
101 // are ANY interrupts pending, even if they are currently
102 // undeliverable. This should be improved later...
103 adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
104 ld4 r20=[r20] ;;
105 cmp.eq p7,p0=r0,r20
106 (p7) br.cond.sptk.many 1f
107 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
108 ld8 r20=[r20];;
109 adds r21=IA64_VCPU_IRR0_OFFSET,r20;
110 adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
111 ld8 r23=[r21],16; ld8 r24=[r22],16;;
112 ld8 r21=[r21]; ld8 r22=[r22];;
113 or r23=r23,r24; or r21=r21,r22;;
114 or r20=r23,r21;;
115 1: // when we get to here r20=~=interrupts pending
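// The gather above is roughly (informal C sketch; names approximate):
//   r20 = PSCB(pending_interruption);
//   if (r20 == 0)
//       r20 = irr[0] | irr[1] | irr[2] | irr[3]; // any set bit => pending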
117 // HYPERPRIVOP_RFI?
118 cmp.eq p7,p6=XEN_HYPER_RFI,r17
119 (p7) br.sptk.many hyper_rfi;;
121 // HYPERPRIVOP_GET_IVR?
122 cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
123 (p7) br.sptk.many hyper_get_ivr;;
125 cmp.ne p7,p0=r20,r0
126 (p7) br.spnt.many dispatch_break_fault ;;
128 // HYPERPRIVOP_COVER?
129 cmp.eq p7,p6=XEN_HYPER_COVER,r17
130 (p7) br.sptk.many hyper_cover;;
132 // HYPERPRIVOP_SSM_DT?
133 cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
134 (p7) br.sptk.many hyper_ssm_dt;;
136 // HYPERPRIVOP_RSM_DT?
137 cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
138 (p7) br.sptk.many hyper_rsm_dt;;
140 // HYPERPRIVOP_GET_TPR?
141 cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
142 (p7) br.sptk.many hyper_get_tpr;;
144 // HYPERPRIVOP_SET_TPR?
145 cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
146 (p7) br.sptk.many hyper_set_tpr;;
148 // HYPERPRIVOP_EOI?
149 cmp.eq p7,p6=XEN_HYPER_EOI,r17
150 (p7) br.sptk.many hyper_eoi;;
152 // HYPERPRIVOP_SET_ITM?
153 cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
154 (p7) br.sptk.many hyper_set_itm;;
156 // HYPERPRIVOP_SET_RR?
157 cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
158 (p7) br.sptk.many hyper_set_rr;;
160 // HYPERPRIVOP_GET_RR?
161 cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
162 (p7) br.sptk.many hyper_get_rr;;
164 // HYPERPRIVOP_PTC_GA?
165 cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
166 (p7) br.sptk.many hyper_ptc_ga;;
168 // HYPERPRIVOP_ITC_D?
169 cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
170 (p7) br.sptk.many hyper_itc_d;;
172 // HYPERPRIVOP_ITC_I?
173 cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
174 (p7) br.sptk.many hyper_itc_i;;
176 // HYPERPRIVOP_THASH?
177 cmp.eq p7,p6=XEN_HYPER_THASH,r17
178 (p7) br.sptk.many hyper_thash;;
180 // HYPERPRIVOP_SET_KR?
181 cmp.eq p7,p6=XEN_HYPER_SET_KR,r17
182 (p7) br.sptk.many hyper_set_kr;;
184 // if not one of the above, give up for now and do it the slow way
185 br.sptk.many dispatch_break_fault ;;
188 // give up for now if: ipsr.be==1, ipsr.pp==1
189 // from reflect_interruption, don't need to:
190 // - printf first extint (debug only)
191 // - check for interrupt collection enabled (routine will force on)
192 // - set ifa (not valid for extint)
193 // - set iha (not valid for extint)
194 // - set itir (not valid for extint)
195 // DO need to
196 // - increment the HYPER_SSM_I fast_hyperprivop counter
197 // - set shared_mem iip to instruction after HYPER_SSM_I
198 // - set cr.iip to guest iva+0x3000
199 // - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
200 // be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
201 // i = shared_mem interrupt_delivery_enabled
202 // ic = shared_mem interrupt_collection_enabled
203 // ri = instruction after HYPER_SSM_I
204 // all other bits unchanged from real cr.ipsr
205 // - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
206 // - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
207 // and isr.ri to cr.isr.ri (all other bits zero)
208 // - cover and set shared_mem precover_ifs to cr.ifs
209 // ^^^ MISSED THIS FOR fast_break??
210 // - set shared_mem ifs and incomplete_regframe to 0
211 // - set shared_mem interrupt_delivery_enabled to 0
212 // - set shared_mem interrupt_collection_enabled to 0
213 // - set r31 to SHAREDINFO_ADDR
214 // - virtual bank switch 0
215 // maybe implement later
216 // - verify that there really IS a deliverable interrupt pending
217 // - set shared_mem iva
218 // needs to be done but not implemented (in reflect_interruption)
219 // - set shared_mem iipa
220 // don't know for sure
221 // - set shared_mem unat
222 // r16 == cr.isr
223 // r17 == cr.iim
224 // r18 == XSI_PSR_IC
225 // r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
226 // r31 == pr
227 ENTRY(hyper_ssm_i)
228 #ifndef FAST_SSM_I
229 br.spnt.few dispatch_break_fault ;;
230 #endif
231 // give up for now if: ipsr.be==1, ipsr.pp==1
232 mov r30=cr.ipsr;;
233 mov r29=cr.iip;;
234 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
235 cmp.ne p7,p0=r21,r0
236 (p7) br.sptk.many dispatch_break_fault ;;
237 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
238 cmp.ne p7,p0=r21,r0
239 (p7) br.sptk.many dispatch_break_fault ;;
240 #ifdef FAST_HYPERPRIVOP_CNT
241 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
242 ld8 r21=[r20];;
243 adds r21=1,r21;;
244 st8 [r20]=r21;;
245 #endif
246 // set shared_mem iip to instruction after HYPER_SSM_I
247 extr.u r20=r30,41,2 ;;
248 cmp.eq p6,p7=2,r20 ;;
249 (p6) mov r20=0
250 (p6) adds r29=16,r29
251 (p7) adds r20=1,r20 ;;
252 dep r30=r20,r30,41,2;; // adjust cr.ipsr.ri but don't save yet
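// The slot advance above is, in rough C (ipsr.ri is the bundle slot, 0..2):
//   if (ri == 2) { ri = 0; iip += 16; }  // step to the next bundle
//   else ri++;                           // step to the next slot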
253 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
254 st8 [r21]=r29 ;;
255 // set shared_mem isr
256 extr.u r16=r16,38,1;; // grab cr.isr.ir bit
257 dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
258 dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
259 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
260 st8 [r21]=r16 ;;
261 // set cr.ipsr
262 mov r29=r30 ;;
263 movl r28=DELIVER_PSR_SET;;
264 movl r27=~DELIVER_PSR_CLR;;
265 or r29=r29,r28;;
266 and r29=r29,r27;;
267 mov cr.ipsr=r29;;
268 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
269 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
270 cmp.eq p6,p7=3,r29;;
271 (p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
272 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
273 ;;
274 // FOR SSM_I ONLY, also turn on psr.i and psr.ic
275 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
276 // movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
277 movl r27=~(IA64_PSR_BE|IA64_PSR_BN);;
278 or r30=r30,r28;;
279 and r30=r30,r27;;
280 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
281 st8 [r21]=r30 ;;
282 // set shared_mem interrupt_delivery_enabled to 0
283 // set shared_mem interrupt_collection_enabled to 0
284 st8 [r18]=r0;;
285 // cover and set shared_mem precover_ifs to cr.ifs
286 // set shared_mem ifs and incomplete_regframe to 0
287 cover ;;
288 mov r20=cr.ifs;;
289 adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
290 st4 [r21]=r0 ;;
291 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
292 st8 [r21]=r0 ;;
293 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
294 st8 [r21]=r20 ;;
295 // leave cr.ifs alone for later rfi
296 // set iip to go to domain IVA break instruction vector
297 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
298 ld8 r22=[r22];;
299 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
300 ld8 r23=[r22];;
301 movl r24=0x3000;;
302 add r24=r24,r23;;
303 mov cr.iip=r24;;
304 // OK, now all set to go except for switch to virtual bank0
305 mov r30=r2; mov r29=r3;;
306 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
307 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
308 bsw.1;;
309 // FIXME?: ar.unat is not really handled correctly,
310 // but may not matter if the OS is NaT-clean
311 .mem.offset 0,0; st8.spill [r2]=r16,16;
312 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
313 .mem.offset 0,0; st8.spill [r2]=r18,16;
314 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
315 .mem.offset 0,0; st8.spill [r2]=r20,16;
316 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
317 .mem.offset 0,0; st8.spill [r2]=r22,16;
318 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
319 .mem.offset 0,0; st8.spill [r2]=r24,16;
320 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
321 .mem.offset 0,0; st8.spill [r2]=r26,16;
322 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
323 .mem.offset 0,0; st8.spill [r2]=r28,16;
324 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
325 .mem.offset 0,0; st8.spill [r2]=r30,16;
326 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
327 movl r31=XSI_IPSR;;
328 bsw.0 ;;
329 mov r2=r30; mov r3=r29;;
330 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
331 st4 [r20]=r0 ;;
332 mov pr=r31,-1 ;;
333 rfi
334 ;;
336 // reflect domain clock interrupt
337 // r31 == pr
338 // r30 == cr.ivr
339 // r29 == rp
340 GLOBAL_ENTRY(fast_tick_reflect)
341 #ifndef FAST_TICK // see beginning of file
342 br.cond.sptk.many rp;;
343 #endif
344 mov r28=IA64_TIMER_VECTOR;;
345 cmp.ne p6,p0=r28,r30
346 (p6) br.cond.spnt.few rp;;
347 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
348 ld8 r26=[r20];;
349 mov r27=ar.itc;;
350 adds r27=200,r27;; // safety margin
351 cmp.ltu p6,p0=r26,r27
352 (p6) br.cond.spnt.few rp;;
353 mov r17=cr.ipsr;;
354 // slow path if: ipsr.be==1, ipsr.pp==1
355 extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
356 cmp.ne p6,p0=r21,r0
357 (p6) br.cond.spnt.few rp;;
358 extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
359 cmp.ne p6,p0=r21,r0
360 (p6) br.cond.spnt.few rp;;
361 // definitely have a domain tick
362 mov cr.eoi=r0;;
363 mov rp=r29;;
364 mov cr.itm=r26;; // ensure next tick
365 #ifdef FAST_REFLECT_CNT
366 movl r20=fast_reflect_count+((0x3000>>8)*8);;
367 ld8 r21=[r20];;
368 adds r21=1,r21;;
369 st8 [r20]=r21;;
370 #endif
371 // vcpu_pend_timer(current)
372 movl r18=XSI_PSR_IC;;
373 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
374 ld8 r20=[r20];;
375 cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
376 (p6) br.cond.spnt.few fast_tick_reflect_done;;
377 tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
378 (p6) br.cond.spnt.few fast_tick_reflect_done;;
379 extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
380 extr.u r26=r20,6,2;; // r26 has irr index of itv.vector
381 movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
382 ld8 r19=[r19];;
383 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
384 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
385 ld8 r24=[r22];;
386 ld8 r23=[r23];;
387 cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
388 (p6) br.cond.spnt.few fast_tick_reflect_done;;
389 // set irr bit
390 adds r21=IA64_VCPU_IRR0_OFFSET,r19;
391 shl r26=r26,3;;
392 add r21=r21,r26;;
393 mov r25=1;;
394 shl r22=r25,r27;;
395 ld8 r23=[r21];;
396 or r22=r22,r23;;
397 st8 [r21]=r22;;
398 // set PSCB(pending_interruption)!
399 adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
400 st4 [r20]=r25;;
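// Net effect so far, in rough C (names informal):
//   vcpu->irr[vec >> 6] |= 1UL << (vec & 63);  // mark itv.vector pending
//   PSCB(pending_interruption) = 1;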
402 // if interrupted at pl0, we're done
403 extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
404 cmp.eq p6,p0=r16,r0;;
405 (p6) br.cond.spnt.few fast_tick_reflect_done;;
406 // if guest vpsr.i is off, we're done
407 adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;;
408 ld4 r21=[r21];;
409 cmp.eq p6,p0=r21,r0
410 (p6) br.cond.spnt.few fast_tick_reflect_done;;
412 // OK, we have a clock tick to deliver to the active domain!
413 // so deliver to iva+0x3000
414 // r17 == cr.ipsr
415 // r18 == XSI_PSR_IC
416 // r19 == IA64_KR(CURRENT)
417 // r31 == pr
418 mov r16=cr.isr;;
419 mov r29=cr.iip;;
420 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
421 st8 [r21]=r29 ;;
422 // set shared_mem isr
423 extr.u r16=r16,38,1;; // grab cr.isr.ir bit
424 dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
425 extr.u r20=r17,41,2 ;; // get ipsr.ri
426 dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
427 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
428 st8 [r21]=r16 ;;
429 // set cr.ipsr (make sure cpl==2!)
430 mov r29=r17 ;;
431 movl r28=DELIVER_PSR_SET;;
432 movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
433 or r29=r29,r28;;
434 and r29=r29,r27;;
435 mov cr.ipsr=r29;;
436 // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
437 extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
438 cmp.eq p6,p7=3,r29;;
439 (p6) dep r17=-1,r17,IA64_PSR_CPL0_BIT,2
440 (p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
441 ;;
442 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
443 movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
444 dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
445 or r17=r17,r28;;
446 and r17=r17,r27;;
447 ld4 r16=[r18],4;;
448 cmp.ne p6,p0=r16,r0;;
449 (p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
450 ld4 r16=[r18],-4;;
451 cmp.ne p6,p0=r16,r0;;
452 (p6) dep r17=-1,r17,IA64_PSR_I_BIT,1 ;;
453 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
454 st8 [r21]=r17 ;;
455 // set shared_mem interrupt_delivery_enabled to 0
456 // set shared_mem interrupt_collection_enabled to 0
457 st8 [r18]=r0;;
458 // cover and set shared_mem precover_ifs to cr.ifs
459 // set shared_mem ifs and incomplete_regframe to 0
460 cover ;;
461 mov r20=cr.ifs;;
462 adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
463 st4 [r21]=r0 ;;
464 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
465 st8 [r21]=r0 ;;
466 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
467 st8 [r21]=r20 ;;
468 // leave cr.ifs alone for later rfi
469 // set iip to go to domain IVA break instruction vector
470 adds r22=IA64_VCPU_IVA_OFFSET,r19;;
471 ld8 r23=[r22];;
472 movl r24=0x3000;;
473 add r24=r24,r23;;
474 mov cr.iip=r24;;
475 // OK, now all set to go except for switch to virtual bank0
476 mov r30=r2; mov r29=r3;;
477 #ifdef HANDLE_AR_UNAT
478 mov r28=ar.unat;
479 #endif
480 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
481 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
482 bsw.1;;
483 .mem.offset 0,0; st8.spill [r2]=r16,16;
484 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
485 .mem.offset 0,0; st8.spill [r2]=r18,16;
486 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
487 .mem.offset 0,0; st8.spill [r2]=r20,16;
488 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
489 .mem.offset 0,0; st8.spill [r2]=r22,16;
490 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
491 .mem.offset 0,0; st8.spill [r2]=r24,16;
492 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
493 .mem.offset 0,0; st8.spill [r2]=r26,16;
494 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
495 .mem.offset 0,0; st8.spill [r2]=r28,16;
496 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
497 .mem.offset 0,0; st8.spill [r2]=r30,16;
498 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
499 #ifdef HANDLE_AR_UNAT
500 // r16-r23 are preserved regs in bank0; we need to restore them.
501 // r24-r31 are scratch regs, so we need not handle their NaT bits,
502 // because the OS handler must assign them before accessing them.
503 ld8 r16=[r2],16;
504 ld8 r17=[r3],16;;
505 ld8 r18=[r2],16;
506 ld8 r19=[r3],16;;
507 ld8 r20=[r2],16;
508 ld8 r21=[r3],16;;
509 ld8 r22=[r2],16;
510 ld8 r23=[r3],16;;
511 #endif
512 movl r31=XSI_IPSR;;
513 bsw.0 ;;
514 mov r24=ar.unat;
515 mov r2=r30; mov r3=r29;;
516 #ifdef HANDLE_AR_UNAT
517 mov ar.unat=r28;
518 #endif
519 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;
520 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
521 st8 [r25]=r24;
522 st4 [r20]=r0 ;;
523 fast_tick_reflect_done:
524 mov pr=r31,-1 ;;
525 rfi
526 END(fast_tick_reflect)
528 // reflect domain breaks directly to domain
529 // r16 == cr.isr
530 // r17 == cr.iim
531 // r18 == XSI_PSR_IC
532 // r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
533 // r31 == pr
534 GLOBAL_ENTRY(fast_break_reflect)
535 #ifndef FAST_BREAK // see beginning of file
536 br.sptk.many dispatch_break_fault ;;
537 #endif
538 mov r30=cr.ipsr;;
539 mov r29=cr.iip;;
540 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
541 cmp.ne p7,p0=r21,r0 ;;
542 (p7) br.spnt.few dispatch_break_fault ;;
543 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
544 cmp.ne p7,p0=r21,r0 ;;
545 (p7) br.spnt.few dispatch_break_fault ;;
546 #if 1 /* special handling in case running on simulator */
547 movl r20=first_break;;
548 ld4 r23=[r20];;
549 movl r21=0x80001;
550 movl r22=0x80002;;
551 cmp.ne p7,p0=r23,r0;;
552 (p7) br.spnt.few dispatch_break_fault ;;
553 cmp.eq p7,p0=r21,r17;
554 (p7) br.spnt.few dispatch_break_fault ;;
555 cmp.eq p7,p0=r22,r17;
556 (p7) br.spnt.few dispatch_break_fault ;;
557 #endif
558 movl r20=0x2c00;
559 // save iim in shared_info
560 adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
561 st8 [r21]=r17;;
562 // fall through
565 // reflect to domain ivt+r20
566 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
567 // r16 == cr.isr
568 // r18 == XSI_PSR_IC
569 // r20 == offset into ivt
570 // r29 == iip
571 // r30 == ipsr
572 // r31 == pr
573 ENTRY(fast_reflect)
574 #ifdef FAST_REFLECT_CNT
575 movl r22=fast_reflect_count;
576 shr r23=r20,5;;
577 add r22=r22,r23;;
578 ld8 r21=[r22];;
579 adds r21=1,r21;;
580 st8 [r22]=r21;;
581 #endif
582 // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
583 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
584 st8 [r21]=r29;;
585 // set shared_mem isr
586 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
587 st8 [r21]=r16 ;;
588 // set cr.ipsr
589 mov r29=r30 ;;
590 movl r28=DELIVER_PSR_SET;;
591 movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
592 or r29=r29,r28;;
593 and r29=r29,r27;;
594 mov cr.ipsr=r29;;
595 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
596 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
597 cmp.eq p6,p7=3,r29;;
598 (p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
599 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
600 ;;
601 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
602 movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
603 or r30=r30,r28;;
604 and r30=r30,r27;;
605 // also set shared_mem ipsr.i and ipsr.ic appropriately
606 ld8 r24=[r18];;
607 extr.u r22=r24,32,32
608 cmp4.eq p6,p7=r24,r0;;
609 (p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
610 (p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
611 cmp4.eq p6,p7=r22,r0;;
612 (p6) dep r30=0,r30,IA64_PSR_I_BIT,1
613 (p7) dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
614 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
615 st8 [r21]=r30 ;;
616 // set shared_mem interrupt_delivery_enabled to 0
617 // set shared_mem interrupt_collection_enabled to 0
618 st8 [r18]=r0;;
619 // cover and set shared_mem precover_ifs to cr.ifs
620 // set shared_mem ifs and incomplete_regframe to 0
621 cover ;;
622 mov r24=cr.ifs;;
623 adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
624 st4 [r21]=r0 ;;
625 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
626 st8 [r21]=r0 ;;
627 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
628 st8 [r21]=r24 ;;
629 // vpsr.i = vpsr.ic = 0 on delivery of interruption
630 st8 [r18]=r0;;
631 // FIXME: need to save iipa and isr to be arch-compliant
632 // set iip to go to domain IVA break instruction vector
633 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
634 ld8 r22=[r22];;
635 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
636 ld8 r23=[r22];;
637 add r20=r20,r23;;
638 mov cr.iip=r20;;
639 // OK, now all set to go except for switch to virtual bank0
640 mov r30=r2; mov r29=r3;;
641 #ifdef HANDLE_AR_UNAT
642 mov r28=ar.unat;
643 #endif
644 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
645 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
646 bsw.1;;
647 .mem.offset 0,0; st8.spill [r2]=r16,16;
648 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
649 .mem.offset 0,0; st8.spill [r2]=r18,16;
650 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
651 .mem.offset 0,0; st8.spill [r2]=r20,16;
652 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
653 .mem.offset 0,0; st8.spill [r2]=r22,16;
654 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
655 .mem.offset 0,0; st8.spill [r2]=r24,16;
656 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
657 .mem.offset 0,0; st8.spill [r2]=r26,16;
658 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
659 .mem.offset 0,0; st8.spill [r2]=r28,16;
660 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
661 .mem.offset 0,0; st8.spill [r2]=r30,16;
662 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
663 #ifdef HANDLE_AR_UNAT
664 // r16-r23 are preserved regs in bank0; we need to restore them.
665 // r24-r31 are scratch regs, so we need not handle their NaT bits,
666 // because the OS handler must assign them before accessing them.
667 ld8 r16=[r2],16;
668 ld8 r17=[r3],16;;
669 ld8 r18=[r2],16;
670 ld8 r19=[r3],16;;
671 ld8 r20=[r2],16;
672 ld8 r21=[r3],16;;
673 ld8 r22=[r2],16;
674 ld8 r23=[r3],16;;
675 #endif
676 movl r31=XSI_IPSR;;
677 bsw.0 ;;
678 mov r24=ar.unat;
679 mov r2=r30; mov r3=r29;;
680 #ifdef HANDLE_AR_UNAT
681 mov ar.unat=r28;
682 #endif
683 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;
684 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
685 st8 [r25]=r24;
686 st4 [r20]=r0 ;;
687 mov pr=r31,-1 ;;
688 rfi
689 ;;
691 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
692 // r16 == isr
693 // r17 == ifa
694 // r19 == reflect number (only pass-thru to dispatch_reflection)
695 // r20 == offset into ivt
696 // r31 == pr
697 GLOBAL_ENTRY(fast_access_reflect)
698 #ifndef FAST_ACCESS_REFLECT // see beginning of file
699 br.spnt.few dispatch_reflection ;;
700 #endif
701 mov r30=cr.ipsr;;
702 mov r29=cr.iip;;
703 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
704 cmp.ne p7,p0=r21,r0
705 (p7) br.spnt.few dispatch_reflection ;;
706 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
707 cmp.ne p7,p0=r21,r0
708 (p7) br.spnt.few dispatch_reflection ;;
709 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
710 cmp.eq p7,p0=r21,r0
711 (p7) br.spnt.few dispatch_reflection ;;
712 movl r18=XSI_PSR_IC;;
713 ld8 r21=[r18];;
714 cmp.eq p7,p0=r0,r21
715 (p7) br.spnt.few dispatch_reflection ;;
716 // set shared_mem ifa, FIXME: should we validate it?
717 mov r17=cr.ifa;;
718 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
719 st8 [r21]=r17 ;;
720 // get rr[ifa] and save to itir in shared memory (extra bits ignored)
721 shr.u r22=r17,61
722 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
723 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
724 shladd r22=r22,3,r21;;
725 ld8 r22=[r22];;
726 st8 [r23]=r22;;
727 br.cond.sptk.many fast_reflect;;
729 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
730 // is as it was at the time of the original miss. We want to preserve
731 // that, so if we get a nested fault we can just branch to page_fault
732 GLOBAL_ENTRY(fast_tlb_miss_reflect)
733 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
734 br.spnt.few page_fault ;;
735 #endif
736 mov r31=pr
737 mov r30=cr.ipsr
738 mov r29=cr.iip
739 mov r16=cr.isr
740 mov r17=cr.ifa;;
741 // for now, always take slow path for region 0 (e.g. metaphys mode)
742 extr.u r21=r17,61,3;;
743 cmp.eq p7,p0=r0,r21
744 (p7) br.spnt.few page_fault ;;
745 // always take slow path for PL0 (e.g. __copy_from_user)
746 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
747 cmp.eq p7,p0=r21,r0
748 (p7) br.spnt.few page_fault ;;
749 // slow path if strange ipsr or isr bits set
750 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
751 cmp.ne p7,p0=r21,r0
752 (p7) br.spnt.few page_fault ;;
753 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
754 cmp.ne p7,p0=r21,r0
755 (p7) br.spnt.few page_fault ;;
756 movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
757 and r21=r16,r21;;
758 cmp.ne p7,p0=r0,r21
759 (p7) br.spnt.few page_fault ;;
760 // also take slow path if virtual psr.ic=0
761 movl r18=XSI_PSR_IC;;
762 ld4 r21=[r18];;
763 cmp.eq p7,p0=r0,r21
764 (p7) br.spnt.few page_fault ;;
765 // OK, if we get to here, we are doing a fast vcpu_translate. Need to:
766 // 1) look in the virtual TR's (pinned), if not there
767 // 2) look in the 1-entry TLB (pinned), if not there
768 // 3) check the domain VHPT (NOT pinned, accesses domain memory!)
769 // If we find it in any of these places, we need to effectively do
770 // a hyper_itc_i/d
772 // short-term hack for now, if in region 5-7, take slow path
773 // since all Linux TRs are in region 5 or 7, we need not check TRs
774 extr.u r21=r17,61,3;;
775 cmp.le p7,p0=5,r21
776 (p7) br.spnt.few page_fault ;;
777 fast_tlb_no_tr_match:
778 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
779 ld8 r27=[r27];;
780 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
781 (p6) adds r25=IA64_VCPU_ITLB_OFFSET,r27;;
782 (p7) adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
783 ld8 r20=[r25],8;;
784 tbit.z p7,p0=r20,0;; // present?
785 (p7) br.cond.spnt.few 1f;;
786 // if ifa is in range of tlb, don't bother to check rid, go slow path
787 ld8 r21=[r25],8;;
788 mov r23=1
789 extr.u r21=r21,2,6;;
790 shl r22=r23,r21
791 ld8 r21=[r25],8;;
792 cmp.ltu p7,p0=r17,r21
793 (p7) br.cond.sptk.many 1f;
794 add r21=r22,r21;;
795 cmp.ltu p7,p0=r17,r21
796 (p7) br.cond.spnt.few page_fault;;
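// The range test above is, in rough C (field layout informal):
//   if (ifa >= tlb.vadr && ifa < tlb.vadr + (1UL << tlb.ps))
//       goto page_fault;  // ifa covered by the 1-entry TLB: take slow path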
798 1: // check the guest VHPT
799 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
800 ld8 r19=[r19];;
801 tbit.nz p7,p0=r19,IA64_PTA_VF_BIT;; // long format VHPT
802 (p7) br.cond.spnt.few page_fault;;
803 // if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
804 // FIXME: later, we deliver an alt_d/i vector after thash and itir
805 tbit.z p7,p0=r19,IA64_PTA_VE_BIT;;
806 (p7) br.cond.spnt.few page_fault;;
807 extr.u r25=r17,61,3;;
808 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
809 shl r25=r25,3;;
810 add r21=r21,r25;;
811 ld8 r22=[r21];;
812 tbit.z p7,p0=r22,0
813 (p7) br.cond.spnt.few page_fault;;
815 // compute and save away itir (r22 & RR_PS_MASK)
816 movl r21=0xfc;;
817 and r22=r22,r21;;
818 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
819 st8 [r21]=r22;;
821 // save away ifa
822 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
823 st8 [r21]=r17;;
824 // see vcpu_thash to save away iha
825 shr.u r20 = r17, 61
826 addl r25 = 1, r0
827 movl r30 = 0xe000000000000000
828 ;;
829 and r21 = r30, r17 // VHPT_Addr1
830 ;;
831 shladd r28 = r20, 3, r18
832 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
833 ;;
834 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
835 addl r28 = 32767, r0
836 ld8 r24 = [r19] // pta
837 ;;
838 ld8 r23 = [r27] // rrs[vadr>>61]
839 extr.u r26 = r24, 2, 6
840 ;;
841 extr.u r22 = r23, 2, 6
842 shl r30 = r25, r26
843 ;;
844 shr.u r19 = r17, r22
845 shr.u r29 = r24, 15
846 ;;
847 adds r30 = -1, r30
848 ;;
849 shladd r27 = r19, 3, r0
850 extr.u r26 = r30, 15, 46
851 ;;
852 andcm r24 = r29, r26
853 and r19 = r28, r27
854 shr.u r25 = r27, 15
855 ;;
856 and r23 = r26, r25
857 ;;
858 or r22 = r24, r23
859 ;;
860 dep.z r20 = r22, 15, 46
861 ;;
862 or r30 = r20, r21
863 ;;
864 //or r8 = r19, r30
865 or r19 = r19, r30
866 ;;
867 adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18 ;;
868 st8 [r23]=r19;;
869 // done with thash, check guest VHPT
871 adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
872 ld8 r24 = [r20];; // pta
873 // avoid recursively walking the VHPT
874 // if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
875 mov r20=-8
876 xor r21=r17,r24
877 extr.u r24=r24,2,6;;
878 shl r20=r20,r24;;
879 shr.u r20=r20,3;;
880 and r21=r20,r21;;
881 cmp.eq p7,p0=r21,r0
882 (p7) br.cond.spnt.few 1f;;
883 // __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
884 // prepare for possible nested dtlb fault
885 mov r29=b0
886 movl r30=guest_vhpt_miss;;
887 // now go fetch the entry from the guest VHPT
888 ld8 r20=[r19];;
889 // if we wind up here, we successfully loaded the VHPT entry
891 // this VHPT walker aborts on non-present pages instead
892 // of inserting a not-present translation; this allows
893 // vectoring directly to the miss handler
894 tbit.z p7,p0=r20,0
895 (p7) br.cond.spnt.few page_not_present;;
897 #ifdef FAST_REFLECT_CNT
898 movl r21=fast_vhpt_translate_count;;
899 ld8 r22=[r21];;
900 adds r22=1,r22;;
901 st8 [r21]=r22;;
902 #endif
904 // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
905 // r16 == pte
906 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
907 // r18 == XSI_PSR_IC_OFS
908 // r24 == ps
909 // r29 == saved value of b0 in case of recovery
910 // r30 == recovery ip if failure occurs
911 // r31 == pr
912 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
913 (p6) mov r17=1;;
914 (p7) mov r17=0;;
915 mov r16=r20
916 mov r29=b0 ;;
917 movl r30=recover_and_page_fault ;;
918 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
919 ld8 r24=[r21];;
920 extr.u r24=r24,2,6;;
921 // IFA already in PSCB
922 br.cond.sptk.many fast_insert;;
924 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
925 ENTRY(recover_and_page_fault)
926 #ifdef FAST_REFLECT_CNT
927 movl r21=recover_to_page_fault_count;;
928 ld8 r22=[r21];;
929 adds r22=1,r22;;
930 st8 [r21]=r22;;
931 #endif
932 mov b0=r29;;
933 br.cond.sptk.many page_fault;;
935 // if we wind up here, we missed in guest VHPT so recover
936 // from nested dtlb fault and reflect a tlb fault to the guest
937 guest_vhpt_miss:
938 mov b0=r29;;
939 // fault = IA64_VHPT_FAULT
940 mov r20=r0
941 br.cond.sptk.many 1f;
943 // if we get to here, we are ready to reflect
944 // need to set up virtual ifa, iha, itir (fast_reflect handles
945 // virtual isr, iip, ipsr, ifs)
946 // see vcpu_get_itir_on_fault: get ps,rid,(FIXME key) from rr[ifa]
947 page_not_present:
948 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
949 (p6) movl r20=0x400;;
950 (p7) movl r20=0x800;;
952 1: extr.u r25=r17,61,3;;
953 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
954 shl r25=r25,3;;
955 add r21=r21,r25;;
956 ld8 r22=[r21];;
957 extr.u r22=r22,2,30;;
958 dep.z r22=r22,2,30;;
959 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
960 st8 [r23]=r22;;
962 // fast reflect expects
963 // r16 == cr.isr
964 // r18 == XSI_PSR_IC
965 // r20 == offset into ivt
966 // r29 == iip
967 // r30 == ipsr
968 // r31 == pr
969 //mov r16=cr.isr
970 mov r29=cr.iip
971 mov r30=cr.ipsr
972 br.sptk.many fast_reflect;;
973 END(fast_tlb_miss_reflect)
975 // ensure that, if giving up, registers at entry to fast_hyperprivop are unchanged
976 ENTRY(hyper_rfi)
977 #ifndef FAST_RFI
978 br.spnt.few dispatch_break_fault ;;
979 #endif
980 // if no interrupts pending, proceed
981 mov r30=r0
982 cmp.eq p7,p0=r20,r0
983 (p7) br.sptk.many 1f
984 ;;
985 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
986 ld8 r21=[r20];; // r21 = vcr.ipsr
987 extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
988 mov r30=r22
989 // r30 determines whether we might deliver an immediate extint
990 1:
991 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
992 ld8 r21=[r20];; // r21 = vcr.ipsr
993 extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
994 // if turning on psr.be, give up for now and do it the slow way
995 cmp.ne p7,p0=r22,r0
996 (p7) br.spnt.few dispatch_break_fault ;;
997 // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
998 movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
999 and r22=r20,r21
1000 ;;
1001 cmp.ne p7,p0=r22,r20
1002 (p7) br.spnt.few dispatch_break_fault ;;
1003 // if was in metaphys mode, do it the slow way (FIXME later?)
1004 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1005 ld4 r20=[r20];;
1006 cmp.ne p7,p0=r20,r0
1007 (p7) br.spnt.few dispatch_break_fault ;;
1008 // if domain hasn't already done virtual bank switch
1009 // do it the slow way (FIXME later?)
1010 #if 0
1011 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1012 ld4 r20=[r20];;
1013 cmp.eq p7,p0=r20,r0
1014 (p7) br.spnt.few dispatch_break_fault ;;
1015 #endif
1016 // validate vcr.iip, if in Xen range, do it the slow way
1017 adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
1018 ld8 r22=[r20];;
1019 movl r23=XEN_VIRT_SPACE_LOW
1020 movl r24=XEN_VIRT_SPACE_HIGH ;;
1021 cmp.ltu p0,p7=r22,r23 ;; // if !(iip<low) &&
1022 (p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high)
1023 (p7) br.spnt.few dispatch_break_fault ;;
1024 #ifndef RFI_TO_INTERRUPT // see beginning of file
1025 cmp.ne p6,p0=r30,r0
1026 (p6) br.cond.spnt.few dispatch_break_fault ;;
1027 #endif
1029 1: // OK now, let's do an rfi.
1030 #ifdef FAST_HYPERPRIVOP_CNT
1031 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
1032 ld8 r23=[r20];;
1033 adds r23=1,r23;;
1034 st8 [r20]=r23;;
1035 #endif
1036 #ifdef RFI_TO_INTERRUPT
1037 // maybe do an immediate interrupt delivery?
1038 cmp.ne p6,p0=r30,r0
1039 (p6) br.cond.spnt.few rfi_check_extint;;
1040 #endif
1042 just_do_rfi:
1043 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1044 mov cr.iip=r22;;
1045 adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
1046 st4 [r20]=r0 ;;
1047 adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1048 ld8 r20=[r20];;
1049 dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
1050 mov cr.ifs=r20 ;;
1051 // ipsr.cpl = (vcr.ipsr.cpl == 0) ? 2 : 3;
1052 dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
1053 // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
1054 mov r19=r0 ;;
1055 extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
1056 cmp.ne p7,p6=r23,r0 ;;
1057 // not done yet
1058 (p7) dep r19=-1,r19,32,1
1059 extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
1060 cmp.ne p7,p6=r23,r0 ;;
1061 (p7) dep r19=-1,r19,0,1 ;;
1062 st8 [r18]=r19 ;;
1063 // force on psr.ic, i, dt, rt, it, bn
1064 movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
1065 ;;
1066 or r21=r21,r20
1067 ;;
1068 mov cr.ipsr=r21
1069 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1070 ld4 r21=[r20];;
1071 cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
1072 (p7) br.cond.spnt.few 1f;
1073 // OK, now all set to go except for switch to virtual bank1
1074 mov r22=1;; st4 [r20]=r22;
1075 mov r30=r2; mov r29=r3;;
1076 mov r17=ar.unat;;
1077 adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
1078 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
1079 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
1080 ld8 r16=[r16];;
1081 mov ar.unat=r16;;
1082 bsw.1;;
1083 // FIXME?: ar.unat is not really handled correctly,
1084 // but may not matter if the OS is NaT-clean
1085 .mem.offset 0,0; ld8.fill r16=[r2],16 ;
1086 .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
1087 .mem.offset 0,0; ld8.fill r18=[r2],16 ;
1088 .mem.offset 8,0; ld8.fill r19=[r3],16 ;;
1089 .mem.offset 0,0; ld8.fill r20=[r2],16 ;
1090 .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
1091 .mem.offset 0,0; ld8.fill r22=[r2],16 ;
1092 .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
1093 .mem.offset 0,0; ld8.fill r24=[r2],16 ;
1094 .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
1095 .mem.offset 0,0; ld8.fill r26=[r2],16 ;
1096 .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
1097 .mem.offset 0,0; ld8.fill r28=[r2],16 ;
1098 .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
1099 .mem.offset 0,0; ld8.fill r30=[r2],16 ;
1100 .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
1101 bsw.0 ;;
1102 mov ar.unat=r17;;
1103 mov r2=r30; mov r3=r29;;
1104 1: mov pr=r31,-1
1105 ;;
1106 rfi
1107 ;;
1109 #ifdef RFI_TO_INTERRUPT
1110 GLOBAL_ENTRY(rfi_check_extint)
1111 //br.sptk.many dispatch_break_fault ;;
1113 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1114 // make sure none of these get trashed in case going to just_do_rfi
1115 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1116 ld8 r30=[r30];;
1117 adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
1118 mov r25=192
1119 adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
1120 ld8 r23=[r16];;
1121 cmp.eq p6,p0=r23,r0;;
1122 (p6) adds r16=-8,r16;;
1123 (p6) adds r24=-8,r24;;
1124 (p6) adds r25=-64,r25;;
1125 (p6) ld8 r23=[r16];;
1126 (p6) cmp.eq p6,p0=r23,r0;;
1127 (p6) adds r16=-8,r16;;
1128 (p6) adds r24=-8,r24;;
1129 (p6) adds r25=-64,r25;;
1130 (p6) ld8 r23=[r16];;
1131 (p6) cmp.eq p6,p0=r23,r0;;
1132 (p6) adds r16=-8,r16;;
1133 (p6) adds r24=-8,r24;;
1134 (p6) adds r25=-64,r25;;
1135 (p6) ld8 r23=[r16];;
1136 (p6) cmp.eq p6,p0=r23,r0;;
1137 cmp.eq p6,p0=r23,r0
1138 (p6) br.cond.spnt.few just_do_rfi; // this is actually an error
1139 // r16 points to non-zero element of irr, r23 has value
1140 // r24 points to corr element of insvc, r25 has elt*64
1141 ld8 r26=[r24];;
1142 cmp.geu p6,p0=r26,r23
1143 (p6) br.cond.spnt.many just_do_rfi;
1145 // not masked by insvc, get vector number
1146 shr.u r26=r23,1;;
1147 or r26=r23,r26;;
1148 shr.u r27=r26,2;;
1149 or r26=r26,r27;;
1150 shr.u r27=r26,4;;
1151 or r26=r26,r27;;
1152 shr.u r27=r26,8;;
1153 or r26=r26,r27;;
1154 shr.u r27=r26,16;;
1155 or r26=r26,r27;;
1156 shr.u r27=r26,32;;
1157 or r26=r26,r27;;
1158 andcm r26=0xffffffffffffffff,r26;;
1159 popcnt r26=r26;;
1160 sub r26=63,r26;;
1161 // r26 now contains the bit index (mod 64)
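// The shift/or cascade above isolates the highest set bit: ia64 has
// popcnt but no count-leading-zeros, so (rough C):
//   x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16; x |= x>>32;
//   idx = 63 - popcnt(~x);  // count the zeros above the top bit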
1162 mov r27=1;;
1163 shl r27=r27,r26;;
1164 // r27 now contains the (within the proper word) bit mask
1165 add r26=r25,r26
1166 // r26 now contains the vector [0..255]
1167 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1168 ld8 r20=[r20] ;;
1169 extr.u r28=r20,16,1
1170 extr.u r29=r20,4,4 ;;
1171 cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi
1172 (p6) br.cond.spnt.few just_do_rfi;;
1173 shl r29=r29,4;;
1174 adds r29=15,r29;;
1175 cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
1176 (p6) br.cond.spnt.few just_do_rfi;;
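// i.e., in rough C: if (tpr.mmi || vector <= ((tpr.mic << 4) | 0xf))
//                       just_do_rfi();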
1178 // this doesn't work yet (dies early after getting to user mode)
1179 // but happens relatively infrequently, so fix it later.
1180 // NOTE that these will be counted incorrectly for now (for privcnt output)
1181 GLOBAL_ENTRY(rfi_with_interrupt)
1182 #if 1
1183 br.sptk.many dispatch_break_fault ;;
1184 #endif
1186 // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
1187 // r18 == XSI_PSR_IC
1188 // r21 == vipsr (ipsr in shared_mem)
1189 // r30 == IA64_KR(CURRENT)
1190 // r31 == pr
1191 mov r17=cr.ipsr;;
1192 mov r16=cr.isr;;
1193 // set shared_mem isr
1194 extr.u r16=r16,38,1;; // grab cr.isr.ir bit
1195 dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
1196 extr.u r20=r21,41,2 ;; // get v(!)psr.ri
1197 dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
1198 adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
1199 st8 [r22]=r16 ;;
1200 // set cr.ipsr (make sure cpl==2!)
1201 mov r29=r17 ;;
1202 movl r28=DELIVER_PSR_SET;;
1203 movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
1204 or r29=r29,r28;;
1205 and r29=r29,r27;;
1206 mov cr.ipsr=r29;;
1207 // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
1208 // set shared_mem interrupt_delivery_enabled to 0
1209 // set shared_mem interrupt_collection_enabled to 0
1210 st8 [r18]=r0;;
1211 // cover and set shared_mem precover_ifs to cr.ifs
1212 // set shared_mem ifs and incomplete_regframe to 0
1213 #if 0
1214 cover ;;
1215 mov r20=cr.ifs;;
1216 adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
1217 st4 [r22]=r0 ;;
1218 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1219 st8 [r22]=r0 ;;
1220 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1221 st8 [r22]=r20 ;;
1222 // leave cr.ifs alone for later rfi
1223 #else
1224 adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
1225 st4 [r22]=r0 ;;
1226 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1227 ld8 r20=[r22];;
1228 st8 [r22]=r0 ;;
1229 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1230 st8 [r22]=r20 ;;
1231 #endif
1232 // set iip to go to domain IVA break instruction vector
1233 adds r22=IA64_VCPU_IVA_OFFSET,r30;;
1234 ld8 r23=[r22];;
1235 movl r24=0x3000;;
1236 add r24=r24,r23;;
1237 mov cr.iip=r24;;
1238 #if 0
1239 // OK, now all set to go except for switch to virtual bank0
1240 mov r30=r2; mov r29=r3;;
1241 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
1242 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
1243 bsw.1;;
1244 // FIXME: need to handle ar.unat!
1245 .mem.offset 0,0; st8.spill [r2]=r16,16;
1246 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
1247 .mem.offset 0,0; st8.spill [r2]=r18,16;
1248 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
1249 .mem.offset 0,0; st8.spill [r2]=r20,16;
1250 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
1251 .mem.offset 0,0; st8.spill [r2]=r22,16;
1252 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
1253 .mem.offset 0,0; st8.spill [r2]=r24,16;
1254 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
1255 .mem.offset 0,0; st8.spill [r2]=r26,16;
1256 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
1257 .mem.offset 0,0; st8.spill [r2]=r28,16;
1258 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
1259 .mem.offset 0,0; st8.spill [r2]=r30,16;
1260 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
1261 movl r31=XSI_IPSR;;
1262 bsw.0 ;;
1263 mov r2=r30; mov r3=r29;;
1264 #else
1265 bsw.1;;
1266 movl r31=XSI_IPSR;;
1267 bsw.0 ;;
1268 #endif
1269 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1270 st4 [r20]=r0 ;;
1271 mov pr=r31,-1 ;;
1272 rfi
1273 #endif // RFI_TO_INTERRUPT
1275 ENTRY(hyper_cover)
1276 #ifdef FAST_HYPERPRIVOP_CNT
1277 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
1278 ld8 r21=[r20];;
1279 adds r21=1,r21;;
1280 st8 [r20]=r21;;
1281 #endif
1282 mov r24=cr.ipsr
1283 mov r25=cr.iip;;
1284 // skip test for vpsr.ic; it's a prerequisite for hyperprivops
1285 cover ;;
1286 adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
1287 mov r30=cr.ifs;;
1288 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
1289 ld4 r21=[r20] ;;
1290 cmp.eq p6,p7=r21,r0 ;;
1291 (p6) st8 [r22]=r30;;
1292 (p7) st4 [r20]=r0;;
1293 mov cr.ifs=r0;;
1294 // adjust return address to skip over break instruction
1295 extr.u r26=r24,41,2 ;;
1296 cmp.eq p6,p7=2,r26 ;;
1297 (p6) mov r26=0
1298 (p6) adds r25=16,r25
1299 (p7) adds r26=1,r26
1300 ;;
1301 dep r24=r26,r24,41,2
1302 ;;
1303 mov cr.ipsr=r24
1304 mov cr.iip=r25
1305 mov pr=r31,-1 ;;
1306 rfi
1307 ;;
1309 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
1310 ENTRY(hyper_ssm_dt)
1311 #ifdef FAST_HYPERPRIVOP_CNT
1312 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
1313 ld8 r21=[r20];;
1314 adds r21=1,r21;;
1315 st8 [r20]=r21;;
1316 #endif
1317 mov r24=cr.ipsr
1318 mov r25=cr.iip;;
1319 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1320 ld4 r21=[r20];;
1321 cmp.eq p7,p0=r21,r0 // meta==0?
1322 (p7) br.spnt.many 1f ;; // already in virtual mode
1323 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1324 ld8 r22=[r22];;
1325 adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
1326 ld4 r23=[r22];;
1327 mov rr[r0]=r23;;
1328 srlz.i;;
1329 st4 [r20]=r0 ;;
1330 // adjust return address to skip over break instruction
1331 1: extr.u r26=r24,41,2 ;;
1332 cmp.eq p6,p7=2,r26 ;;
1333 (p6) mov r26=0
1334 (p6) adds r25=16,r25
1335 (p7) adds r26=1,r26
1336 ;;
1337 dep r24=r26,r24,41,2
1338 ;;
1339 mov cr.ipsr=r24
1340 mov cr.iip=r25
1341 mov pr=r31,-1 ;;
1342 rfi
1343 ;;
1345 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
1346 ENTRY(hyper_rsm_dt)
1347 #ifdef FAST_HYPERPRIVOP_CNT
1348 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
1349 ld8 r21=[r20];;
1350 adds r21=1,r21;;
1351 st8 [r20]=r21;;
1352 #endif
1353 mov r24=cr.ipsr
1354 mov r25=cr.iip;;
1355 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1356 ld4 r21=[r20];;
1357 cmp.ne p7,p0=r21,r0 // meta==0?
1358 (p7) br.spnt.many 1f ;; // already in metaphysical mode
1359 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1360 ld8 r22=[r22];;
1361 adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
1362 ld4 r23=[r22];;
1363 mov rr[r0]=r23;;
1364 srlz.i;;
1365 adds r21=1,r0 ;;
1366 st4 [r20]=r21 ;;
1367 // adjust return address to skip over break instruction
1368 1: extr.u r26=r24,41,2 ;;
1369 cmp.eq p6,p7=2,r26 ;;
1370 (p6) mov r26=0
1371 (p6) adds r25=16,r25
1372 (p7) adds r26=1,r26
1373 ;;
1374 dep r24=r26,r24,41,2
1375 ;;
1376 mov cr.ipsr=r24
1377 mov cr.iip=r25
1378 mov pr=r31,-1 ;;
1379 rfi
1380 ;;
1382 ENTRY(hyper_get_tpr)
1383 #ifdef FAST_HYPERPRIVOP_CNT
1384 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
1385 ld8 r21=[r20];;
1386 adds r21=1,r21;;
1387 st8 [r20]=r21;;
1388 #endif
1389 mov r24=cr.ipsr
1390 mov r25=cr.iip;;
1391 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1392 ld8 r8=[r20];;
1393 extr.u r26=r24,41,2 ;;
1394 cmp.eq p6,p7=2,r26 ;;
1395 (p6) mov r26=0
1396 (p6) adds r25=16,r25
1397 (p7) adds r26=1,r26
1398 ;;
1399 dep r24=r26,r24,41,2
1400 ;;
1401 mov cr.ipsr=r24
1402 mov cr.iip=r25
1403 mov pr=r31,-1 ;;
1404 rfi
1405 ;;
1406 END(hyper_get_tpr)
1408 // if we get to here, there are no interrupts pending so we
1409 // can change virtual tpr to any value without fear of provoking
1410 // (or accidentally missing) delivery of an interrupt
1411 ENTRY(hyper_set_tpr)
1412 #ifdef FAST_HYPERPRIVOP_CNT
1413 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
1414 ld8 r21=[r20];;
1415 adds r21=1,r21;;
1416 st8 [r20]=r21;;
1417 #endif
1418 mov r24=cr.ipsr
1419 mov r25=cr.iip;;
1420 movl r27=0xff00;;
1421 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1422 andcm r8=r8,r27;;
1423 st8 [r20]=r8;;
1424 extr.u r26=r24,41,2 ;;
1425 cmp.eq p6,p7=2,r26 ;;
1426 (p6) mov r26=0
1427 (p6) adds r25=16,r25
1428 (p7) adds r26=1,r26
1429 ;;
1430 dep r24=r26,r24,41,2
1431 ;;
1432 mov cr.ipsr=r24
1433 mov cr.iip=r25
1434 mov pr=r31,-1 ;;
1435 rfi
1436 ;;
1437 END(hyper_set_tpr)
1439 ENTRY(hyper_get_ivr)
1440 #ifdef FAST_HYPERPRIVOP_CNT
1441 movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
1442 ld8 r21=[r22];;
1443 adds r21=1,r21;;
1444 st8 [r22]=r21;;
1445 #endif
1446 mov r8=15;;
1447 // when we get to here r20=~=interrupts pending
1448 cmp.eq p7,p0=r20,r0;;
1449 (p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
1450 (p7) st4 [r20]=r0;;
1451 (p7) br.spnt.many 1f ;;
1452 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1453 ld8 r30=[r30];;
1454 adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
1455 mov r25=192
1456 adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
1457 ld8 r23=[r22];;
1458 cmp.eq p6,p0=r23,r0;;
1459 (p6) adds r22=-8,r22;;
1460 (p6) adds r24=-8,r24;;
1461 (p6) adds r25=-64,r25;;
1462 (p6) ld8 r23=[r22];;
1463 (p6) cmp.eq p6,p0=r23,r0;;
1464 (p6) adds r22=-8,r22;;
1465 (p6) adds r24=-8,r24;;
1466 (p6) adds r25=-64,r25;;
1467 (p6) ld8 r23=[r22];;
1468 (p6) cmp.eq p6,p0=r23,r0;;
1469 (p6) adds r22=-8,r22;;
1470 (p6) adds r24=-8,r24;;
1471 (p6) adds r25=-64,r25;;
1472 (p6) ld8 r23=[r22];;
1473 (p6) cmp.eq p6,p0=r23,r0;;
1474 cmp.eq p6,p0=r23,r0
1475 (p6) br.cond.spnt.few 1f; // this is actually an error
1476 // r22 points to non-zero element of irr, r23 has value
1477 // r24 points to corr element of insvc, r25 has elt*64
1478 ld8 r26=[r24];;
1479 cmp.geu p6,p0=r26,r23
1480 (p6) br.cond.spnt.many 1f;
1481 // not masked by insvc, get vector number
1482 shr.u r26=r23,1;;
1483 or r26=r23,r26;;
1484 shr.u r27=r26,2;;
1485 or r26=r26,r27;;
1486 shr.u r27=r26,4;;
1487 or r26=r26,r27;;
1488 shr.u r27=r26,8;;
1489 or r26=r26,r27;;
1490 shr.u r27=r26,16;;
1491 or r26=r26,r27;;
1492 shr.u r27=r26,32;;
1493 or r26=r26,r27;;
1494 andcm r26=0xffffffffffffffff,r26;;
1495 popcnt r26=r26;;
1496 sub r26=63,r26;;
1497 // r26 now contains the bit index (mod 64)
1498 mov r27=1;;
1499 shl r27=r27,r26;;
1500 // r27 now contains the (within the proper word) bit mask
1501 add r26=r25,r26
1502 // r26 now contains the vector [0..255]
1503 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1504 ld8 r20=[r20] ;;
1505 extr.u r28=r20,16,1
1506 extr.u r29=r20,4,4 ;;
1507 cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS
1508 (p6) br.cond.spnt.few 1f;
1509 shl r29=r29,4;;
1510 adds r29=15,r29;;
1511 cmp.ge p6,p0=r29,r26
1512 (p6) br.cond.spnt.few 1f;
1513 // OK, have an unmasked vector to process/return
1514 ld8 r25=[r24];;
1515 or r25=r25,r27;;
1516 st8 [r24]=r25;;
1517 ld8 r25=[r22];;
1518 andcm r25=r25,r27;;
1519 st8 [r22]=r25;;
1520 mov r8=r26;;
1521 // if it's a clock tick, remember itm to avoid delivering it twice
1522 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
1523 ld8 r20=[r20];;
1524 extr.u r20=r20,0,8;;
1525 cmp.eq p6,p0=r20,r8
1526 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30
1527 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;;
1528 ld8 r23=[r23];;
1529 (p6) st8 [r22]=r23;;
1530 // all done
1531 1: mov r24=cr.ipsr
1532 mov r25=cr.iip;;
1533 extr.u r26=r24,41,2 ;;
1534 cmp.eq p6,p7=2,r26 ;;
1535 (p6) mov r26=0
1536 (p6) adds r25=16,r25
1537 (p7) adds r26=1,r26
1538 ;;
1539 dep r24=r26,r24,41,2
1540 ;;
1541 mov cr.ipsr=r24
1542 mov cr.iip=r25
1543 mov pr=r31,-1 ;;
1544 rfi
1545 ;;
1546 END(hyper_get_ivr)
1548 ENTRY(hyper_eoi)
1549 // when we get to here r20=~=interrupts pending
1550 cmp.ne p7,p0=r20,r0
1551 (p7) br.spnt.many dispatch_break_fault ;;
1552 #ifdef FAST_HYPERPRIVOP_CNT
1553 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
1554 ld8 r21=[r20];;
1555 adds r21=1,r21;;
1556 st8 [r20]=r21;;
1557 #endif
1558 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1559 ld8 r22=[r22];;
1560 adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
1561 ld8 r23=[r22];;
1562 cmp.eq p6,p0=r23,r0;;
1563 (p6) adds r22=-8,r22;;
1564 (p6) ld8 r23=[r22];;
1565 (p6) cmp.eq p6,p0=r23,r0;;
1566 (p6) adds r22=-8,r22;;
1567 (p6) ld8 r23=[r22];;
1568 (p6) cmp.eq p6,p0=r23,r0;;
1569 (p6) adds r22=-8,r22;;
1570 (p6) ld8 r23=[r22];;
1571 (p6) cmp.eq p6,p0=r23,r0;;
1572 cmp.eq p6,p0=r23,r0
1573 (p6) br.cond.spnt.few 1f; // this is actually an error
1574 // r22 points to non-zero element of insvc, r23 has value
1575 shr.u r24=r23,1;;
1576 or r24=r23,r24;;
1577 shr.u r25=r24,2;;
1578 or r24=r24,r25;;
1579 shr.u r25=r24,4;;
1580 or r24=r24,r25;;
1581 shr.u r25=r24,8;;
1582 or r24=r24,r25;;
1583 shr.u r25=r24,16;;
1584 or r24=r24,r25;;
1585 shr.u r25=r24,32;;
1586 or r24=r24,r25;;
1587 andcm r24=0xffffffffffffffff,r24;;
1588 popcnt r24=r24;;
1589 sub r24=63,r24;;
1590 // r24 now contains the bit index
1591 mov r25=1;;
1592 shl r25=r25,r24;;
1593 andcm r23=r23,r25;;
1594 st8 [r22]=r23;;
1595 1: mov r24=cr.ipsr
1596 mov r25=cr.iip;;
1597 extr.u r26=r24,41,2 ;;
1598 cmp.eq p6,p7=2,r26 ;;
1599 (p6) mov r26=0
1600 (p6) adds r25=16,r25
1601 (p7) adds r26=1,r26
1602 ;;
1603 dep r24=r26,r24,41,2
1604 ;;
1605 mov cr.ipsr=r24
1606 mov cr.iip=r25
1607 mov pr=r31,-1 ;;
1608 rfi
1609 ;;
1610 END(hyper_eoi)
1612 ENTRY(hyper_set_itm)
1613 // when we get to here r20=~=interrupts pending
1614 cmp.ne p7,p0=r20,r0
1615 (p7) br.spnt.many dispatch_break_fault ;;
1616 #ifdef FAST_HYPERPRIVOP_CNT
1617 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
1618 ld8 r21=[r20];;
1619 adds r21=1,r21;;
1620 st8 [r20]=r21;;
1621 #endif
1622 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
1623 ld8 r21=[r20];;
1624 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1625 ld8 r20=[r20];;
1626 adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
1627 st8 [r20]=r8;;
1628 cmp.geu p6,p0=r21,r8;;
1629 (p6) mov r21=r8;;
1630 // now "safe set" cr.itm=r21
1631 mov r23=100;;
1632 2: mov cr.itm=r21;;
1633 srlz.d;;
1634 mov r22=ar.itc ;;
1635 cmp.leu p6,p0=r21,r22;;
1636 add r21=r21,r23;;
1637 shl r23=r23,1;;
1638 (p6) br.cond.spnt.few 2b;;
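// The "safe set" above retries with a growing margin, roughly (informal C,
// intrinsic names as in Linux asm/intrinsics.h):
//   u64 margin = 100;
//   for (;;) {
//       ia64_set_itm(t);
//       if (ia64_get_itc() < t) break;  // itm now safely in the future
//       t += margin; margin <<= 1;      // push the target out and retry
//   }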
1639 1: mov r24=cr.ipsr
1640 mov r25=cr.iip;;
1641 extr.u r26=r24,41,2 ;;
1642 cmp.eq p6,p7=2,r26 ;;
1643 (p6) mov r26=0
1644 (p6) adds r25=16,r25
1645 (p7) adds r26=1,r26
1646 ;;
1647 dep r24=r26,r24,41,2
1648 ;;
1649 mov cr.ipsr=r24
1650 mov cr.iip=r25
1651 mov pr=r31,-1 ;;
1652 rfi
1653 ;;
1654 END(hyper_set_itm)
1656 ENTRY(hyper_get_rr)
1657 #ifdef FAST_HYPERPRIVOP_CNT
1658 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
1659 ld8 r21=[r20];;
1660 adds r21=1,r21;;
1661 st8 [r20]=r21;;
1662 #endif
1663 extr.u r25=r8,61,3;;
1664 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
1665 shl r25=r25,3;;
1666 add r20=r20,r25;;
1667 ld8 r8=[r20];;
1668 1: mov r24=cr.ipsr
1669 mov r25=cr.iip;;
1670 extr.u r26=r24,41,2 ;;
1671 cmp.eq p6,p7=2,r26 ;;
1672 (p6) mov r26=0
1673 (p6) adds r25=16,r25
1674 (p7) adds r26=1,r26
1675 ;;
1676 dep r24=r26,r24,41,2
1677 ;;
1678 mov cr.ipsr=r24
1679 mov cr.iip=r25
1680 mov pr=r31,-1 ;;
1681 rfi
1682 ;;
1683 END(hyper_get_rr)
1685 ENTRY(hyper_set_rr)
1686 extr.u r25=r8,61,3;;
1687 cmp.leu p7,p0=7,r25 // punt on setting rr7
1688 (p7) br.spnt.many dispatch_break_fault ;;
1689 #ifdef FAST_HYPERPRIVOP_CNT
1690 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
1691 ld8 r21=[r20];;
1692 adds r21=1,r21;;
1693 st8 [r20]=r21;;
1694 #endif
1695 extr.u r26=r9,8,24 // r26 = r9.rid
1696 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1697 ld8 r20=[r20];;
1698 adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
1699 ld4 r22=[r21];;
1700 adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
1701 ld4 r23=[r21];;
1702 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
1703 add r22=r26,r22;;
1704 cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
1705 (p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
1706 // r21=starting_rid
1707 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
1708 shl r25=r25,3;;
1709 add r20=r20,r25;;
1710 st8 [r20]=r9;; // store away exactly what was passed
1711 // but adjust value actually placed in rr[r8]
1712 // r22 contains adjusted rid, "mangle" it (see regionreg.c)
1713 // and set ps to PAGE_SHIFT and ve to 1
1714 extr.u r27=r22,0,8
1715 extr.u r28=r22,8,8
1716 extr.u r29=r22,16,8;;
1717 dep.z r23=PAGE_SHIFT,2,6;;
1718 dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
1719 dep r23=r27,r23,24,8;;
1720 dep r23=r28,r23,16,8;;
1721 dep r23=r29,r23,8,8
1722 cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
1723 (p6) st4 [r24]=r23
1724 mov rr[r8]=r23;;
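// The value built above, in rough C (cf. regionreg.c; illustrative):
//   newrr = (PAGE_SHIFT << 2) | 1;              // ps = PAGE_SHIFT, ve = 1
//   newrr |= (u64)((rid      ) & 0xff) << 24;   // rid byte 0 -> rr bits 24..31
//   newrr |= (u64)((rid >>  8) & 0xff) << 16;   // rid byte 1 -> rr bits 16..23
//   newrr |= (u64)((rid >> 16) & 0xff) <<  8;   // rid byte 2 -> rr bits  8..15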
1725 // done, mosey on back
1726 1: mov r24=cr.ipsr
1727 mov r25=cr.iip;;
1728 extr.u r26=r24,41,2 ;;
1729 cmp.eq p6,p7=2,r26 ;;
1730 (p6) mov r26=0
1731 (p6) adds r25=16,r25
1732 (p7) adds r26=1,r26
1733 ;;
1734 dep r24=r26,r24,41,2
1735 ;;
1736 mov cr.ipsr=r24
1737 mov cr.iip=r25
1738 mov pr=r31,-1 ;;
1739 rfi
1740 ;;
1741 END(hyper_set_rr)
1743 ENTRY(hyper_set_kr)
1744 extr.u r25=r8,3,61;;
1745 cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way
1746 (p7) br.spnt.many dispatch_break_fault ;;
1747 #ifdef FAST_HYPERPRIVOP_CNT
1748 movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);;
1749 ld8 r21=[r20];;
1750 adds r21=1,r21;;
1751 st8 [r20]=r21;;
1752 #endif
1753 adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18 ;;
1754 shl r20=r8,3;;
1755 add r22=r20,r21;;
1756 st8 [r22]=r9;;
1757 cmp.eq p7,p0=r8,r0
1758 adds r8=-1,r8;;
1759 (p7) mov ar0=r9;;
1760 cmp.eq p7,p0=r8,r0
1761 adds r8=-1,r8;;
1762 (p7) mov ar1=r9;;
1763 cmp.eq p7,p0=r8,r0
1764 adds r8=-1,r8;;
1765 (p7) mov ar2=r9;;
1766 cmp.eq p7,p0=r8,r0
1767 adds r8=-1,r8;;
1768 (p7) mov ar3=r9;;
1769 cmp.eq p7,p0=r8,r0
1770 adds r8=-1,r8;;
1771 (p7) mov ar4=r9;;
1772 cmp.eq p7,p0=r8,r0
1773 adds r8=-1,r8;;
1774 (p7) mov ar5=r9;;
1775 cmp.eq p7,p0=r8,r0
1776 adds r8=-1,r8;;
1777 (p7) mov ar6=r9;;
1778 cmp.eq p7,p0=r8,r0
1779 adds r8=-1,r8;;
1780 (p7) mov ar7=r9;;
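// Net effect of the chain above, in rough C (shadow copy plus the real
// kernel register; names informal):
//   PSCB(kr[n]) = val;  ia64_set_kr(n, val);  // n in 0..7, checked on entry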
1781 // done, mosey on back
1782 1: mov r24=cr.ipsr
1783 mov r25=cr.iip;;
1784 extr.u r26=r24,41,2 ;;
1785 cmp.eq p6,p7=2,r26 ;;
1786 (p6) mov r26=0
1787 (p6) adds r25=16,r25
1788 (p7) adds r26=1,r26
1789 ;;
1790 dep r24=r26,r24,41,2
1791 ;;
1792 mov cr.ipsr=r24
1793 mov cr.iip=r25
1794 mov pr=r31,-1 ;;
1795 rfi
1796 ;;
1797 END(hyper_set_kr)
1799 // this routine was derived from optimized assembly output from
1800 // vcpu_thash, so it is dense and difficult to read, but it works
1801 // On entry:
1802 // r18 == XSI_PSR_IC
1803 // r31 == pr
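// In rough C, for the short-format VHPT (the table is >= 32KB, so the
// size mask covers at least the low 15 bits; names informal):
//   ps   = rr[va >> 61].ps;
//   mask = (1UL << pta.size) - 1;
//   hash = (va & 0xe000000000000000UL)   // region bits from vadr
//        | ((pta & ~0x7fffUL) & ~mask)   // table base from pta
//        | (((va >> ps) << 3) & mask);   // 8-byte PTE offset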
GLOBAL_ENTRY(hyper_thash)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	shr.u r20 = r8, 61
	addl r25 = 1, r0
	movl r17 = 0xe000000000000000
	;;
	and r21 = r17, r8	// VHPT_Addr1
	;;
	shladd r28 = r20, 3, r18
	adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
	;;
	adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
	addl r28 = 32767, r0
	ld8 r24 = [r19]		// pta
	;;
	ld8 r23 = [r27]		// rrs[vadr>>61]
	extr.u r26 = r24, 2, 6
	;;
	extr.u r22 = r23, 2, 6
	shl r30 = r25, r26
	;;
	shr.u r19 = r8, r22
	shr.u r29 = r24, 15
	;;
	adds r17 = -1, r30
	;;
	shladd r27 = r19, 3, r0
	extr.u r26 = r17, 15, 46
	;;
	andcm r24 = r29, r26
	and r19 = r28, r27
	shr.u r25 = r27, 15
	;;
	and r23 = r26, r25
	;;
	or r22 = r24, r23
	;;
	dep.z r20 = r22, 15, 46
	;;
	or r16 = r20, r21
	;;
	or r8 = r19, r16
	// done, update iip/ipsr to next instruction
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_thash)
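
// A hedged C reading of the hash just computed, reconstructed from the
// instruction sequence above (compare vcpu_thash; names illustrative):
//
//	unsigned long thash(unsigned long vadr, unsigned long pta,
//			    unsigned long rr)
//	{
//		unsigned long ps = (rr >> 2) & 0x3f;	/* rr.ps */
//		unsigned long sz = (pta >> 2) & 0x3f;	/* pta.size */
//		unsigned long off = (vadr >> ps) << 3;	/* entry offset */
//		unsigned long hi_mask = ((1UL << sz) - 1) >> 15;
//		unsigned long hi = ((pta >> 15) & ~hi_mask)
//				 | ((off >> 15) & hi_mask);
//		return (vadr & 0xe000000000000000UL)	/* region bits */
//		     | ((hi & ((1UL << 46) - 1)) << 15)	/* base | hash */
//		     | (off & 0x7fff);			/* low offset */
//	}
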
ENTRY(hyper_ptc_ga)
#ifndef FAST_PTC_GA
	br.spnt.few dispatch_break_fault ;;
#endif
	// FIXME: validate not flushing Xen addresses
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r28=r8
	extr.u r19=r9,2,6	// addr_range=1<<((r9&0xfc)>>2)
	mov r20=1
	shr.u r24=r8,61
	addl r27=56,r0		// PAGE_SHIFT<<2 (for ptc.ga)
	movl r26=0x8000000000000000	// INVALID_TI_TAG
	mov r30=ar.lc
	;;
	shl r19=r20,r19
	cmp.eq p7,p0=7,r24
(p7)	br.spnt.many dispatch_break_fault ;;	// slow way for rr7
	;;
	cmp.le p7,p0=r19,r0	// skip flush if size<=0
(p7)	br.cond.dpnt 2f ;;
	extr.u r24=r19,0,PAGE_SHIFT
	shr.u r23=r19,PAGE_SHIFT ;;	// repeat loop for n pages
	cmp.ne p7,p0=r24,r0 ;;
(p7)	adds r23=1,r23 ;;	// round up if size is not a whole number of pages
	mov ar.lc=r23
	movl r29=PAGE_SIZE;;
1:
	thash r25=r28 ;;
	adds r25=16,r25 ;;
	ld8 r24=[r25] ;;
	// FIXME: should check if tag matches, not just blow it away
	or r24=r26,r24 ;;	// vhpt_entry->ti_tag |= INVALID_TI_TAG
	st8 [r25]=r24
	ptc.ga r28,r27 ;;
	srlz.i ;;
	add r28=r29,r28
	br.cloop.sptk.few 1b
	;;
2:
	mov ar.lc=r30 ;;
	mov r29=cr.ipsr
	mov r30=cr.iip;;
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	adds r25=IA64_VCPU_DTLB_OFFSET,r27
	adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
	ld8 r24=[r25]
	ld8 r27=[r26] ;;
	and r24=-2,r24
	and r27=-2,r27 ;;
	st8 [r25]=r24		// set 1-entry i/dtlb as not present
	st8 [r26]=r27 ;;
	// increment to point to next instruction
	extr.u r26=r29,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r30=16,r30
(p7)	adds r26=1,r26
	;;
	dep r29=r26,r29,41,2
	;;
	mov cr.ipsr=r29
	mov cr.iip=r30
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_ptc_ga)
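
// A hedged C sketch of the flush loop above (the counted loop uses
// ar.lc/br.cloop; PSCBX field names are taken from the comments here
// and may not match the real structs exactly):
//
//	npages = (size >> PAGE_SHIFT) + ((size & (PAGE_SIZE-1)) != 0);
//	for (i = 0; i < npages; i++, vadr += PAGE_SIZE) {
//		struct vhpt_entry *v = (void *)ia64_thash(vadr);
//		v->ti_tag |= INVALID_TI_TAG;	/* 1UL<<63 kills the entry */
//		ia64_ptcga(vadr, PAGE_SHIFT << 2);
//		ia64_srlz_i();
//	}
//	/* then mark the vcpu's 1-entry virtual i/d TLBs not-present */
//	PSCBX(vcpu, dtlb).page_flags &= ~1UL;
//	PSCBX(vcpu, itlb).page_flags &= ~1UL;
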
// recovery block for hyper_itc metaphysical memory lookup
ENTRY(recover_and_dispatch_break_fault)
#ifdef FAST_REFLECT_CNT
	movl r21=recover_to_break_fault_count;;
	ld8 r22=[r21];;
	adds r22=1,r22;;
	st8 [r21]=r22;;
#endif
	mov b0=r29 ;;
	br.sptk.many dispatch_break_fault;;

// Registers at entry
// r17 = break immediate (XEN_HYPER_ITC_D or I)
// r18 == XSI_PSR_IC_OFS
// r31 == pr
GLOBAL_ENTRY(hyper_itc)
ENTRY(hyper_itc_i)
	// fall through, hyper_itc_d handles both i and d
ENTRY(hyper_itc_d)
#ifndef FAST_ITC
	br.sptk.many dispatch_break_fault ;;
#endif
	// ensure itir.ps >= xen's pagesize
	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r23=[r23];;
	extr.u r24=r23,2,6;;	// r24==logps
	cmp.gt p7,p0=PAGE_SHIFT,r24
(p7)	br.spnt.many dispatch_break_fault ;;
	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r21=[r21];;
	// for now, punt on region0 inserts
	extr.u r21=r21,61,3;;
	cmp.eq p7,p0=r21,r0
(p7)	br.spnt.many dispatch_break_fault ;;
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
	ld8 r27=[r27];;
	// FIXME: is the global var dom0 always pinned? assume so for now
	movl r28=dom0;;
	ld8 r28=[r28];;
	// FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
	cmp.ne p7,p0=r27,r28
(p7)	br.spnt.many dispatch_break_fault ;;
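	// In C, the fast-path guards above amount to (hedged sketch;
	// helper names are illustrative):
	//
	//	if (itir.ps < PAGE_SHIFT ||	/* page < Xen's page size */
	//	    REGION_NUMBER(ifa) == 0 ||	/* punt on region0 */
	//	    current->domain != dom0)	/* dom0 only for now */
	//		return dispatch_break_fault();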
	// note: the cmp must stay outside the #ifdef -- p6/p7 choose the
	// r17 encoding below even when the counters are compiled out
	cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
#ifdef FAST_HYPERPRIVOP_CNT
(p6)	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);;
(p7)	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
(p6)	mov r17=2;;
(p7)	mov r17=3;;
	mov r29=b0 ;;
	movl r30=recover_and_dispatch_break_fault ;;
	mov r16=r8;;
	// fall through


// fast_insert(PSCB(ifa),r24=ps,r16=pte)
// r16 == pte
// r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
// r18 == XSI_PSR_IC_OFS
// r24 == ps
// r29 == saved value of b0 in case of recovery
// r30 == recovery ip if failure occurs
// r31 == pr
GLOBAL_ENTRY(fast_insert)
	// translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
	mov r19=1;;
	shl r20=r19,r24;;
	adds r20=-1,r20;;	// r20 == mask
	movl r19=_PAGE_PPN_MASK;;
	and r22=r16,r19;;	// r22 == pteval & _PAGE_PPN_MASK
	andcm r19=r22,r20;;
	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r21=[r21];;
	and r20=r21,r20;;
	or r19=r19,r20;;	// r19 == mpaddr
	// FIXME: for now, just do domain0 and skip mpaddr range checks
	dep r20=r0,r19,0,PAGE_SHIFT
	movl r21=PAGE_PHYS ;;
	or r20=r20,r21 ;;	// r20==return value from lookup_domain_mpa
	// r16=pteval,r20=pteval2
	movl r19=_PAGE_PPN_MASK
	movl r21=_PAGE_PL_2;;
	andcm r25=r16,r19;;	// r25==pteval & ~_PAGE_PPN_MASK
	and r22=r20,r19;;
	or r22=r22,r21;;
	or r22=r22,r25;;	// r22==return value from translate_domain_pte
	// done with translate_domain_pte
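	// A hedged C sketch of the translate_domain_pte arithmetic just
	// performed (dom0-only shortcut, no mpaddr range checks):
	//
	//	mask = (1UL << ps) - 1;
	//	mpaddr = (pteval & _PAGE_PPN_MASK & ~mask) | (ifa & mask);
	//	/* lookup_domain_mpa assumed identity for dom0 here */
	//	pteval2 = (mpaddr & ~(PAGE_SIZE - 1)) | PAGE_PHYS;
	//	return (pteval & ~_PAGE_PPN_MASK)  /* caller's attributes */
	//	     | (pteval2 & _PAGE_PPN_MASK)  /* machine ppn */
	//	     | _PAGE_PL_2;                 /* force guest priv level */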
	// now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
	// FIXME: for now, just domain0 and skip range check
	// psr.ic already cleared
	// NOTE: r24 still contains ps (from above)
	shladd r24=r24,2,r0;;
	mov cr.itir=r24;;
	adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r23=[r23];;
	mov cr.ifa=r23;;
	tbit.z p6,p7=r17,0;;
(p6)	itc.d r22;;
(p7)	itc.i r22;;
	dv_serialize_data
	// FIXME: how do I make assembler warnings go away here?
	// vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
	thash r28=r23
	or r26=1,r22;;
	ttag r21=r23
	adds r25=8,r28
	mov r19=r28;;
	st8 [r25]=r24
	adds r20=16,r28;;
	st8 [r19]=r26
	st8 [r20]=r21;;
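	// The three stores above implement vhpt_insert in C, roughly
	// (hedged; field offsets inferred from the stores: +0 pte,
	// +8 itir, +16 tag):
	//
	//	struct vhpt_entry *v = (void *)ia64_thash(vaddr);
	//	v->itir = logps << 2;
	//	v->page_flags = pte | 1;	/* present bit */
	//	v->ti_tag = ia64_ttag(vaddr);	/* valid tag written last */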
	// vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
	// TR_ENTRY = {page_flags,itir,addr,rid}
	tbit.z p6,p7=r17,0;;
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
(p6)	adds r27=IA64_VCPU_DTLB_OFFSET,r27
(p7)	adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
	st8 [r27]=r22,8;;	// page_flags: already has pl >= 2 and p==1
	st8 [r27]=r24,8;;	// itir
	mov r19=-4096;;
	and r23=r23,r19;;
	st8 [r27]=r23,8;;	// ifa & ~0xfff
	adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
	extr.u r25=r23,61,3;;
	shladd r29=r25,3,r29;;
	ld8 r29=[r29];;
	movl r20=0xffffff00;;
	and r29=r29,r20;;
	st8 [r27]=r29,-8;;	// rid
	// if (ps > 12) -- the asm tests ps == 12, since ps is at least 12
	cmp.eq p7,p0=12<<2,r24
(p7)	br.cond.sptk.many 1f;;
	// if (ps > 12) {
	//   trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
	extr.u r29=r24,2,6
	mov r28=1;;
	shl r26=r28,r29;;
	adds r29=-12,r29;;
	shl r25=r28,r29;;
	mov r29=-1
	adds r26=-1,r26
	adds r25=-1,r25;;
	andcm r26=r29,r26	// ~((1UL<<ps)-1)
	andcm r25=r29,r25;;	// ~((1UL<<(ps-12))-1)
	ld8 r29=[r27];;
	and r29=r29,r26;;
	st8 [r27]=r29,-16;;
	ld8 r29=[r27];;
	extr.u r28=r29,12,38;;
	movl r26=0xfffc000000000fff;;
	and r29=r29,r26
	and r28=r28,r25;;
	shl r28=r28,12;;
	or r29=r29,r28;;
	st8 [r27]=r29;;
1:	// done with vcpu_set_tr_entry
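	// vcpu_set_tr_entry in C, per the stores above (hedged sketch;
	// TR_ENTRY = {page_flags, itir, vadr, rid}):
	//
	//	trp->page_flags = pte | 1;
	//	trp->itir = logps << 2;
	//	trp->vadr = ifa & ~0xfffUL;
	//	trp->rid = vrr[ifa >> 61] & 0xffffff00;
	//	if (ps > 12) {		/* align to the mapping's size */
	//		trp->vadr &= ~((1UL << ps) - 1);
	//		trp->ppn &= ~((1UL << (ps - 12)) - 1);
	//	}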
	//PSCBX(vcpu,i/dtlb_pte) = mp_pte
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	tbit.z p6,p7=r17,0;;
(p6)	adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
(p7)	adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
	st8 [r27]=r16;;
	// done with vcpu_itc_no_srlz

	// if hyper_itc, increment to point to next instruction
	tbit.z p7,p0=r17,1
(p7)	br.cond.sptk.few no_inc_iip;;

	mov r29=cr.ipsr
	mov r30=cr.iip;;
	extr.u r26=r29,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r30=16,r30
(p7)	adds r26=1,r26
	;;
	dep r29=r26,r29,41,2
	;;
	mov cr.ipsr=r29
	mov cr.iip=r30;;

no_inc_iip:
	mov pr=r31,-1 ;;
	rfi
	;;
END(fast_insert)