xen/arch/ia64/xen/hyperprivop.S @ 10929:7cde0d938ef4 (ia64/xen-unstable)

[IA64] convert more privop_stat to perfc

Convert most privop stats to perfc.

Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
author: awilliam@xenbuild.aw
date: Fri Aug 04 09:02:43 2006 -0600
parents: 3d6c1af609bf
children: c3e20511c745

line source
1 /*
2 * arch/ia64/kernel/hyperprivop.S
3 *
4 * Copyright (C) 2005 Hewlett-Packard Co
5 * Dan Magenheimer <dan.magenheimer@hp.com>
6 */
8 #include <linux/config.h>
10 #include <asm/asmmacro.h>
11 #include <asm/kregs.h>
12 #include <asm/offsets.h>
13 #include <asm/processor.h>
14 #include <asm/system.h>
15 #include <asm/debugger.h>
16 #include <asm/asm-xsi-offsets.h>
17 #include <public/arch-ia64.h>
20 #define _PAGE_PPN_MASK 0x0003fffffffff000 // asm/pgtable.h is not usable from assembly
21 #define PAGE_PHYS 0x0010000000000761 //__pgprot(__DIRTY_BITS|_PAGE_PL_2|_PAGE_AR_RWX)
22 #define _PAGE_PL_2 (2<<7)
24 #if 1 // change to 0 to turn off all fast paths
25 # define FAST_HYPERPRIVOPS
26 # ifdef PERF_COUNTERS
27 # define FAST_HYPERPRIVOP_CNT
28 # define FAST_HYPERPRIVOP_PERFC(N) \
29 (perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + (4 * N))
30 # define FAST_REFLECT_CNT
31 # endif
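// In C terms, FAST_HYPERPRIVOP_PERFC(N) is roughly the address of the
// Nth 4-byte counter in the perfcounters area; the ld4/adds/st4
// sequences throughout this file perform this bump (a sketch):
//   uint32_t *cnt = (uint32_t *)(perfcounters + FAST_HYPERPRIVOP_PERFC_OFS) + N;
//   (*cnt)++;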
33 //#define FAST_TICK // mostly working (unat problems) but default off for now
34 //#define FAST_TLB_MISS_REFLECT // mostly working but default off for now
35 # ifdef CONFIG_XEN_IA64_DOM0_VP
36 # undef FAST_ITC //XXX CONFIG_XEN_IA64_DOM0_VP
37 // TODO fast_itc doesn't support dom0 vp yet.
38 # else
39 //# define FAST_ITC // to be reviewed
40 # endif
41 # define FAST_BREAK
42 # ifndef CONFIG_XEN_IA64_DOM0_VP
43 # define FAST_ACCESS_REFLECT
44 # else
45 # undef FAST_ACCESS_REFLECT //XXX CONFIG_XEN_IA64_DOM0_VP
46 // TODO fast_access_reflect
47 // doesn't support dom0 vp yet.
48 # endif
49 # define FAST_RFI
50 # define FAST_SSM_I
51 # define FAST_PTC_GA
52 # undef RFI_TO_INTERRUPT // not working yet
53 #endif
55 #ifdef CONFIG_SMP
56 //#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
57 #undef FAST_PTC_GA
58 #endif
60 // FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
61 #define HANDLE_AR_UNAT
63 // FIXME: This is defined in include/asm-ia64/hw_irq.h but this
64 // doesn't appear to be include'able from assembly?
65 #define IA64_TIMER_VECTOR 0xef
67 // Should be included from common header file (also in process.c)
68 // NOTE: PSR_CLR IS DIFFERENT! (CPL)
69 #define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
70 #define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
71 // note IA64_PSR_PK removed from following, why is this necessary?
72 #define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
73 IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
74 IA64_PSR_IT | IA64_PSR_BN)
76 #define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
77 IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
78 IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
79 IA64_PSR_MC | IA64_PSR_IS | \
80 IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
81 IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
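// When reflecting an event to the guest, these masks are applied to the
// interrupted psr; in C, roughly (a sketch -- the cpl fixup is left to
// each call site, per the note above):
//   ipsr = (ipsr | DELIVER_PSR_SET) & ~DELIVER_PSR_CLR;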
83 // Note: not hand-scheduled for now
84 // Registers at entry
85 // r16 == cr.isr
86 // r17 == cr.iim
87 // r18 == XSI_PSR_IC_OFS
88 // r19 == vpsr.ic
89 // r31 == pr
90 GLOBAL_ENTRY(fast_hyperprivop)
91 #ifndef FAST_HYPERPRIVOPS // see beginning of file
92 br.sptk.many dispatch_break_fault ;;
93 #endif
94 // HYPERPRIVOP_SSM_I?
95 // assumes domain interrupts pending, so just do it
96 cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
97 (p7) br.sptk.many hyper_ssm_i;;
99 // FIXME. This algorithm gives up (goes to the slow path) if there
100 // are ANY interrupts pending, even if they are currently
101 // undeliverable. This should be improved later...
102 adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
103 ld4 r20=[r20] ;;
104 cmp.eq p7,p0=r0,r20
105 (p7) br.cond.sptk.many 1f
106 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
107 ld8 r20=[r20];;
108 adds r21=IA64_VCPU_IRR0_OFFSET,r20;
109 adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
110 ld8 r23=[r21],16; ld8 r24=[r22],16;;
111 ld8 r21=[r21]; ld8 r22=[r22];;
112 or r23=r23,r24; or r21=r21,r22;;
113 or r20=r23,r21;;
114 1: // when we get here, r20 ~= interrupts pending (nonzero if any)
115 // Check pending event indication
116 (p7) movl r20=THIS_CPU(current_psr_i_addr);;
117 (p7) ld8 r20=[r20];;
118 (p7) adds r20=-1,r20;; /* evtchn_upcall_pending */
119 (p7) ld1 r20=[r20];;
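// The net effect on r20 is roughly the following C (a sketch; the
// evtchn byte sits just before *current_psr_i_addr, per the comment above):
//   if (PSCB(pending_interruption) == 0)
//       r20 = evtchn_upcall_pending;
//   else
//       r20 = irr[0] | irr[1] | irr[2] | irr[3];
//   // r20 != 0 means an interrupt or event may be pending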
121 // HYPERPRIVOP_RFI?
122 cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
123 (p7) br.sptk.many hyper_rfi;;
125 // HYPERPRIVOP_GET_IVR?
126 cmp.eq p7,p6=HYPERPRIVOP_GET_IVR,r17
127 (p7) br.sptk.many hyper_get_ivr;;
129 cmp.ne p7,p0=r20,r0
130 (p7) br.spnt.many dispatch_break_fault ;;
132 // HYPERPRIVOP_COVER?
133 cmp.eq p7,p6=HYPERPRIVOP_COVER,r17
134 (p7) br.sptk.many hyper_cover;;
136 // HYPERPRIVOP_SSM_DT?
137 cmp.eq p7,p6=HYPERPRIVOP_SSM_DT,r17
138 (p7) br.sptk.many hyper_ssm_dt;;
140 // HYPERPRIVOP_RSM_DT?
141 cmp.eq p7,p6=HYPERPRIVOP_RSM_DT,r17
142 (p7) br.sptk.many hyper_rsm_dt;;
144 // HYPERPRIVOP_GET_TPR?
145 cmp.eq p7,p6=HYPERPRIVOP_GET_TPR,r17
146 (p7) br.sptk.many hyper_get_tpr;;
148 // HYPERPRIVOP_SET_TPR?
149 cmp.eq p7,p6=HYPERPRIVOP_SET_TPR,r17
150 (p7) br.sptk.many hyper_set_tpr;;
152 // HYPERPRIVOP_EOI?
153 cmp.eq p7,p6=HYPERPRIVOP_EOI,r17
154 (p7) br.sptk.many hyper_eoi;;
156 // HYPERPRIVOP_SET_ITM?
157 cmp.eq p7,p6=HYPERPRIVOP_SET_ITM,r17
158 (p7) br.sptk.many hyper_set_itm;;
160 // HYPERPRIVOP_SET_RR?
161 cmp.eq p7,p6=HYPERPRIVOP_SET_RR,r17
162 (p7) br.sptk.many hyper_set_rr;;
164 // HYPERPRIVOP_GET_RR?
165 cmp.eq p7,p6=HYPERPRIVOP_GET_RR,r17
166 (p7) br.sptk.many hyper_get_rr;;
168 // HYPERPRIVOP_PTC_GA?
169 cmp.eq p7,p6=HYPERPRIVOP_PTC_GA,r17
170 (p7) br.sptk.many hyper_ptc_ga;;
172 // HYPERPRIVOP_ITC_D?
173 cmp.eq p7,p6=HYPERPRIVOP_ITC_D,r17
174 (p7) br.sptk.many hyper_itc_d;;
176 // HYPERPRIVOP_ITC_I?
177 cmp.eq p7,p6=HYPERPRIVOP_ITC_I,r17
178 (p7) br.sptk.many hyper_itc_i;;
180 // HYPERPRIVOP_THASH?
181 cmp.eq p7,p6=HYPERPRIVOP_THASH,r17
182 (p7) br.sptk.many hyper_thash;;
184 // HYPERPRIVOP_SET_KR?
185 cmp.eq p7,p6=HYPERPRIVOP_SET_KR,r17
186 (p7) br.sptk.many hyper_set_kr;;
188 // if not one of the above, give up for now and do it the slow way
189 br.sptk.many dispatch_break_fault ;;
190 END(fast_hyperprivop)
192 // give up for now if: ipsr.be==1, ipsr.pp==1
193 // from reflect_interruption, don't need to:
194 // - printf first extint (debug only)
195 // - check for interrupt collection enabled (routine will force on)
196 // - set ifa (not valid for extint)
197 // - set iha (not valid for extint)
198 // - set itir (not valid for extint)
199 // DO need to
200 // - increment the HYPER_SSM_I fast_hyperprivop counter
201 // - set shared_mem iip to instruction after HYPER_SSM_I
202 // - set cr.iip to guest iva+0x3000
203 // - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
204 // be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
205 // i = shared_mem interrupt_delivery_enabled
206 // ic = shared_mem interrupt_collection_enabled
207 // ri = instruction after HYPER_SSM_I
208 // all other bits unchanged from real cr.ipsr
209 // - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
210 // - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
211 // and isr.ri to cr.isr.ri (all other bits zero)
212 // - cover and set shared_mem precover_ifs to cr.ifs
213 // ^^^ MISSED THIS FOR fast_break??
214 // - set shared_mem ifs and incomplete_regframe to 0
215 // - set shared_mem interrupt_delivery_enabled to 0
216 // - set shared_mem interrupt_collection_enabled to 0
217 // - set r31 to SHAREDINFO_ADDR
218 // - virtual bank switch 0
219 // maybe implement later
220 // - verify that there really IS a deliverable interrupt pending
221 // - set shared_mem iva
222 // needs to be done but not implemented (in reflect_interruption)
223 // - set shared_mem iipa
224 // don't know for sure
225 // - set shared_mem unat
226 // r16 == cr.isr
227 // r17 == cr.iim
228 // r18 == XSI_PSR_IC
229 // r19 == vpsr.ic
230 // r31 == pr
231 ENTRY(hyper_ssm_i)
232 #ifndef FAST_SSM_I
233 br.spnt.few dispatch_break_fault ;;
234 #endif
235 // give up for now if: ipsr.be==1, ipsr.pp==1
236 mov r30=cr.ipsr;;
237 mov r29=cr.iip;;
238 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
239 cmp.ne p7,p0=r21,r0
240 (p7) br.sptk.many dispatch_break_fault ;;
241 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
242 cmp.ne p7,p0=r21,r0
243 (p7) br.sptk.many dispatch_break_fault ;;
244 #ifdef FAST_HYPERPRIVOP_CNT
245 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
246 ld4 r21=[r20];;
247 adds r21=1,r21;;
248 st4 [r20]=r21;;
249 #endif
250 // set shared_mem iip to instruction after HYPER_SSM_I
251 extr.u r20=r30,41,2 ;;
252 cmp.eq p6,p7=2,r20 ;;
253 (p6) mov r20=0
254 (p6) adds r29=16,r29
255 (p7) adds r20=1,r20 ;;
256 dep r30=r20,r30,41,2;; // adjust cr.ipsr.ri but don't save yet
257 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
258 st8 [r21]=r29 ;;
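// This iip/ipsr.ri advance past the break (repeated at the end of most
// hyperprivops below) is, in C, roughly (a sketch):
//   unsigned long ri = (ipsr >> 41) & 3;   // slot of the break insn
//   if (ri == 2) { ri = 0; iip += 16; }    // last slot: next bundle
//   else ri++;                             // otherwise: next slot
//   ipsr = (ipsr & ~(3UL << 41)) | (ri << 41);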
259 // set shared_mem isr
260 extr.u r16=r16,38,1;; // grab cr.isr.ir bit
261 dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
262 dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
263 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
264 st8 [r21]=r16 ;;
265 // set cr.ipsr
266 mov r29=r30 ;;
267 movl r28=DELIVER_PSR_SET;;
268 movl r27=~DELIVER_PSR_CLR;;
269 or r29=r29,r28;;
270 and r29=r29,r27;;
271 mov cr.ipsr=r29;;
272 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
273 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
274 cmp.eq p6,p7=3,r29;;
275 (p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
276 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
277 ;;
278 // FOR SSM_I ONLY, also turn on psr.i and psr.ic
279 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
280 // movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
281 movl r27=~(IA64_PSR_BE|IA64_PSR_BN);;
282 or r30=r30,r28;;
283 and r30=r30,r27;;
284 mov r20=1
285 movl r22=THIS_CPU(current_psr_i_addr)
286 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
287 ld8 r22=[r22]
288 st8 [r21]=r30;;
289 // set shared_mem interrupt_delivery_enabled to 0
290 // set shared_mem interrupt_collection_enabled to 0
291 st1 [r22]=r20
292 st4 [r18]=r0
293 // cover and set shared_mem precover_ifs to cr.ifs
294 // set shared_mem ifs and incomplete_regframe to 0
295 cover ;;
296 mov r20=cr.ifs;;
297 adds r21=XSI_INCOMPL_REGFR_OFS-XSI_PSR_IC_OFS,r18 ;;
298 st4 [r21]=r0 ;;
299 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
300 st8 [r21]=r0 ;;
301 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
302 st8 [r21]=r20 ;;
303 // leave cr.ifs alone for later rfi
304 // set iip to go to domain IVA break instruction vector
305 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
306 ld8 r22=[r22];;
307 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
308 ld8 r23=[r22];;
309 movl r24=0x3000;;
310 add r24=r24,r23;;
311 mov cr.iip=r24;;
312 // OK, now all set to go except for switch to virtual bank0
313 mov r30=r2
314 mov r29=r3
315 mov r28=r4
316 ;;
317 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18;
318 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
319 adds r4=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
320 bsw.1;;
321 // FIXME?: ar.unat is not really handled correctly,
322 // but may not matter if the OS is NaT-clean
323 .mem.offset 0,0; st8.spill [r2]=r16,16;
324 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
325 .mem.offset 0,0; st8.spill [r2]=r18,16;
326 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
327 .mem.offset 0,0; st8.spill [r2]=r20,16;
328 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
329 .mem.offset 0,0; st8.spill [r2]=r22,16;
330 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
331 .mem.offset 0,0; st8.spill [r2]=r24,16;
332 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
333 .mem.offset 0,0; st8.spill [r2]=r26,16;
334 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
335 .mem.offset 0,0; st8.spill [r2]=r28,16;
336 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
337 .mem.offset 0,0; st8.spill [r2]=r30,16;
338 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
339 mov r31=r4
340 bsw.0 ;;
341 mov r2=r30
342 mov r3=r29
343 mov r4=r28
344 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
345 st4 [r20]=r0 ;;
346 mov pr=r31,-1 ;;
347 rfi
348 ;;
349 END(hyper_ssm_i)
351 // reflect domain clock interrupt
352 // r31 == pr
353 // r30 == cr.ivr
354 // r29 == rp
355 GLOBAL_ENTRY(fast_tick_reflect)
356 #ifndef FAST_TICK // see beginning of file
357 br.cond.sptk.many rp;;
358 #endif
359 mov r28=IA64_TIMER_VECTOR;;
360 cmp.ne p6,p0=r28,r30
361 (p6) br.cond.spnt.few rp;;
362 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
363 ld8 r26=[r20];;
364 mov r27=ar.itc;;
365 adds r27=200,r27;; // safety margin
366 cmp.ltu p6,p0=r26,r27
367 (p6) br.cond.spnt.few rp;;
368 mov r17=cr.ipsr;;
369 // slow path if: ipsr.be==1, ipsr.pp==1
370 extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
371 cmp.ne p6,p0=r21,r0
372 (p6) br.cond.spnt.few rp;;
373 extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
374 cmp.ne p6,p0=r21,r0
375 (p6) br.cond.spnt.few rp;;
376 // definitely have a domain tick
377 mov cr.eoi=r0;;
378 mov rp=r29;;
379 mov cr.itm=r26;; // ensure next tick
380 #ifdef FAST_REFLECT_CNT
381 movl r20=perfcounters+FAST_REFLECT_PERFC_OFS+((0x3000>>8)*4);;
382 ld4 r21=[r20];;
383 adds r21=1,r21;;
384 st4 [r20]=r21;;
385 #endif
386 // vcpu_pend_timer(current)
387 movl r18=THIS_CPU(current_psr_ic_addr)
388 ;;
389 ld8 r18=[r18]
390 ;;
391 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
392 ld8 r20=[r20];;
393 cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
394 (p6) br.cond.spnt.few fast_tick_reflect_done;;
395 tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
396 (p6) br.cond.spnt.few fast_tick_reflect_done;;
397 extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
398 extr.u r26=r20,6,2;; // r26 has irr index of itv.vector
399 movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
400 ld8 r19=[r19];;
401 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
402 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
403 ld8 r24=[r22];;
404 ld8 r23=[r23];;
405 cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
406 (p6) br.cond.spnt.few fast_tick_reflect_done;;
407 // set irr bit
408 adds r21=IA64_VCPU_IRR0_OFFSET,r19;
409 shl r26=r26,3;;
410 add r21=r21,r26;;
411 mov r25=1;;
412 shl r22=r25,r27;;
413 ld8 r23=[r21];;
414 or r22=r22,r23;;
415 st8 [r21]=r22;;
416 // set PSCB(pending_interruption)!
417 adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
418 st4 [r20]=r25;;
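// In C, this pend operation is roughly (a sketch):
//   int word = (vec >> 6) & 3;               // which of the 4 irr words
//   vcpu->irr[word] |= 1UL << (vec & 0x3f);  // set the vector's bit
//   PSCB(pending_interruption) = 1;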
420 // if interrupted at pl0, we're done
421 extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
422 cmp.eq p6,p0=r16,r0;;
423 (p6) br.cond.spnt.few fast_tick_reflect_done;;
424 // if guest vpsr.i is off, we're done
425 movl r21=THIS_CPU(current_psr_i_addr);;
426 ld8 r21=[r21];;
427 ld1 r21=[r21];;
428 cmp.eq p0,p6=r21,r0
429 (p6) br.cond.spnt.few fast_tick_reflect_done;;
431 // OK, we have a clock tick to deliver to the active domain!
432 // so deliver to iva+0x3000
433 // r17 == cr.ipsr
434 // r18 == XSI_PSR_IC
435 // r19 == IA64_KR(CURRENT)
436 // r31 == pr
437 mov r16=cr.isr;;
438 mov r29=cr.iip;;
439 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
440 st8 [r21]=r29 ;;
441 // set shared_mem isr
442 extr.u r16=r16,38,1;; // grab cr.isr.ir bit
443 dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
444 extr.u r20=r17,41,2 ;; // get ipsr.ri
445 dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
446 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
447 st8 [r21]=r16 ;;
448 // set cr.ipsr (make sure cpl==2!)
449 mov r29=r17 ;;
450 movl r28=DELIVER_PSR_SET;;
451 movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
452 or r29=r29,r28;;
453 and r29=r29,r27;;
454 mov cr.ipsr=r29;;
455 // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
456 extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
457 cmp.eq p6,p7=3,r29;;
458 (p6) dep r17=-1,r17,IA64_PSR_CPL0_BIT,2
459 (p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
460 ;;
461 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
462 movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
463 dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
464 or r17=r17,r28;;
465 and r17=r17,r27;;
466 ld4 r16=[r18];;
467 cmp.ne p6,p0=r16,r0;;
468 movl r22=THIS_CPU(current_psr_i_addr);;
469 ld8 r22=[r22]
470 (p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
471 ld1 r16=[r22];;
472 cmp.eq p6,p0=r16,r0;;
473 (p6) dep r17=-1,r17,IA64_PSR_I_BIT,1 ;;
474 mov r20=1
475 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
476 st8 [r21]=r17 ;;
477 // set shared_mem interrupt_delivery_enabled to 0
478 // set shared_mem interrupt_collection_enabled to 0
479 st1 [r22]=r20;;
480 st4 [r18]=r0;;
481 // cover and set shared_mem precover_ifs to cr.ifs
482 // set shared_mem ifs and incomplete_regframe to 0
483 cover ;;
484 mov r20=cr.ifs;;
485 adds r21=XSI_INCOMPL_REGFR_OFS-XSI_PSR_IC_OFS,r18 ;;
486 st4 [r21]=r0 ;;
487 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
488 st8 [r21]=r0 ;;
489 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
490 st8 [r21]=r20 ;;
491 // leave cr.ifs alone for later rfi
492 // set iip to go to domain IVA break instruction vector
493 adds r22=IA64_VCPU_IVA_OFFSET,r19;;
494 ld8 r23=[r22];;
495 movl r24=0x3000;;
496 add r24=r24,r23;;
497 mov cr.iip=r24;;
498 // OK, now all set to go except for switch to virtual bank0
499 mov r30=r2
500 mov r29=r3
501 mov r27=r4
502 #ifdef HANDLE_AR_UNAT
503 mov r28=ar.unat;
504 #endif
505 ;;
506 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
507 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
508 adds r4=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
509 ;;
510 bsw.1;;
511 .mem.offset 0,0; st8.spill [r2]=r16,16;
512 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
513 .mem.offset 0,0; st8.spill [r2]=r18,16;
514 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
515 .mem.offset 0,0; st8.spill [r2]=r20,16;
516 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
517 .mem.offset 0,0; st8.spill [r2]=r22,16;
518 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
519 .mem.offset 0,0; st8.spill [r2]=r24,16;
520 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
521 .mem.offset 0,0; st8.spill [r2]=r26,16;
522 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
523 .mem.offset 0,0; st8.spill [r2]=r28,16;
524 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
525 .mem.offset 0,0; st8.spill [r2]=r30,16;
526 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
527 #ifdef HANDLE_AR_UNAT
528 // r16-r23 are preserved registers in bank0; we need to restore them.
529 // r24-r31 are scratch registers, so we need not handle their NaT bits:
530 // the OS handler must assign them before accessing them.
531 ld8 r16=[r2],16;
532 ld8 r17=[r3],16;;
533 ld8 r18=[r2],16;
534 ld8 r19=[r3],16;;
535 ld8 r20=[r2],16;
536 ld8 r21=[r3],16;;
537 ld8 r22=[r2],16;
538 ld8 r23=[r3],16;;
539 #endif
540 mov r31=r4
541 ;;
542 bsw.0 ;;
543 mov r24=ar.unat;
544 mov r2=r30
545 mov r3=r29
546 mov r4=r27
547 #ifdef HANDLE_AR_UNAT
548 mov ar.unat=r28;
549 #endif
550 ;;
551 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;
552 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
553 st8 [r25]=r24;
554 st4 [r20]=r0 ;;
555 fast_tick_reflect_done:
556 mov pr=r31,-1 ;;
557 rfi
558 END(fast_tick_reflect)
560 // reflect domain breaks directly to domain
561 // r16 == cr.isr
562 // r17 == cr.iim
563 // r18 == XSI_PSR_IC
564 // r19 == vpsr.ic
565 // r31 == pr
566 GLOBAL_ENTRY(fast_break_reflect)
567 #ifndef FAST_BREAK // see beginning of file
568 br.sptk.many dispatch_break_fault ;;
569 #endif
570 mov r30=cr.ipsr;;
571 mov r29=cr.iip;;
572 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
573 cmp.ne p7,p0=r21,r0 ;;
574 (p7) br.spnt.few dispatch_break_fault ;;
575 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
576 cmp.ne p7,p0=r21,r0 ;;
577 (p7) br.spnt.few dispatch_break_fault ;;
578 movl r20=IA64_PSR_CPL ;;
579 and r22=r20,r30 ;;
580 cmp.ne p7,p0=r22,r0
581 (p7) br.spnt.many 1f ;;
582 cmp.eq p7,p0=r17,r0
583 (p7) br.spnt.few dispatch_break_fault ;;
584 #ifdef CRASH_DEBUG
585 movl r21=CDB_BREAK_NUM ;;
586 cmp.eq p7,p0=r17,r21
587 (p7) br.spnt.few dispatch_break_fault ;;
588 #endif
589 1:
590 #if 1 /* special handling in case running on simulator */
591 movl r20=first_break;;
592 ld4 r23=[r20];;
593 movl r21=0x80001;
594 movl r22=0x80002;;
595 cmp.ne p7,p0=r23,r0;;
596 (p7) br.spnt.few dispatch_break_fault ;;
597 cmp.eq p7,p0=r21,r17;
598 (p7) br.spnt.few dispatch_break_fault ;;
599 cmp.eq p7,p0=r22,r17;
600 (p7) br.spnt.few dispatch_break_fault ;;
601 #endif
602 movl r20=0x2c00;
603 // save iim in shared_info
604 adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
605 st8 [r21]=r17;;
606 // fall through
607 END(fast_break_reflect)
609 // reflect to domain ivt+r20
610 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
611 // r16 == cr.isr
612 // r18 == XSI_PSR_IC
613 // r20 == offset into ivt
614 // r29 == iip
615 // r30 == ipsr
616 // r31 == pr
617 ENTRY(fast_reflect)
618 #ifdef FAST_REFLECT_CNT
619 movl r22=perfcounters+FAST_REFLECT_PERFC_OFS;
620 shr r23=r20,8-2;;
621 add r22=r22,r23;;
622 ld4 r21=[r22];;
623 adds r21=1,r21;;
624 st4 [r22]=r21;;
625 #endif
626 // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
627 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
628 st8 [r21]=r29,XSI_ISR_OFS-XSI_IIP_OFS;;
629 // set shared_mem isr
630 st8 [r21]=r16 ;;
631 // set cr.ipsr
632 movl r21=THIS_CPU(current_psr_i_addr)
633 mov r29=r30 ;;
634 ld8 r21=[r21]
635 movl r28=DELIVER_PSR_SET;;
636 movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
637 or r29=r29,r28;;
638 and r29=r29,r27;;
639 mov cr.ipsr=r29;;
640 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
641 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
642 cmp.eq p6,p7=3,r29;;
643 (p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
644 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
645 ;;
646 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
647 movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
648 or r30=r30,r28;;
649 and r30=r30,r27;;
650 // also set shared_mem ipsr.i and ipsr.ic appropriately
651 ld1 r22=[r21]
652 ld4 r24=[r18];;
653 cmp4.eq p6,p7=r24,r0;;
654 (p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
655 (p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
656 mov r24=r21
657 cmp.ne p6,p7=r22,r0;;
658 (p6) dep r30=0,r30,IA64_PSR_I_BIT,1
659 (p7) dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
660 mov r22=1
661 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
662 st8 [r21]=r30 ;;
663 // set shared_mem interrupt_delivery_enabled to 0
664 // set shared_mem interrupt_collection_enabled to 0
665 st1 [r24]=r22
666 st4 [r18]=r0;;
667 // cover and set shared_mem precover_ifs to cr.ifs
668 // set shared_mem ifs and incomplete_regframe to 0
669 cover ;;
670 mov r24=cr.ifs;;
671 adds r21=XSI_INCOMPL_REGFR_OFS-XSI_PSR_IC_OFS,r18 ;;
672 st4 [r21]=r0 ;;
673 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
674 st8 [r21]=r0 ;;
675 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
676 st8 [r21]=r24 ;;
677 // FIXME: need to save iipa and isr to be arch-compliant
678 // set iip to go to domain IVA break instruction vector
679 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
680 ld8 r22=[r22];;
681 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
682 ld8 r23=[r22];;
683 add r20=r20,r23;;
684 mov cr.iip=r20;;
685 // OK, now all set to go except for switch to virtual bank0
686 mov r30=r2
687 mov r29=r3
688 #ifdef HANDLE_AR_UNAT
689 mov r28=ar.unat;
690 #endif
691 mov r27=r4
692 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18;
693 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
694 adds r4=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
695 ;;
696 bsw.1;;
697 .mem.offset 0,0; st8.spill [r2]=r16,16;
698 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
699 .mem.offset 0,0; st8.spill [r2]=r18,16;
700 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
701 .mem.offset 0,0; st8.spill [r2]=r20,16;
702 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
703 .mem.offset 0,0; st8.spill [r2]=r22,16;
704 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
705 .mem.offset 0,0; st8.spill [r2]=r24,16;
706 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
707 .mem.offset 0,0; st8.spill [r2]=r26,16;
708 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
709 .mem.offset 0,0; st8.spill [r2]=r28,16;
710 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
711 .mem.offset 0,0; st8.spill [r2]=r30,16;
712 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
713 #ifdef HANDLE_AR_UNAT
714 // r16-r23 are preserved registers in bank0; we need to restore them.
715 // r24-r31 are scratch registers, so we need not handle their NaT bits:
716 // the OS handler must assign them before accessing them.
717 ld8 r16=[r2],16;
718 ld8 r17=[r3],16;;
719 ld8 r18=[r2],16;
720 ld8 r19=[r3],16;;
721 ld8 r20=[r2],16;
722 ld8 r21=[r3],16;;
723 ld8 r22=[r2],16;
724 ld8 r23=[r3],16;;
725 #endif
726 mov r31=r4
727 ;;
728 bsw.0 ;;
729 mov r24=ar.unat;
730 mov r2=r30
731 mov r3=r29
732 #ifdef HANDLE_AR_UNAT
733 mov ar.unat=r28;
734 #endif
735 mov r4=r27
736 ;;
737 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;
738 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
739 st8 [r25]=r24;
740 st4 [r20]=r0 ;;
741 mov pr=r31,-1 ;;
742 rfi
743 ;;
744 END(fast_reflect)
746 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
747 // r16 == isr
748 // r17 == ifa
749 // r19 == reflect number (only pass-thru to dispatch_reflection)
750 // r20 == offset into ivt
751 // r31 == pr
752 GLOBAL_ENTRY(fast_access_reflect)
753 #ifndef FAST_ACCESS_REFLECT // see beginning of file
754 br.spnt.few dispatch_reflection ;;
755 #endif
756 mov r30=cr.ipsr;;
757 mov r29=cr.iip;;
758 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
759 cmp.ne p7,p0=r21,r0
760 (p7) br.spnt.few dispatch_reflection ;;
761 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
762 cmp.ne p7,p0=r21,r0
763 (p7) br.spnt.few dispatch_reflection ;;
764 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
765 cmp.eq p7,p0=r21,r0
766 (p7) br.spnt.few dispatch_reflection ;;
767 movl r18=THIS_CPU(current_psr_ic_addr);;
768 ld8 r18=[r18];;
769 ld4 r21=[r18];;
770 cmp.eq p7,p0=r0,r21
771 (p7) br.spnt.few dispatch_reflection ;;
772 // set shared_mem ifa, FIXME: should we validate it?
773 mov r17=cr.ifa;;
774 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
775 st8 [r21]=r17 ;;
776 // get rr[ifa] and save to itir in shared memory (extra bits ignored)
777 shr.u r22=r17,61
778 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
779 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
780 shladd r22=r22,3,r21;;
781 ld8 r22=[r22];;
782 and r22=~3,r22;;
783 st8 [r23]=r22;;
784 br.cond.sptk.many fast_reflect;;
785 END(fast_access_reflect)
787 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
788 // is as it was at the time of original miss. We want to preserve that
789 // so if we get a nested fault, we can just branch to page_fault
790 GLOBAL_ENTRY(fast_tlb_miss_reflect)
791 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
792 br.spnt.few page_fault ;;
793 #else
794 mov r31=pr
795 mov r30=cr.ipsr
796 mov r29=cr.iip
797 mov r16=cr.isr
798 mov r17=cr.ifa;;
799 // for now, always take slow path for region 0 (e.g. metaphys mode)
800 extr.u r21=r17,61,3;;
801 cmp.eq p7,p0=r0,r21
802 (p7) br.spnt.few page_fault ;;
803 // always take slow path for PL0 (e.g. __copy_from_user)
804 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
805 cmp.eq p7,p0=r21,r0
806 (p7) br.spnt.few page_fault ;;
807 // slow path if strange ipsr or isr bits set
808 extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
809 cmp.ne p7,p0=r21,r0
810 (p7) br.spnt.few page_fault ;;
811 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
812 cmp.ne p7,p0=r21,r0
813 (p7) br.spnt.few page_fault ;;
814 movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
815 and r21=r16,r21;;
816 cmp.ne p7,p0=r0,r21
817 (p7) br.spnt.few page_fault ;;
818 // also take slow path if virtual psr.ic=0
819 movl r18=XSI_PSR_IC;;
820 ld4 r21=[r18];;
821 cmp.eq p7,p0=r0,r21
822 (p7) br.spnt.few page_fault ;;
823 // OK, if we get to here, we are doing a fast vcpu_translate. Need to:
824 // 1) look in the virtual TR's (pinned), if not there
825 // 2) look in the 1-entry TLB (pinned), if not there
826 // 3) check the domain VHPT (NOT pinned, accesses domain memory!)
827 // If we find it in any of these places, we need to effectively do
828 // a hyper_itc_i/d
830 // short-term hack for now, if in region 5-7, take slow path
831 // since all Linux TRs are in region 5 or 7, we need not check TRs
832 extr.u r21=r17,61,3;;
833 cmp.le p7,p0=5,r21
834 (p7) br.spnt.few page_fault ;;
835 fast_tlb_no_tr_match:
836 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
837 ld8 r27=[r27];;
838 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
839 (p6) adds r25=IA64_VCPU_ITLB_OFFSET,r27;;
840 (p7) adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
841 ld8 r20=[r25],8;;
842 tbit.z p7,p0=r20,0;; // present?
843 (p7) br.cond.spnt.few 1f;;
844 // if ifa is in range of tlb, don't bother to check rid, go slow path
845 ld8 r21=[r25],8;;
846 mov r23=1
847 extr.u r21=r21,2,6;;
848 shl r22=r23,r21
849 ld8 r21=[r25],8;;
850 cmp.ltu p7,p0=r17,r21
851 (p7) br.cond.sptk.many 1f;
852 add r21=r22,r21;;
853 cmp.ltu p7,p0=r17,r21
854 (p7) br.cond.spnt.few page_fault;;
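// In C, the overlap test above is roughly (a sketch; the tlb field
// names are illustrative):
//   unsigned long size = 1UL << ((tlb->itir >> 2) & 0x3f); // page size
//   if (ifa >= tlb->vadr && ifa < tlb->vadr + size)
//       goto page_fault;      // ifa covered by the cached entry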
856 1: // check the guest VHPT
857 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
858 ld8 r19=[r19];;
859 tbit.nz p7,p0=r19,IA64_PTA_VF_BIT;; // long format VHPT
860 (p7) br.cond.spnt.few page_fault;;
861 // if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
862 // FIXME: later, we deliver an alt_d/i vector after thash and itir
863 tbit.z p7,p0=r19,IA64_PTA_VE_BIT;;
864 (p7) br.cond.spnt.few page_fault;;
865 extr.u r25=r17,61,3;;
866 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
867 shl r25=r25,3;;
868 add r21=r21,r25;;
869 ld8 r22=[r21];;
870 tbit.z p7,p0=r22,0
871 (p7) br.cond.spnt.few page_fault;;
873 // compute and save away itir (r22 & RR_PS_MASK)
874 movl r21=0xfc;;
875 and r22=r22,r21;;
876 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
877 st8 [r21]=r22;;
879 // save away ifa
880 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
881 st8 [r21]=r17;;
882 // see vcpu_thash to save away iha
883 shr.u r20 = r17, 61
884 addl r25 = 1, r0
885 movl r30 = 0xe000000000000000
886 ;;
887 and r21 = r30, r17 // VHPT_Addr1
888 ;;
889 shladd r28 = r20, 3, r18
890 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
891 ;;
892 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
893 addl r28 = 32767, r0
894 ld8 r24 = [r19] // pta
895 ;;
896 ld8 r23 = [r27] // rrs[vadr>>61]
897 extr.u r26 = r24, 2, 6
898 ;;
899 extr.u r22 = r23, 2, 6
900 shl r30 = r25, r26
901 ;;
902 shr.u r19 = r17, r22
903 shr.u r29 = r24, 15
904 ;;
905 adds r30 = -1, r30
906 ;;
907 shladd r27 = r19, 3, r0
908 extr.u r26 = r30, 15, 46
909 ;;
910 andcm r24 = r29, r26
911 and r19 = r28, r27
912 shr.u r25 = r27, 15
913 ;;
914 and r23 = r26, r25
915 ;;
916 or r22 = r24, r23
917 ;;
918 dep.z r20 = r22, 15, 46
919 ;;
920 or r30 = r20, r21
921 ;;
922 //or r8 = r19, r30
923 or r19 = r19, r30
924 ;;
925 adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18 ;;
926 st8 [r23]=r19;;
927 // done with thash, check guest VHPT
929 adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
930 ld8 r24 = [r20];; // pta
931 // avoid recursively walking the VHPT
932 // if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
933 mov r20=-8
934 xor r21=r17,r24
935 extr.u r24=r24,2,6;;
936 shl r20=r20,r24;;
937 shr.u r20=r20,3;;
938 and r21=r20,r21;;
939 cmp.eq p7,p0=r21,r0
940 (p7) br.cond.spnt.few 1f;;
941 // __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
942 // prepare for possible nested dtlb fault
943 mov r29=b0
944 movl r30=guest_vhpt_miss;;
945 // now go fetch the entry from the guest VHPT
946 ld8 r20=[r19];;
947 // if we wind up here, we successfully loaded the VHPT entry
949 // this VHPT walker aborts on non-present pages instead
950 // of inserting a not-present translation, this allows
951 // vectoring directly to the miss handler
952 tbit.z p7,p0=r20,0
953 (p7) br.cond.spnt.few page_not_present;;
955 #ifdef FAST_REFLECT_CNT
956 movl r21=perfcounters+FAST_VHPT_TRANSLATE_PERFC_OFS;;
957 ld4 r22=[r21];;
958 adds r22=1,r22;;
959 st4 [r21]=r22;;
960 #endif
962 // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
963 // r16 == pte
964 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
965 // r18 == XSI_PSR_IC_OFS
966 // r24 == ps
967 // r29 == saved value of b0 in case of recovery
968 // r30 == recovery ip if failure occurs
969 // r31 == pr
970 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
971 (p6) mov r17=1;;
972 (p7) mov r17=0;;
973 mov r16=r20
974 mov r29=b0 ;;
975 movl r30=recover_and_page_fault ;;
976 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
977 ld8 r24=[r21];;
978 extr.u r24=r24,2,6;;
979 // IFA already in PSCB
980 br.cond.sptk.many fast_insert;;
981 END(fast_tlb_miss_reflect)
983 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
984 ENTRY(recover_and_page_fault)
985 #ifdef PERF_COUNTERS
986 movl r21=perfcounters + RECOVER_TO_PAGE_FAULT_PERFC_OFS;;
987 ld4 r22=[r21];;
988 adds r22=1,r22;;
989 st4 [r21]=r22;;
990 #endif
991 mov b0=r29;;
992 br.cond.sptk.many page_fault;;
994 // if we wind up here, we missed in guest VHPT so recover
995 // from nested dtlb fault and reflect a tlb fault to the guest
996 guest_vhpt_miss:
997 mov b0=r29;;
998 // fault = IA64_VHPT_FAULT
999 mov r20=r0
1000 br.cond.sptk.many 1f;
1002 // if we get to here, we are ready to reflect
1003 // need to set up virtual ifa, iha, itir (fast_reflect handles
1004 // virtual isr, iip, ipsr, ifs)
1005 // see vcpu_get_itir_on_fault: get ps,rid,(FIXME key) from rr[ifa]
1006 page_not_present:
1007 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
1008 (p6) movl r20=0x400;;
1009 (p7) movl r20=0x800;;
1011 1: extr.u r25=r17,61,3;;
1012 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
1013 shl r25=r25,3;;
1014 add r21=r21,r25;;
1015 ld8 r22=[r21];;
1016 extr.u r22=r22,2,30;;
1017 dep.z r22=r22,2,30;;
1018 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
1019 st8 [r23]=r22;;
1021 // fast reflect expects
1022 // r16 == cr.isr
1023 // r18 == XSI_PSR_IC
1024 // r20 == offset into ivt
1025 // r29 == iip
1026 // r30 == ipsr
1027 // r31 == pr
1028 //mov r16=cr.isr
1029 mov r29=cr.iip
1030 mov r30=cr.ipsr
1031 br.sptk.many fast_reflect;;
1032 #endif
1033 END(fast_tlb_miss_reflect)
1035 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
1036 ENTRY(hyper_rfi)
1037 #ifndef FAST_RFI
1038 br.spnt.few dispatch_break_fault ;;
1039 #endif
1040 // if no interrupts pending, proceed
1041 mov r30=r0
1042 cmp.eq p7,p0=r20,r0
1043 (p7) br.sptk.many 1f
1044 ;;
1045 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
1046 ld8 r21=[r20];; // r21 = vcr.ipsr
1047 extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
1048 mov r30=r22
1049 // r30 determines whether we might deliver an immediate extint
1050 1:
1051 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
1052 ld8 r21=[r20];; // r21 = vcr.ipsr
1053 extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
1054 // if turning on psr.be, give up for now and do it the slow way
1055 cmp.ne p7,p0=r22,r0
1056 (p7) br.spnt.few dispatch_break_fault ;;
1057 // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
1058 movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
1059 and r22=r20,r21
1060 ;;
1061 cmp.ne p7,p0=r22,r20
1062 (p7) br.spnt.few dispatch_break_fault ;;
1063 // if was in metaphys mode, do it the slow way (FIXME later?)
1064 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1065 ld4 r20=[r20];;
1066 cmp.ne p7,p0=r20,r0
1067 (p7) br.spnt.few dispatch_break_fault ;;
1068 // if domain hasn't already done virtual bank switch
1069 // do it the slow way (FIXME later?)
1070 #if 0
1071 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1072 ld4 r20=[r20];;
1073 cmp.eq p7,p0=r20,r0
1074 (p7) br.spnt.few dispatch_break_fault ;;
1075 #endif
1076 // validate vcr.iip, if in Xen range, do it the slow way
1077 adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
1078 ld8 r22=[r20];;
1079 movl r23=HYPERVISOR_VIRT_START
1080 movl r24=HYPERVISOR_VIRT_END;;
1081 cmp.ltu p0,p7=r22,r23 ;; // if !(iip<low) &&
1082 (p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high)
1083 (p7) br.spnt.few dispatch_break_fault ;;
1084 #ifndef RFI_TO_INTERRUPT // see beginning of file
1085 cmp.ne p6,p0=r30,r0
1086 (p6) br.cond.spnt.few dispatch_break_fault ;;
1087 #endif
1089 1: // OK now, let's do an rfi.
1090 #ifdef FAST_HYPERPRIVOP_CNT
1091 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
1092 ld4 r23=[r20];;
1093 adds r23=1,r23;;
1094 st4 [r20]=r23;;
1095 #endif
1096 #ifdef RFI_TO_INTERRUPT
1097 // maybe do an immediate interrupt delivery?
1098 cmp.ne p6,p0=r30,r0
1099 (p6) br.cond.spnt.few rfi_check_extint;;
1100 #endif
1102 just_do_rfi:
1103 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1104 mov cr.iip=r22;;
1105 adds r20=XSI_INCOMPL_REGFR_OFS-XSI_PSR_IC_OFS,r18 ;;
1106 st4 [r20]=r0 ;;
1107 adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1108 ld8 r20=[r20];;
1109 dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
1110 mov cr.ifs=r20 ;;
1111 // ipsr.cpl = (vcr.ipsr.cpl == 0) ? 2 : 3;
1112 movl r20=THIS_CPU(current_psr_i_addr)
1113 dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
1114 // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
1115 ld8 r20=[r20]
1116 mov r19=1
1117 extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
1118 cmp.ne p7,p6=r23,r0 ;;
1119 // not done yet
1120 (p7) st1 [r20]=r0
1121 (p6) st1 [r20]=r19;;
1122 extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
1123 cmp.ne p7,p6=r23,r0 ;;
1124 (p7) st4 [r18]=r19;;
1125 (p6) st4 [r18]=r0;;
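// Note the inverted sense: the byte at *current_psr_i_addr is an event
// mask, so storing 0 means vpsr.i=1; vpsr.ic is stored directly. In C,
// roughly (a sketch):
//   *psr_i_addr = ((vcr_ipsr >> IA64_PSR_I_BIT) & 1) ? 0 : 1;
//   *(uint32_t *)xsi_psr_ic = (vcr_ipsr >> IA64_PSR_IC_BIT) & 1;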
1126 // force on psr.ic, i, dt, rt, it, bn
1127 movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
1128 ;;
1129 or r21=r21,r20
1130 ;;
1131 mov cr.ipsr=r21
1132 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1133 ld4 r21=[r20];;
1134 cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
1135 (p7) br.cond.spnt.few 1f;
1136 // OK, now all set to go except for switch to virtual bank1
1137 mov r22=1;; st4 [r20]=r22;
1138 mov r30=r2; mov r29=r3;;
1139 mov r17=ar.unat;;
1140 adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
1141 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18;
1142 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
1143 ld8 r16=[r16];;
1144 mov ar.unat=r16;;
1145 bsw.1;;
1146 // FIXME?: ar.unat is not really handled correctly,
1147 // but may not matter if the OS is NaT-clean
1148 .mem.offset 0,0; ld8.fill r16=[r2],16 ;
1149 .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
1150 .mem.offset 0,0; ld8.fill r18=[r2],16 ;
1151 .mem.offset 8,0; ld8.fill r19=[r3],16 ;;
1152 .mem.offset 0,0; ld8.fill r20=[r2],16 ;
1153 .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
1154 .mem.offset 0,0; ld8.fill r22=[r2],16 ;
1155 .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
1156 .mem.offset 0,0; ld8.fill r24=[r2],16 ;
1157 .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
1158 .mem.offset 0,0; ld8.fill r26=[r2],16 ;
1159 .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
1160 .mem.offset 0,0; ld8.fill r28=[r2],16 ;
1161 .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
1162 .mem.offset 0,0; ld8.fill r30=[r2],16 ;
1163 .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
1164 bsw.0 ;;
1165 mov ar.unat=r17;;
1166 mov r2=r30; mov r3=r29;;
1167 1: mov pr=r31,-1
1168 ;;
1169 rfi
1170 ;;
1171 END(hyper_rfi)
1173 #ifdef RFI_TO_INTERRUPT
1174 ENTRY(rfi_check_extint)
1175 //br.sptk.many dispatch_break_fault ;;
1177 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1178 // make sure none of these get trashed in case going to just_do_rfi
1179 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1180 ld8 r30=[r30];;
1181 adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
1182 mov r25=192
1183 adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
1184 ld8 r23=[r16];;
1185 cmp.eq p6,p0=r23,r0;;
1186 (p6) adds r16=-8,r16;;
1187 (p6) adds r24=-8,r24;;
1188 (p6) adds r25=-64,r25;;
1189 (p6) ld8 r23=[r16];;
1190 (p6) cmp.eq p6,p0=r23,r0;;
1191 (p6) adds r16=-8,r16;;
1192 (p6) adds r24=-8,r24;;
1193 (p6) adds r25=-64,r25;;
1194 (p6) ld8 r23=[r16];;
1195 (p6) cmp.eq p6,p0=r23,r0;;
1196 (p6) adds r16=-8,r16;;
1197 (p6) adds r24=-8,r24;;
1198 (p6) adds r25=-64,r25;;
1199 (p6) ld8 r23=[r16];;
1200 (p6) cmp.eq p6,p0=r23,r0;;
1201 cmp.eq p6,p0=r23,r0
1202 (p6) br.cond.spnt.few just_do_rfi; // this is actually an error
1203 // r16 points to non-zero element of irr, r23 has value
1204 // r24 points to corr element of insvc, r25 has elt*64
1205 ld8 r26=[r24];;
1206 cmp.geu p6,p0=r26,r23
1207 (p6) br.cond.spnt.many just_do_rfi;
1209 // not masked by insvc, get vector number
1210 shr.u r26=r23,1;;
1211 or r26=r23,r26;;
1212 shr.u r27=r26,2;;
1213 or r26=r26,r27;;
1214 shr.u r27=r26,4;;
1215 or r26=r26,r27;;
1216 shr.u r27=r26,8;;
1217 or r26=r26,r27;;
1218 shr.u r27=r26,16;;
1219 or r26=r26,r27;;
1220 shr.u r27=r26,32;;
1221 or r26=r26,r27;;
1222 andcm r26=0xffffffffffffffff,r26;;
1223 popcnt r26=r26;;
1224 sub r26=63,r26;;
1225 // r26 now contains the bit index (mod 64)
1226 mov r27=1;;
1227 shl r27=r27,r26;;
1228 // r27 now contains the (within the proper word) bit mask
1229 add r26=r25,r26
1230 // r26 now contains the vector [0..255]
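// The shift/or cascade above isolates the highest set bit; in C,
// roughly (a sketch):
//   unsigned long x = irr_word;
//   x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
//   x |= x >> 8;  x |= x >> 16; x |= x >> 32;  // smear MSB downward
//   bit = 63 - popcount(~x);                   // highest set bit index
//   vector = word_index * 64 + bit;            // r25 holds word_index*64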
1231 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1232 ld8 r20=[r20] ;;
1233 extr.u r28=r20,16,1
1234 extr.u r29=r20,4,4 ;;
1235 cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi
1236 (p6) br.cond.spnt.few just_do_rfi;;
1237 shl r29=r29,4;;
1238 adds r29=15,r29;;
1239 cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
1240 (p6) br.cond.spnt.few just_do_rfi;;
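// In C, the tpr masking test is roughly (a sketch):
//   if (((tpr >> 16) & 1) ||                        // tpr.mmi masks all
//       vector <= ((((tpr >> 4) & 0xf) << 4) + 15)) // tpr.mic class
//       goto just_do_rfi;                           // masked: no delivery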
1241 END(rfi_check_extint)
1243 // this doesn't work yet (dies early after getting to user mode)
1244 // but happens relatively infrequently, so fix it later.
1245 // NOTE that these will be counted incorrectly for now (for privcnt output)
1246 ENTRY(rfi_with_interrupt)
1247 #if 1
1248 br.sptk.many dispatch_break_fault ;;
1249 #endif
1251 // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
1252 // r18 == XSI_PSR_IC
1253 // r21 == vipsr (ipsr in shared_mem)
1254 // r30 == IA64_KR(CURRENT)
1255 // r31 == pr
1256 mov r17=cr.ipsr;;
1257 mov r16=cr.isr;;
1258 // set shared_mem isr
1259 extr.u r16=r16,38,1;; // grab cr.isr.ir bit
1260 dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
1261 extr.u r20=r21,41,2 ;; // get v(!)psr.ri
1262 dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
1263 adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
1264 st8 [r22]=r16;;
1265 movl r22=THIS_CPU(current_psr_i_addr)
1266 // set cr.ipsr (make sure cpl==2!)
1267 mov r29=r17
1268 movl r28=DELIVER_PSR_SET;;
1269 mov r20=1;;
1270 ld8 r22=[r22]
1271 movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0)
1272 or r29=r29,r28;;
1273 and r29=r29,r27;;
1274 mov cr.ipsr=r29;;
1275 // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
1276 // set shared_mem interrupt_delivery_enabled to 0
1277 // set shared_mem interrupt_collection_enabled to 0
1278 st1 [r22]=r20
1279 st4 [r18]=r0;;
1280 // cover and set shared_mem precover_ifs to cr.ifs
1281 // set shared_mem ifs and incomplete_regframe to 0
1282 #if 0
1283 cover ;;
1284 mov r20=cr.ifs;;
1285 adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
1286 st4 [r22]=r0 ;;
1287 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1288 st8 [r22]=r0 ;;
1289 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1290 st8 [r22]=r20 ;;
1291 // leave cr.ifs alone for later rfi
1292 #else
1293 adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
1294 st4 [r22]=r0 ;;
1295 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1296 ld8 r20=[r22];;
1297 st8 [r22]=r0 ;;
1298 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1299 st8 [r22]=r20 ;;
1300 #endif
1301 // set iip to go to domain IVA break instruction vector
1302 adds r22=IA64_VCPU_IVA_OFFSET,r30;;
1303 ld8 r23=[r22];;
1304 movl r24=0x3000;;
1305 add r24=r24,r23;;
1306 mov cr.iip=r24;;
1307 #if 0
1308 // OK, now all set to go except for switch to virtual bank0
1309 mov r30=r2; mov r29=r3;;
1310 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
1311 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
1312 bsw.1;;
1313 // FIXME: need to handle ar.unat!
1314 .mem.offset 0,0; st8.spill [r2]=r16,16;
1315 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
1316 .mem.offset 0,0; st8.spill [r2]=r18,16;
1317 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
1318 .mem.offset 0,0; st8.spill [r2]=r20,16;
1319 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
1320 .mem.offset 0,0; st8.spill [r2]=r22,16;
1321 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
1322 .mem.offset 0,0; st8.spill [r2]=r24,16;
1323 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
1324 .mem.offset 0,0; st8.spill [r2]=r26,16;
1325 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
1326 .mem.offset 0,0; st8.spill [r2]=r28,16;
1327 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
1328 .mem.offset 0,0; st8.spill [r2]=r30,16;
1329 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
1330 movl r31=XSI_IPSR;;
1331 bsw.0 ;;
1332 mov r2=r30; mov r3=r29;;
1333 #else
1334 bsw.1;;
1335 movl r31=XSI_IPSR;;
1336 bsw.0 ;;
1337 #endif
1338 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1339 st4 [r20]=r0 ;;
1340 mov pr=r31,-1 ;;
1341 rfi
1342 END(rfi_with_interrupt)
1343 #endif // RFI_TO_INTERRUPT
1345 ENTRY(hyper_cover)
1346 #ifdef FAST_HYPERPRIVOP_CNT
1347 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_COVER);;
1348 ld4 r21=[r20];;
1349 adds r21=1,r21;;
1350 st4 [r20]=r21;;
1351 #endif
1352 mov r24=cr.ipsr
1353 mov r25=cr.iip;;
1354 // skip test for vpsr.ic; it's a prerequisite for hyperprivops
1355 cover ;;
1356 adds r20=XSI_INCOMPL_REGFR_OFS-XSI_PSR_IC_OFS,r18 ;;
1357 mov r30=cr.ifs;;
1358 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
1359 ld4 r21=[r20] ;;
1360 cmp.eq p6,p7=r21,r0 ;;
1361 (p6) st8 [r22]=r30;;
1362 (p7) st4 [r20]=r0;;
1363 mov cr.ifs=r0;;
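// In C, the bookkeeping around cover is roughly (a sketch):
//   if (!PSCB(incomplete_regframe))
//       PSCB(ifs) = cr_ifs;    // publish the covered frame to the guest
//   else
//       PSCB(incomplete_regframe) = 0;
//   cr_ifs = 0;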
1364 // adjust return address to skip over break instruction
1365 extr.u r26=r24,41,2 ;;
1366 cmp.eq p6,p7=2,r26 ;;
1367 (p6) mov r26=0
1368 (p6) adds r25=16,r25
1369 (p7) adds r26=1,r26
1370 ;;
1371 dep r24=r26,r24,41,2
1372 ;;
1373 mov cr.ipsr=r24
1374 mov cr.iip=r25
1375 mov pr=r31,-1 ;;
1376 rfi
1377 ;;
1378 END(hyper_cover)
1380 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
1381 ENTRY(hyper_ssm_dt)
1382 #ifdef FAST_HYPERPRIVOP_CNT
1383 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_DT);;
1384 ld4 r21=[r20];;
1385 adds r21=1,r21;;
1386 st4 [r20]=r21;;
1387 #endif
1388 mov r24=cr.ipsr
1389 mov r25=cr.iip;;
1390 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1391 ld4 r21=[r20];;
1392 cmp.eq p7,p0=r21,r0 // meta==0?
1393 (p7) br.spnt.many 1f ;; // already in virtual mode
1394 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1395 ld8 r22=[r22];;
1396 adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
1397 ld8 r23=[r22];;
1398 mov rr[r0]=r23;;
1399 srlz.i;;
1400 st4 [r20]=r0 ;;
1401 // adjust return address to skip over break instruction
1402 1: extr.u r26=r24,41,2 ;;
1403 cmp.eq p6,p7=2,r26 ;;
1404 (p6) mov r26=0
1405 (p6) adds r25=16,r25
1406 (p7) adds r26=1,r26
1407 ;;
1408 dep r24=r26,r24,41,2
1409 ;;
1410 mov cr.ipsr=r24
1411 mov cr.iip=r25
1412 mov pr=r31,-1 ;;
1413 rfi
1414 ;;
1415 END(hyper_ssm_dt)
1417 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
1418 ENTRY(hyper_rsm_dt)
1419 #ifdef FAST_HYPERPRIVOP_CNT
1420 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RSM_DT);;
1421 ld4 r21=[r20];;
1422 adds r21=1,r21;;
1423 st4 [r20]=r21;;
1424 #endif
1425 mov r24=cr.ipsr
1426 mov r25=cr.iip;;
1427 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1428 ld4 r21=[r20];;
1429 cmp.ne p7,p0=r21,r0 // meta==1?
1430 (p7) br.spnt.many 1f ;; // already in metaphysical mode
1431 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1432 ld8 r22=[r22];;
1433 adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
1434 ld8 r23=[r22];;
1435 mov rr[r0]=r23;;
1436 srlz.i;;
1437 adds r21=1,r0 ;;
1438 st4 [r20]=r21 ;;
1439 // adjust return address to skip over break instruction
1440 1: extr.u r26=r24,41,2 ;;
1441 cmp.eq p6,p7=2,r26 ;;
1442 (p6) mov r26=0
1443 (p6) adds r25=16,r25
1444 (p7) adds r26=1,r26
1445 ;;
1446 dep r24=r26,r24,41,2
1447 ;;
1448 mov cr.ipsr=r24
1449 mov cr.iip=r25
1450 mov pr=r31,-1 ;;
1451 rfi
1452 ;;
1453 END(hyper_rsm_dt)
1455 ENTRY(hyper_get_tpr)
1456 #ifdef FAST_HYPERPRIVOP_CNT
1457 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_TPR);;
1458 ld4 r21=[r20];;
1459 adds r21=1,r21;;
1460 st4 [r20]=r21;;
1461 #endif
1462 mov r24=cr.ipsr
1463 mov r25=cr.iip;;
1464 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1465 ld8 r8=[r20];;
1466 extr.u r26=r24,41,2 ;;
1467 cmp.eq p6,p7=2,r26 ;;
1468 (p6) mov r26=0
1469 (p6) adds r25=16,r25
1470 (p7) adds r26=1,r26
1471 ;;
1472 dep r24=r26,r24,41,2
1473 ;;
1474 mov cr.ipsr=r24
1475 mov cr.iip=r25
1476 mov pr=r31,-1 ;;
1477 rfi
1478 ;;
1479 END(hyper_get_tpr)
1481 // if we get to here, there are no interrupts pending so we
1482 // can change virtual tpr to any value without fear of provoking
1483 // (or accidentally missing) an interrupt delivery
1484 ENTRY(hyper_set_tpr)
1485 #ifdef FAST_HYPERPRIVOP_CNT
1486 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_TPR);;
1487 ld4 r21=[r20];;
1488 adds r21=1,r21;;
1489 st4 [r20]=r21;;
1490 #endif
1491 mov r24=cr.ipsr
1492 mov r25=cr.iip;;
1493 movl r27=0xff00;;
1494 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1495 andcm r8=r8,r27;;
1496 st8 [r20]=r8;;
1497 extr.u r26=r24,41,2 ;;
1498 cmp.eq p6,p7=2,r26 ;;
1499 (p6) mov r26=0
1500 (p6) adds r25=16,r25
1501 (p7) adds r26=1,r26
1502 ;;
1503 dep r24=r26,r24,41,2
1504 ;;
1505 mov cr.ipsr=r24
1506 mov cr.iip=r25
1507 mov pr=r31,-1 ;;
1508 rfi
1509 ;;
1510 END(hyper_set_tpr)
1512 ENTRY(hyper_get_ivr)
1513 #ifdef FAST_HYPERPRIVOP_CNT
1514 movl r22=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_IVR);;
1515 ld4 r21=[r22];;
1516 adds r21=1,r21;;
1517 st4 [r22]=r21;;
1518 #endif
1519 mov r8=15;;
1520 // when we get here, r20 ~= interrupts pending (nonzero if any)
1521 cmp.eq p7,p0=r20,r0;;
1522 (p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
1523 (p7) st4 [r20]=r0;;
1524 (p7) br.spnt.many 1f ;;
1525 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1526 ld8 r30=[r30];;
1527 adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
1528 mov r25=192
1529 adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
1530 ld8 r23=[r22];;
1531 cmp.eq p6,p0=r23,r0;;
1532 (p6) adds r22=-8,r22;;
1533 (p6) adds r24=-8,r24;;
1534 (p6) adds r25=-64,r25;;
1535 (p6) ld8 r23=[r22];;
1536 (p6) cmp.eq p6,p0=r23,r0;;
1537 (p6) adds r22=-8,r22;;
1538 (p6) adds r24=-8,r24;;
1539 (p6) adds r25=-64,r25;;
1540 (p6) ld8 r23=[r22];;
1541 (p6) cmp.eq p6,p0=r23,r0;;
1542 (p6) adds r22=-8,r22;;
1543 (p6) adds r24=-8,r24;;
1544 (p6) adds r25=-64,r25;;
1545 (p6) ld8 r23=[r22];;
1546 (p6) cmp.eq p6,p0=r23,r0;;
1547 cmp.eq p6,p0=r23,r0
1548 (p6) br.cond.spnt.few 1f; // this is actually an error
1549 // r22 points to non-zero element of irr, r23 has value
1550 // r24 points to corr element of insvc, r25 has elt*64
1551 ld8 r26=[r24];;
1552 cmp.geu p6,p0=r26,r23
1553 (p6) br.cond.spnt.many 1f;
1554 // not masked by insvc, get vector number
1555 shr.u r26=r23,1;;
1556 or r26=r23,r26;;
1557 shr.u r27=r26,2;;
1558 or r26=r26,r27;;
1559 shr.u r27=r26,4;;
1560 or r26=r26,r27;;
1561 shr.u r27=r26,8;;
1562 or r26=r26,r27;;
1563 shr.u r27=r26,16;;
1564 or r26=r26,r27;;
1565 shr.u r27=r26,32;;
1566 or r26=r26,r27;;
1567 andcm r26=0xffffffffffffffff,r26;;
1568 popcnt r26=r26;;
1569 sub r26=63,r26;;
1570 // r26 now contains the bit index (mod 64)
1571 mov r27=1;;
1572 shl r27=r27,r26;;
1573 // r27 now contains the (within the proper word) bit mask
1574 add r26=r25,r26
1575 // r26 now contains the vector [0..255]
1576 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1577 ld8 r20=[r20] ;;
1578 extr.u r28=r20,16,1
1579 extr.u r29=r20,4,4 ;;
1580 cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS
1581 (p6) br.cond.spnt.few 1f;
1582 shl r29=r29,4;;
1583 adds r29=15,r29;;
1584 cmp.ge p6,p0=r29,r26
1585 (p6) br.cond.spnt.few 1f;
1586 // OK, have an unmasked vector to process/return
1587 ld8 r25=[r24];;
1588 or r25=r25,r27;;
1589 st8 [r24]=r25;;
1590 ld8 r25=[r22];;
1591 andcm r25=r25,r27;;
1592 st8 [r22]=r25;;
1593 mov r8=r26;;
1594 // if it's a clock tick, remember itm to avoid delivering it twice
1595 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
1596 ld8 r20=[r20];;
1597 extr.u r20=r20,0,8;;
1598 cmp.eq p6,p0=r20,r8
1599 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30
1600 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;;
1601 ld8 r23=[r23];;
1602 (p6) st8 [r22]=r23;;
1603 // all done
1604 1: mov r24=cr.ipsr
1605 mov r25=cr.iip;;
1606 extr.u r26=r24,41,2 ;;
1607 cmp.eq p6,p7=2,r26 ;;
1608 (p6) mov r26=0
1609 (p6) adds r25=16,r25
1610 (p7) adds r26=1,r26
1611 ;;
1612 dep r24=r26,r24,41,2
1613 ;;
1614 mov cr.ipsr=r24
1615 mov cr.iip=r25
1616 mov pr=r31,-1 ;;
1617 rfi
1618 ;;
1619 END(hyper_get_ivr)
1621 ENTRY(hyper_eoi)
1622 // when we get here, r20 ~= interrupts pending (nonzero if any)
1623 cmp.ne p7,p0=r20,r0
1624 (p7) br.spnt.many dispatch_break_fault ;;
1625 #ifdef FAST_HYPERPRIVOP_CNT
1626 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_EOI);;
1627 ld4 r21=[r20];;
1628 adds r21=1,r21;;
1629 st4 [r20]=r21;;
1630 #endif
1631 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1632 ld8 r22=[r22];;
1633 adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
1634 ld8 r23=[r22];;
1635 cmp.eq p6,p0=r23,r0;;
1636 (p6) adds r22=-8,r22;;
1637 (p6) ld8 r23=[r22];;
1638 (p6) cmp.eq p6,p0=r23,r0;;
1639 (p6) adds r22=-8,r22;;
1640 (p6) ld8 r23=[r22];;
1641 (p6) cmp.eq p6,p0=r23,r0;;
1642 (p6) adds r22=-8,r22;;
1643 (p6) ld8 r23=[r22];;
1644 (p6) cmp.eq p6,p0=r23,r0;;
1645 cmp.eq p6,p0=r23,r0
1646 (p6) br.cond.spnt.few 1f; // this is actually an error
1647 // r22 points to non-zero element of insvc, r23 has value
1648 shr.u r24=r23,1;;
1649 or r24=r23,r24;;
1650 shr.u r25=r24,2;;
1651 or r24=r24,r25;;
1652 shr.u r25=r24,4;;
1653 or r24=r24,r25;;
1654 shr.u r25=r24,8;;
1655 or r24=r24,r25;;
1656 shr.u r25=r24,16;;
1657 or r24=r24,r25;;
1658 shr.u r25=r24,32;;
1659 or r24=r24,r25;;
1660 andcm r24=0xffffffffffffffff,r24;;
1661 popcnt r24=r24;;
1662 sub r24=63,r24;;
1663 // r24 now contains the bit index
1664 mov r25=1;;
1665 shl r25=r25,r24;;
1666 andcm r23=r23,r25;;
1667 st8 [r22]=r23;;
1668 1: mov r24=cr.ipsr
1669 mov r25=cr.iip;;
1670 extr.u r26=r24,41,2 ;;
1671 cmp.eq p6,p7=2,r26 ;;
1672 (p6) mov r26=0
1673 (p6) adds r25=16,r25
1674 (p7) adds r26=1,r26
1675 ;;
1676 dep r24=r26,r24,41,2
1677 ;;
1678 mov cr.ipsr=r24
1679 mov cr.iip=r25
1680 mov pr=r31,-1 ;;
1681 rfi
1682 ;;
1683 END(hyper_eoi)
1685 ENTRY(hyper_set_itm)
1686 // when we get here, r20 ~= interrupts pending (nonzero if any)
1687 cmp.ne p7,p0=r20,r0
1688 (p7) br.spnt.many dispatch_break_fault ;;
1689 #ifdef FAST_HYPERPRIVOP_CNT
1690 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_ITM);;
1691 ld4 r21=[r20];;
1692 adds r21=1,r21;;
1693 st4 [r20]=r21;;
1694 #endif
1695 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
1696 ld8 r21=[r20];;
1697 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1698 ld8 r20=[r20];;
1699 adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
1700 st8 [r20]=r8;;
1701 cmp.geu p6,p0=r21,r8;;
1702 (p6) mov r21=r8;;
1703 // now "safe set" cr.itm=r21
1704 mov r23=100;;
1705 2: mov cr.itm=r21;;
1706 srlz.d;;
1707 mov r22=ar.itc ;;
1708 cmp.leu p6,p0=r21,r22;;
1709 add r21=r21,r23;;
1710 shl r23=r23,1;;
1711 (p6) br.cond.spnt.few 2b;;
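// The "safe set" loop guards against programming cr.itm in the past;
// in C, roughly (a sketch):
//   for (margin = 100; ; margin <<= 1) {
//       ia64_set_itm(t);
//       if (ia64_get_itc() < t)
//           break;            // itm is in the future: done
//       t += margin;          // already passed: push out and retry
//   }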
1712 1: mov r24=cr.ipsr
1713 mov r25=cr.iip;;
1714 extr.u r26=r24,41,2 ;;
1715 cmp.eq p6,p7=2,r26 ;;
1716 (p6) mov r26=0
1717 (p6) adds r25=16,r25
1718 (p7) adds r26=1,r26
1719 ;;
1720 dep r24=r26,r24,41,2
1721 ;;
1722 mov cr.ipsr=r24
1723 mov cr.iip=r25
1724 mov pr=r31,-1 ;;
1725 rfi
1726 ;;
1727 END(hyper_set_itm)
1729 ENTRY(hyper_get_rr)
1730 #ifdef FAST_HYPERPRIVOP_CNT
1731 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_RR);;
1732 ld4 r21=[r20];;
1733 adds r21=1,r21;;
1734 st4 [r20]=r21;;
1735 #endif
1736 extr.u r25=r8,61,3;;
1737 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
1738 shl r25=r25,3;;
1739 add r20=r20,r25;;
1740 ld8 r8=[r20];;
1741 1: mov r24=cr.ipsr
1742 mov r25=cr.iip;;
1743 extr.u r26=r24,41,2 ;;
1744 cmp.eq p6,p7=2,r26 ;;
1745 (p6) mov r26=0
1746 (p6) adds r25=16,r25
1747 (p7) adds r26=1,r26
1748 ;;
1749 dep r24=r26,r24,41,2
1750 ;;
1751 mov cr.ipsr=r24
1752 mov cr.iip=r25
1753 mov pr=r31,-1 ;;
1754 rfi
1755 ;;
1756 END(hyper_get_rr)
1758 ENTRY(hyper_set_rr)
1759 extr.u r25=r8,61,3;;
1760 cmp.leu p7,p0=7,r25 // punt on setting rr7
1761 (p7) br.spnt.many dispatch_break_fault ;;
1762 #ifdef FAST_HYPERPRIVOP_CNT
1763 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR);;
1764 ld4 r21=[r20];;
1765 adds r21=1,r21;;
1766 st4 [r20]=r21;;
1767 #endif
1768 extr.u r26=r9,8,24 // r26 = r9.rid
1769 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1770 ld8 r20=[r20];;
1771 adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
1772 ld4 r22=[r21];;
1773 adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
1774 ld4 r23=[r21];;
1775 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
1776 add r22=r26,r22;;
1777 cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
1778 (p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
1779 // r22 = adjusted rid (starting_rid + requested rid)
1780 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
1781 shl r25=r25,3;;
1782 add r20=r20,r25;;
1783 st8 [r20]=r9;; // store away exactly what was passed
1784 // but adjust value actually placed in rr[r8]
1785 // r22 contains adjusted rid, "mangle" it (see regionreg.c)
1786 // and set ps to PAGE_SHIFT and ve to 1
1787 extr.u r27=r22,0,8
1788 extr.u r28=r22,8,8
1789 extr.u r29=r22,16,8;;
1790 dep.z r23=PAGE_SHIFT,2,6;;
1791 dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
1792 dep r23=r27,r23,24,8;;
1793 dep r23=r28,r23,16,8;;
1794 dep r23=r29,r23,8,8
1795 cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
1796 (p6) st8 [r24]=r23
1797 mov rr[r8]=r23;;
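// In C, the range check and mangle are roughly (a sketch):
//   rid = starting_rid + ((r9 >> 8) & 0xffffff);
//   if (rid >= ending_rid) return;          // out of range: ignore
//   rrval = (PAGE_SHIFT << 2) | 1;          // ps field, ve=1
//   rrval |= (rid & 0x0000ff) << 24;        // swap rid bytes 1 and 3
//   rrval |= (rid & 0x00ff00) << 8;
//   rrval |= (rid & 0xff0000) >> 8;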
1798 // done, mosey on back
1799 1: mov r24=cr.ipsr
1800 mov r25=cr.iip;;
1801 extr.u r26=r24,41,2 ;;
1802 cmp.eq p6,p7=2,r26 ;;
1803 (p6) mov r26=0
1804 (p6) adds r25=16,r25
1805 (p7) adds r26=1,r26
1806 ;;
1807 dep r24=r26,r24,41,2
1808 ;;
1809 mov cr.ipsr=r24
1810 mov cr.iip=r25
1811 mov pr=r31,-1 ;;
1812 rfi
1813 ;;
1814 END(hyper_set_rr)

ENTRY(hyper_set_kr)
        extr.u r25=r8,3,61;;
        cmp.ne p7,p0=r0,r25     // if kr# > 7, go slow way
(p7)    br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_KR);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
        adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18 ;;
        shl r20=r8,3;;
        add r22=r20,r21;;
        st8 [r22]=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar0=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar1=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar2=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar3=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar4=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar5=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar6=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar7=r9;;
        // done, mosey on back
1:      mov r24=cr.ipsr
        mov r25=cr.iip;;
        extr.u r26=r24,41,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r25=16,r25
(p7)    adds r26=1,r26
        ;;
        dep r24=r26,r24,41,2
        ;;
        mov cr.ipsr=r24
        mov cr.iip=r25
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_set_kr)
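
// In rough C, the ladder above is a switch: ar.k0..ar.k7 must be named
// statically in an instruction, so the assembly unrolls one compare per
// register (a sketch; xsi_kr_shadow stands in for the XSI_KR0 area):
//
//	void set_kr(unsigned long n, unsigned long val)
//	{
//		xsi_kr_shadow[n] = val;
//		switch (n) {
//		case 0: ia64_setreg(_IA64_REG_AR_KR0, val); break;
//		case 1: ia64_setreg(_IA64_REG_AR_KR1, val); break;
//		case 2: ia64_setreg(_IA64_REG_AR_KR2, val); break;
//		case 3: ia64_setreg(_IA64_REG_AR_KR3, val); break;
//		case 4: ia64_setreg(_IA64_REG_AR_KR4, val); break;
//		case 5: ia64_setreg(_IA64_REG_AR_KR5, val); break;
//		case 6: ia64_setreg(_IA64_REG_AR_KR6, val); break;
//		case 7: ia64_setreg(_IA64_REG_AR_KR7, val); break;
//		}
//	}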

// This routine was derived from optimized assembly output from
// vcpu_thash, so it is dense and difficult to read, but it works.
// On entry:
// r18 == XSI_PSR_IC
// r31 == pr
ENTRY(hyper_thash)
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_THASH);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
        shr.u r20 = r8, 61
        addl r25 = 1, r0
        movl r17 = 0xe000000000000000
        ;;
        and r21 = r17, r8       // VHPT_Addr1
        ;;
        shladd r28 = r20, 3, r18
        adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
        ;;
        adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
        addl r28 = 32767, r0
        ld8 r24 = [r19]         // pta
        ;;
        ld8 r23 = [r27]         // rrs[vadr>>61]
        extr.u r26 = r24, 2, 6
        ;;
        extr.u r22 = r23, 2, 6
        shl r30 = r25, r26
        ;;
        shr.u r19 = r8, r22
        shr.u r29 = r24, 15
        ;;
        adds r17 = -1, r30
        ;;
        shladd r27 = r19, 3, r0
        extr.u r26 = r17, 15, 46
        ;;
        andcm r24 = r29, r26
        and r19 = r28, r27
        shr.u r25 = r27, 15
        ;;
        and r23 = r26, r25
        ;;
        or r22 = r24, r23
        ;;
        dep.z r20 = r22, 15, 46
        ;;
        or r16 = r20, r21
        ;;
        or r8 = r19, r16
        // done, update iip/ipsr to next instruction
        mov r24=cr.ipsr
        mov r25=cr.iip;;
        extr.u r26=r24,41,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r25=16,r25
(p7)    adds r26=1,r26
        ;;
        dep r24=r26,r24,41,2
        ;;
        mov cr.ipsr=r24
        mov cr.iip=r25
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_thash)
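
// Because the scheduling above hides the dataflow, this is the hash being
// computed, reconstructed as a C sketch from the instructions (vcpu_thash
// in vcpu.c is the authoritative version; names here are illustrative):
//
//	unsigned long thash(unsigned long vadr, unsigned long pta,
//	                    unsigned long rr)
//	{
//		unsigned long ps = (rr >> 2) & 0x3f;	// rr.ps
//		unsigned long sz = (pta >> 2) & 0x3f;	// pta.size
//		unsigned long off = (vadr >> ps) << 3;	// 8-byte entries
//		unsigned long mask = ((1UL << sz) - 1) >> 15;
//		unsigned long mid = ((pta >> 15) & ~mask)   // VHPT base
//		                  | ((off >> 15) & mask);   // hash index
//		return (vadr & 0xe000000000000000UL)	// keep region bits
//		     | (mid << 15)
//		     | (off & 32767);	// low 15 bits pass straight through
//	}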

ENTRY(hyper_ptc_ga)
#ifndef FAST_PTC_GA
        br.spnt.few dispatch_break_fault ;;
#endif
        // FIXME: validate not flushing Xen addresses
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_PTC_GA);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
        mov r28=r8
        extr.u r19=r9,2,6       // addr_range=1<<((r9&0xfc)>>2)
        mov r20=1
        shr.u r24=r8,61
        addl r27=56,r0          // PAGE_SHIFT<<2 (for ptc.ga)
        movl r26=0x8000000000000000     // INVALID_TI_TAG
        mov r30=ar.lc
        ;;
        shl r19=r20,r19
        cmp.eq p7,p0=7,r24
(p7)    br.spnt.many dispatch_break_fault ;;    // slow way for rr7
        ;;
        cmp.le p7,p0=r19,r0     // skip flush if size<=0
(p7)    br.cond.dpnt 2f ;;
        extr.u r24=r19,0,PAGE_SHIFT
        shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
        cmp.ne p7,p0=r24,r0 ;;
(p7)    adds r23=1,r23 ;;       // size not a whole number of pages? one extra pass
        mov ar.lc=r23
        movl r29=PAGE_SIZE;;
1:
        thash r25=r28 ;;
        adds r25=16,r25 ;;
        ld8 r24=[r25] ;;
        // FIXME: should check if tag matches, not just blow it away
        or r24=r26,r24 ;;       // vhpt_entry->ti_tag = 1
        st8 [r25]=r24
        ptc.ga r28,r27 ;;
        srlz.i ;;
        add r28=r29,r28
        br.cloop.sptk.few 1b
        ;;
2:
        mov ar.lc=r30 ;;
        mov r29=cr.ipsr
        mov r30=cr.iip;;
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27];;
        adds r25=IA64_VCPU_DTLB_OFFSET,r27
        adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
        ld8 r24=[r25]
        ld8 r27=[r26] ;;
        and r24=-2,r24
        and r27=-2,r27 ;;
        st8 [r25]=r24           // set 1-entry i/dtlb as not present
        st8 [r26]=r27 ;;
        // increment to point to next instruction
        extr.u r26=r29,41,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r30=16,r30
(p7)    adds r26=1,r26
        ;;
        dep r29=r26,r29,41,2
        ;;
        mov cr.ipsr=r29
        mov cr.iip=r30
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_ptc_ga)
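
// Per page of the purge range, the loop above kills the matching VHPT
// entry and issues ptc.ga. In rough C (a sketch; ia64_thash/ia64_ptcga
// are the standard intrinsics, and 16 is the ti_tag offset within the
// long-format VHPT entry used throughout this file):
//
//	unsigned long size = 1UL << ((r9 >> 2) & 0x3f);
//	unsigned long v;
//	for (v = vadr; v < vadr + size; v += PAGE_SIZE) {
//		unsigned long *tag = (unsigned long *)(ia64_thash(v) + 16);
//		*tag |= INVALID_TI_TAG;	// per the FIXME: should match tag first
//		ia64_ptcga(v, PAGE_SHIFT << 2);
//		ia64_srlz_i();
//	}
//	// ...then the 1-entry virtual i/dtlb get their p bits cleared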

// recovery block for hyper_itc metaphysical memory lookup
ENTRY(recover_and_dispatch_break_fault)
#ifdef PERF_COUNTERS
        movl r21=perfcounters + RECOVER_TO_BREAK_FAULT_PERFC_OFS;;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
#endif
        mov b0=r29 ;;
        br.sptk.many dispatch_break_fault;;
END(recover_and_dispatch_break_fault)

// Registers at entry
// r17 = break immediate (HYPERPRIVOP_ITC_D or I)
// r18 == XSI_PSR_IC_OFS
// r31 == pr
ENTRY(hyper_itc)
hyper_itc_i:
        // fall through, hyper_itc_d handles both i and d
hyper_itc_d:
#ifndef FAST_ITC
        br.sptk.many dispatch_break_fault ;;
#else
        // ensure itir.ps >= xen's pagesize
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r23=[r23];;
        extr.u r24=r23,2,6;;    // r24==logps
        cmp.gt p7,p0=PAGE_SHIFT,r24
(p7)    br.spnt.many dispatch_break_fault ;;
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
        // for now, punt on region0 inserts
        extr.u r21=r21,61,3;;
        cmp.eq p7,p0=r21,r0
(p7)    br.spnt.many dispatch_break_fault ;;
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27];;
        adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
        ld8 r27=[r27];;
        // FIXME: is the global var dom0 always pinned? assume so for now
        movl r28=dom0;;
        ld8 r28=[r28];;
        // FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
        cmp.ne p7,p0=r27,r28
(p7)    br.spnt.many dispatch_break_fault ;;
        // p6/p7 are consumed below even when the counters are compiled
        // out, so set them outside the #ifdef
        cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
#ifdef FAST_HYPERPRIVOP_CNT
(p6)    movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_D);;
(p7)    movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_I);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
(p6)    mov r17=2;;
(p7)    mov r17=3;;
        mov r29=b0 ;;
        movl r30=recover_and_dispatch_break_fault ;;
        mov r16=r8;;
        // fall through
#endif
END(hyper_itc)
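
// The fast-path guards above, in rough C (a sketch; helper spellings are
// illustrative):
//
//	if (((itir >> 2) & 0x3f) < PAGE_SHIFT)	// page smaller than Xen's
//		goto slow_path;
//	if ((ifa >> 61) == 0)			// region-0 insert: punt
//		goto slow_path;
//	if (current->domain != dom0)		// only dom0 for now (see
//		goto slow_path;			// lookup_domain_mpa note)
//	// then r17 is recoded: bit 0 = instruction?, bit 1 = "is an itc"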

#if defined(FAST_ITC) || defined(FAST_TLB_MISS_REFLECT)

// fast_insert(PSCB(ifa),r24=ps,r16=pte)
// r16 == pte
// r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
// r18 == XSI_PSR_IC_OFS
// r24 == ps
// r29 == saved value of b0 in case of recovery
// r30 == recovery ip if failure occurs
// r31 == pr
ENTRY(fast_insert)
        // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
        mov r19=1;;
        shl r20=r19,r24;;
        adds r20=-1,r20;;       // r20 == mask
        movl r19=_PAGE_PPN_MASK;;
        and r22=r16,r19;;       // r22 == pteval & _PAGE_PPN_MASK
        andcm r19=r22,r20;;
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
        and r20=r21,r20;;
        or r19=r19,r20;;        // r19 == mpaddr
        // FIXME: for now, just do domain0 and skip mpaddr range checks
        dep r20=r0,r19,0,PAGE_SHIFT
        movl r21=PAGE_PHYS ;;
        or r20=r20,r21 ;;       // r20==return value from lookup_domain_mpa
        // r16=pteval,r20=pteval2
        movl r19=_PAGE_PPN_MASK
        movl r21=_PAGE_PL_2;;
        andcm r25=r16,r19;;     // r25==pteval & ~_PAGE_PPN_MASK
        and r22=r20,r19;;
        or r22=r22,r21;;
        or r22=r22,r25;;        // r22==return value from translate_domain_pte
        // done with translate_domain_pte
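        // What was just computed, in rough C (a sketch; dom0's identity
        // lookup_domain_mpa is hardwired, as the FIXME notes):
        //
        //	mask   = (1UL << ps) - 1;
        //	mpaddr = (pteval & _PAGE_PPN_MASK & ~mask) | (ifa & mask);
        //	pte2   = (mpaddr & ~(PAGE_SIZE-1)) | PAGE_PHYS;
        //	newpte = (pteval & ~_PAGE_PPN_MASK)	// guest's flag bits
        //	       | (pte2 & _PAGE_PPN_MASK)	// machine ppn
        //	       | _PAGE_PL_2;			// clamp to PL 2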
        // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
        // FIXME: for now, just domain0 and skip range check
        // psr.ic already cleared
        // NOTE: r24 still contains ps (from above)
        shladd r24=r24,2,r0;;
        mov cr.itir=r24;;
        adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r23=[r23];;
        mov cr.ifa=r23;;
        tbit.z p6,p7=r17,0;;
(p6)    itc.d r22;;
(p7)    itc.i r22;;
        dv_serialize_data
        // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
        thash r28=r23
        or r26=1,r22;;
        ttag r21=r23
        adds r25=8,r28
        mov r19=r28;;
        st8 [r25]=r24
        adds r20=16,r28;;
        st8 [r19]=r26
        st8 [r20]=r21;;
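        // i.e., fill the long-format VHPT entry at thash(vaddr); per the
        // offsets used above, the layout is (sketch):
        //
        //	struct vhpt_entry { u64 page_flags; u64 itir; u64 ti_tag; };
        //	struct vhpt_entry *v = (void *)ia64_thash(vaddr);
        //	v->itir = logps << 2;
        //	v->page_flags = pte | 1;	// present bit set
        //	v->ti_tag = ia64_ttag(vaddr);	// valid tag written last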
        // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
        // TR_ENTRY = {page_flags,itir,addr,rid}
        tbit.z p6,p7=r17,0;;
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27];;
        adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
(p6)    adds r27=IA64_VCPU_DTLB_OFFSET,r27
(p7)    adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
        st8 [r27]=r22,8;;       // page_flags: already has pl >= 2 and p==1
        st8 [r27]=r24,8;;       // itir
        mov r19=-4096;;
        and r23=r23,r19;;
        st8 [r27]=r23,8;;       // ifa & ~0xfff
        adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        extr.u r25=r23,61,3;;
        shladd r29=r25,3,r29;;
        ld8 r29=[r29];;
        movl r20=0xffffff00;;
        and r29=r29,r20;;
        st8 [r27]=r29,-8;;      // rid
        // if ps > 12
        cmp.eq p7,p0=12<<2,r24
(p7)    br.cond.sptk.many 1f;;
        // if (ps > 12) {
        //	trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
        extr.u r29=r24,2,6
        mov r28=1;;
        shl r26=r28,r29;;
        adds r29=-12,r29;;
        shl r25=r28,r29;;
        mov r29=-1
        adds r26=-1,r26
        adds r25=-1,r25;;
        andcm r26=r29,r26       // ~((1UL<<ps)-1)
        andcm r25=r29,r25;;     // ~((1UL<<(ps-12))-1)
        ld8 r29=[r27];;
        and r29=r29,r26;;
        st8 [r27]=r29,-16;;
        ld8 r29=[r27];;
        extr.u r28=r29,12,38;;
        movl r26=0xfffc000000000fff;;
        and r29=r29,r26
        and r28=r28,r25;;
        shl r28=r28,12;;
        or r29=r29,r28;;
        st8 [r27]=r29;;
1:      // done with vcpu_set_tr_entry
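        // TR_ENTRY above is four 8-byte words -- {page_flags, itir, vadr,
        // rid} -- which is why the ps>12 fixup steps r27 back by 8/16 to
        // rewrite vadr and then the ppn field (bits 12..49 of page_flags).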
        // PSCBX(vcpu,i/dtlb_pte) = mp_pte
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27];;
        tbit.z p6,p7=r17,0;;
(p6)    adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
(p7)    adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
        st8 [r27]=r16;;
        // done with vcpu_itc_no_srlz

        // if hyper_itc, increment to point to next instruction
        tbit.z p7,p0=r17,1
(p7)    br.cond.sptk.few no_inc_iip;;

        mov r29=cr.ipsr
        mov r30=cr.iip;;
        extr.u r26=r29,41,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r30=16,r30
(p7)    adds r26=1,r26
        ;;
        dep r29=r26,r29,41,2
        ;;
        mov cr.ipsr=r29
        mov cr.iip=r30;;

no_inc_iip:
        mov pr=r31,-1 ;;
        rfi
        ;;
END(fast_insert)
#endif