ia64/xen-unstable: xen/arch/ia64/xen/hyperprivop.S @ 16785:af3550f53874

[IA64] domheap: Don't pin xenheap down. Now it's unnecessary.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Author:   Alex Williamson <alex.williamson@hp.com>
Date:     Thu Jan 17 12:05:43 2008 -0700
Parents:  fe25c7ec84e8
Children: 43a87df9a11e
1 /*
2 * arch/ia64/kernel/hyperprivop.S
3 *
4 * Copyright (C) 2005 Hewlett-Packard Co
5 * Dan Magenheimer <dan.magenheimer@hp.com>
6 */
8 #include <linux/config.h>
10 #include <asm/asmmacro.h>
11 #include <asm/kregs.h>
12 #include <asm/offsets.h>
13 #include <asm/processor.h>
14 #include <asm/system.h>
15 #include <asm/debugger.h>
16 #include <asm/asm-xsi-offsets.h>
17 #include <asm/pgtable.h>
18 #include <asm/vmmu.h>
19 #include <public/xen.h>
21 #ifdef PERF_COUNTERS
22 #define PERFC(n) (THIS_CPU(perfcounters) + (IA64_PERFC_ ## n) * 4)
23 #endif
25 #define PAGE_PHYS (__DIRTY_BITS | _PAGE_PL_PRIV | _PAGE_AR_RWX)
27 #if 1 // change to 0 to turn off all fast paths
28 # define FAST_HYPERPRIVOPS
29 # ifdef PERF_COUNTERS
30 # define FAST_HYPERPRIVOP_CNT
31 # define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
32 # define FAST_REFLECT_CNT
33 # endif
35 //#define FAST_TICK // mostly working (unat problems) but default off for now
36 //#define FAST_TLB_MISS_REFLECT // mostly working but default off for now
37 # undef FAST_ITC //XXX TODO fast_itc doesn't support dom0 vp yet
38 # define FAST_BREAK
39 # undef FAST_ACCESS_REFLECT //XXX TODO fast_access_reflect
40 // doesn't support dom0 vp yet.
41 # define FAST_RFI
42 // TODO: Since we use callback to deliver interrupt,
43 // FAST_SSM_I needs to be rewritten.
44 # define FAST_SSM_I
45 # define FAST_PTC_GA
46 # undef RFI_TO_INTERRUPT // not working yet
47 # define FAST_SET_RR0_TO_RR4
48 #endif
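// Each FAST_* switch above enables a hand-coded assembly fast path that
// emulates the corresponding guest operation entirely in this file,
// without saving full state and entering the C handlers; switches left
// undefined make the matching handler fall straight back to the slow
// path (dispatch_break_fault / dispatch_reflection / page_fault).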
50 #ifdef CONFIG_SMP
51 //#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
52 #undef FAST_PTC_GA
53 #endif
55 // FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
56 #define HANDLE_AR_UNAT
58 // FIXME: This is defined in include/asm-ia64/hw_irq.h, but that header
59 // doesn't appear to be includable from assembly?
60 #define IA64_TIMER_VECTOR 0xef
62 // Note: not hand-scheduled for now
63 // Registers at entry
64 // r16 == cr.isr
65 // r17 == cr.iim
66 // r18 == XSI_PSR_IC_OFS
67 // r19 == ipsr.cpl
68 // r31 == pr
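// r17 holds cr.iim, i.e. the immediate of the guest's break instruction,
// which encodes the HYPERPRIVOP_* number; the cmp/br chain below
// dispatches to the matching fast handler and falls back to
// dispatch_break_fault for anything not handled here.  Roughly, in C:
//   switch (iim) { case HYPERPRIVOP_SSM_I: ...; case HYPERPRIVOP_RFI: ...;
//                  default: dispatch_break_fault(); }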
69 GLOBAL_ENTRY(fast_hyperprivop)
70 // HYPERPRIVOP_SSM_I?
71 // assumes domain interrupts pending, so just do it
72 cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
73 (p7) br.sptk.many hyper_ssm_i;;
75 // Check pending event indication
76 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS, r18;;
77 ld8 r20=[r20]
78 ;;
79 ld1 r22=[r20],-1 // evtchn_upcall_mask
80 ;;
81 ld1 r20=[r20] // evtchn_upcall_pending
82 ;;
84 // HYPERPRIVOP_RFI?
85 cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
86 (p7) br.sptk.many hyper_rfi
87 ;;
88 #ifndef FAST_HYPERPRIVOPS // see beginning of file
89 br.sptk.many dispatch_break_fault ;;
90 #endif
91 // if event enabled and there are pending events
92 cmp.ne p7,p0=r20,r0
93 ;;
94 cmp.eq.and p7,p0=r22,r0
95 (p7) br.spnt.many dispatch_break_fault
96 ;;
98 // HYPERPRIVOP_COVER?
99 cmp.eq p7,p0=HYPERPRIVOP_COVER,r17
100 (p7) br.sptk.many hyper_cover
101 ;;
103 // HYPERPRIVOP_SSM_DT?
104 cmp.eq p7,p0=HYPERPRIVOP_SSM_DT,r17
105 (p7) br.sptk.many hyper_ssm_dt
106 ;;
108 // HYPERPRIVOP_RSM_DT?
109 cmp.eq p7,p0=HYPERPRIVOP_RSM_DT,r17
110 (p7) br.sptk.many hyper_rsm_dt
111 ;;
113 // HYPERPRIVOP_SET_ITM?
114 cmp.eq p7,p0=HYPERPRIVOP_SET_ITM,r17
115 (p7) br.sptk.many hyper_set_itm
116 ;;
118 // HYPERPRIVOP_SET_RR0_TO_RR4?
119 cmp.eq p7,p0=HYPERPRIVOP_SET_RR0_TO_RR4,r17
120 (p7) br.sptk.many hyper_set_rr0_to_rr4
121 ;;
123 // HYPERPRIVOP_SET_RR?
124 cmp.eq p7,p0=HYPERPRIVOP_SET_RR,r17
125 (p7) br.sptk.many hyper_set_rr
126 ;;
128 // HYPERPRIVOP_GET_RR?
129 cmp.eq p7,p0=HYPERPRIVOP_GET_RR,r17
130 (p7) br.sptk.many hyper_get_rr
131 ;;
133 // HYPERPRIVOP_GET_PSR?
134 cmp.eq p7,p0=HYPERPRIVOP_GET_PSR,r17
135 (p7) br.sptk.many hyper_get_psr
136 ;;
138 // HYPERPRIVOP_PTC_GA?
139 cmp.eq p7,p0=HYPERPRIVOP_PTC_GA,r17
140 (p7) br.sptk.many hyper_ptc_ga
141 ;;
143 // HYPERPRIVOP_ITC_D?
144 cmp.eq p7,p0=HYPERPRIVOP_ITC_D,r17
145 (p7) br.sptk.many hyper_itc_d
146 ;;
148 // HYPERPRIVOP_ITC_I?
149 cmp.eq p7,p0=HYPERPRIVOP_ITC_I,r17
150 (p7) br.sptk.many hyper_itc_i
151 ;;
153 // HYPERPRIVOP_THASH?
154 cmp.eq p7,p0=HYPERPRIVOP_THASH,r17
155 (p7) br.sptk.many hyper_thash
156 ;;
158 // HYPERPRIVOP_SET_KR?
159 cmp.eq p7,p0=HYPERPRIVOP_SET_KR,r17
160 (p7) br.sptk.many hyper_set_kr
161 ;;
163 // if not one of the above, give up for now and do it the slow way
164 br.sptk.many dispatch_break_fault
165 ;;
166 END(fast_hyperprivop)
168 // give up for now if: ipsr.be==1, ipsr.pp==1
169 // from reflect_interruption, don't need to:
170 // - printk first extint (debug only)
171 // - check for interrupt collection enabled (routine will force on)
172 // - set ifa (not valid for extint)
173 // - set iha (not valid for extint)
174 // - set itir (not valid for extint)
175 // DO need to
176 // - increment the HYPER_SSM_I fast_hyperprivop counter
177 // - set shared_mem iip to instruction after HYPER_SSM_I
178 // - set cr.iip to guest iva+0x3000
179 // - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
180 // be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
181 // i = shared_mem interrupt_delivery_enabled
182 // ic = shared_mem interrupt_collection_enabled
183 // ri = instruction after HYPER_SSM_I
184 // all other bits unchanged from real cr.ipsr
185 // - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
186 // - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
187 // and isr.ri to cr.isr.ri (all other bits zero)
188 // - cover and set shared_mem precover_ifs to cr.ifs
189 // ^^^ MISSED THIS FOR fast_break??
190 // - set shared_mem interrupt_delivery_enabled to 0
191 // - set shared_mem interrupt_collection_enabled to 0
192 // - set r31 to SHAREDINFO_ADDR
193 // - virtual bank switch 0
194 // maybe implement later
195 // - verify that there really IS a deliverable interrupt pending
196 // - set shared_mem iva
197 // needs to be done but not implemented (in reflect_interruption)
198 // - set shared_mem iipa
199 // don't know for sure
200 // - set shared_mem unat
201 // r16 == cr.isr
202 // r17 == cr.iim
203 // r18 == XSI_PSR_IC
204 // r19 == ipsr.cpl
205 // r31 == pr
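// Hedged C-style sketch of the fast path below (illustrative only, the
// helper names are not real functions):
//   if (ipsr.pp) return dispatch_break_fault();
//   xsi->iip  = address of the instruction after the break;
//   xsi->isr  = (isr & IA64_ISR_IR) | (ri of that next instruction);
//   cr.ipsr   = (ipsr & ~(DELIVER_PSR_CLR & ~CPL)) | DELIVER_PSR_SET;
//   xsi->ipsr = ipsr with dt/it/rt/i/ic forced on, bn=0, cpl cooked;
//   mask events (vpsr.i = 0) and clear vpsr.ic;
//   cover; xsi->precover_ifs = cr.ifs; xsi->ifs = 0;
//   cr.iip = vcpu->event_callback_ip; switch to virtual bank 0; rfi;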
206 ENTRY(hyper_ssm_i)
207 #ifndef FAST_SSM_I
208 br.spnt.few dispatch_break_fault ;;
209 #endif
210 // give up for now if: ipsr.be==1, ipsr.pp==1
211 mov r30=cr.ipsr
212 mov r29=cr.iip;;
213 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
214 cmp.ne p7,p0=r21,r0
215 (p7) br.sptk.many dispatch_break_fault ;;
216 #ifdef FAST_HYPERPRIVOP_CNT
217 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
218 ld4 r21=[r20];;
219 adds r21=1,r21;;
220 st4 [r20]=r21;;
221 #endif
222 // set shared_mem iip to instruction after HYPER_SSM_I
223 extr.u r20=r30,IA64_PSR_RI_BIT,2 ;;
224 cmp.eq p6,p7=2,r20 ;;
225 (p6) mov r20=0
226 (p6) adds r29=16,r29
227 (p7) adds r20=1,r20 ;;
228 dep r30=r20,r30,IA64_PSR_RI_BIT,2 // adjust cr.ipsr.ri but don't save yet
229 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
230 st8 [r21]=r29 ;;
231 // set shared_mem isr
232 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
233 dep r16=r16,r0,IA64_ISR_IR_BIT,1;; // insert into cr.isr (rest of bits zero)
234 dep r16=r20,r16,IA64_PSR_RI_BIT,2 // deposit cr.isr.ri
235 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
236 st8 [r21]=r16
237 // set cr.ipsr
238 mov r29=r30
239 movl r28=DELIVER_PSR_SET
240 movl r27=~(DELIVER_PSR_CLR & (~IA64_PSR_CPL));;
241 and r29=r29,r27;;
242 or r29=r29,r28;;
243 // set hpsr_dfh to ipsr
244 adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
245 ld1 r28=[r28];;
246 dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
247 mov cr.ipsr=r29;;
248 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
249 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
250 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
251 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
252 // FOR SSM_I ONLY, also turn on psr.i and psr.ic
253 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC)
254 // movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
255 movl r27=~IA64_PSR_BN;;
256 or r30=r30,r28;;
257 and r30=r30,r27;;
258 mov r20=1
259 movl r22=THIS_CPU(current_psr_i_addr)
260 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
261 ld8 r22=[r22]
262 adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
263 ld1 r28=[r27];;
264 st1 [r27]=r0
265 dep r30=r28,r30,IA64_PSR_DFH_BIT,1
266 ;;
267 st8 [r21]=r30;;
268 // set shared_mem interrupt_delivery_enabled to 0
269 // set shared_mem interrupt_collection_enabled to 0
270 st1 [r22]=r20
271 st4 [r18]=r0
272 // cover and set shared_mem precover_ifs to cr.ifs
273 // set shared_mem ifs to 0
274 cover ;;
275 mov r20=cr.ifs
276 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
277 st8 [r21]=r0 ;;
278 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
279 st8 [r21]=r20 ;;
280 // leave cr.ifs alone for later rfi
281 // set iip to go to event callback handler
282 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
283 ld8 r22=[r22];;
284 adds r22=IA64_VCPU_EVENT_CALLBACK_IP_OFFSET,r22;;
285 ld8 r24=[r22];;
286 mov cr.iip=r24;;
287 // OK, now all set to go except for switch to virtual bank0
288 mov r30=r2
289 mov r29=r3
290 ;;
291 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
292 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
293 // temporarily save ar.unat
294 mov r28=ar.unat
295 bsw.1;;
296 // FIXME?: ar.unat is not really handled correctly,
297 // but may not matter if the OS is NaT-clean
298 .mem.offset 0,0; st8.spill [r2]=r16,16
299 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
300 .mem.offset 0,0; st8.spill [r2]=r18,16
301 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
302 .mem.offset 0,0; st8.spill [r2]=r20,16
303 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
304 .mem.offset 0,0; st8.spill [r2]=r22,16
305 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
306 .mem.offset 0,0; st8.spill [r2]=r24,16
307 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
308 .mem.offset 0,0; st8.spill [r2]=r26,16
309 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
310 .mem.offset 0,0; st8.spill [r2]=r28,16
311 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
312 .mem.offset 0,0; st8.spill [r2]=r30,16
313 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
314 bsw.0 ;;
315 mov r27=ar.unat
316 adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;;
317 //save bank1 ar.unat
318 st8 [r26]=r27
319 //restore ar.unat
320 mov ar.unat=r28
321 mov r2=r30
322 mov r3=r29
323 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
324 st4 [r20]=r0
325 mov pr=r31,-1 ;;
326 rfi
327 ;;
328 END(hyper_ssm_i)
330 // reflect domain clock interrupt
331 // r31 == pr
332 // r30 == cr.ivr
333 // r29 == rp
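// Fast-path filter: return to rp (let the normal handler run) unless the
// vector really is IA64_TIMER_VECTOR, Xen's own next timer event
// (itm_next) is not imminent, and ipsr.pp is clear.  Otherwise ack the
// interrupt (cr.eoi), re-arm cr.itm, mark the guest's itv vector pending
// in the vcpu irr and set evtchn_upcall_pending, and, if the guest was
// interrupted above pl0 with vpsr.i on, deliver it immediately at
// iva+0x3000 the same way fast_reflect does.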
334 GLOBAL_ENTRY(fast_tick_reflect)
335 #ifndef FAST_TICK // see beginning of file
336 br.cond.sptk.many rp;;
337 #endif
338 mov r28=IA64_TIMER_VECTOR;;
339 cmp.ne p6,p0=r28,r30
340 (p6) br.cond.spnt.few rp;;
341 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
342 ld8 r26=[r20]
343 mov r27=ar.itc;;
344 adds r27=200,r27;; // safety margin
345 cmp.ltu p6,p0=r26,r27
346 (p6) br.cond.spnt.few rp;;
347 mov r17=cr.ipsr;;
348 // slow path if: ipsr.pp==1
349 extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
350 cmp.ne p6,p0=r21,r0
351 (p6) br.cond.spnt.few rp;;
352 // definitely have a domain tick
353 mov cr.eoi=r0
354 mov rp=r29
355 mov cr.itm=r26 // ensure next tick
356 #ifdef FAST_REFLECT_CNT
357 movl r20=PERFC(fast_reflect + (0x3000>>8));;
358 ld4 r21=[r20];;
359 adds r21=1,r21;;
360 st4 [r20]=r21;;
361 #endif
362 // vcpu_pend_timer(current)
363 movl r18=THIS_CPU(current_psr_ic_addr)
364 ;;
365 ld8 r18=[r18]
366 ;;
367 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
368 ld8 r20=[r20];;
369 cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
370 (p6) br.cond.spnt.few fast_tick_reflect_done;;
371 tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
372 (p6) br.cond.spnt.few fast_tick_reflect_done;;
373 extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
374 extr.u r26=r20,6,2 // r26 has irr index of itv.vector
375 movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
376 ld8 r19=[r19];;
377 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
378 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
379 ld8 r24=[r22]
380 ld8 r23=[r23];;
381 cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
382 (p6) br.cond.spnt.few fast_tick_reflect_done;;
383 // set irr bit
384 adds r21=IA64_VCPU_IRR0_OFFSET,r19
385 shl r26=r26,3;;
386 add r21=r21,r26
387 mov r25=1;;
388 shl r22=r25,r27
389 ld8 r23=[r21];;
390 or r22=r22,r23;;
391 st8 [r21]=r22
392 // set evtchn_upcall_pending!
393 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18;;
394 ld8 r20=[r20];;
395 adds r20=-1,r20;; // evtchn_upcall_pending
396 st1 [r20]=r25
397 // if interrupted at pl0, we're done
398 extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
399 cmp.eq p6,p0=r16,r0;;
400 (p6) br.cond.spnt.few fast_tick_reflect_done;;
401 // if guest vpsr.i is off, we're done
402 movl r21=THIS_CPU(current_psr_i_addr);;
403 ld8 r21=[r21];;
404 ld1 r21=[r21];;
405 cmp.eq p0,p6=r21,r0
406 (p6) br.cond.spnt.few fast_tick_reflect_done;;
408 // OK, we have a clock tick to deliver to the active domain!
409 // so deliver to iva+0x3000
410 // r17 == cr.ipsr
411 // r18 == XSI_PSR_IC
412 // r19 == IA64_KR(CURRENT)
413 // r31 == pr
414 mov r16=cr.isr
415 mov r29=cr.iip
416 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
417 st8 [r21]=r29
418 // set shared_mem isr
419 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
420 dep r16=r16,r0,IA64_ISR_IR_BIT,1 // insert into cr.isr (rest of bits zero)
421 extr.u r20=r17,IA64_PSR_RI_BIT,2;; // get ipsr.ri
422 dep r16=r20,r16,IA64_PSR_RI_BIT,2 // deposit cr.isr.ei
423 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18;;
424 st8 [r21]=r16
425 // set cr.ipsr (make sure cpl==CONFIG_CPL0_EMUL!)
426 mov r29=r17
427 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
428 movl r27=~DELIVER_PSR_CLR;;
429 and r29=r29,r27;;
430 or r29=r29,r28;;
431 mov cr.ipsr=r29;;
432 // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
433 extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
434 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
435 (p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
436 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
437 movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
438 or r17=r17,r28;;
439 and r17=r17,r27
440 ld4 r16=[r18];;
441 cmp.ne p6,p0=r16,r0
442 movl r22=THIS_CPU(current_psr_i_addr);;
443 ld8 r22=[r22]
444 (p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
445 ld1 r16=[r22];;
446 cmp.eq p6,p0=r16,r0;;
447 (p6) dep r17=-1,r17,IA64_PSR_I_BIT,1
448 mov r20=1
449 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18;;
450 st8 [r21]=r17
451 // set shared_mem interrupt_delivery_enabled to 0
452 // set shared_mem interrupt_collection_enabled to 0
453 st1 [r22]=r20
454 st4 [r18]=r0;;
455 // cover and set shared_mem precover_ifs to cr.ifs
456 // set shared_mem ifs to 0
457 cover ;;
458 mov r20=cr.ifs
459 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
460 st8 [r21]=r0 ;;
461 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
462 st8 [r21]=r20
463 // leave cr.ifs alone for later rfi
464 // set iip to go to domain IVA break instruction vector
465 adds r22=IA64_VCPU_IVA_OFFSET,r19;;
466 ld8 r23=[r22]
467 movl r24=0x3000;;
468 add r24=r24,r23;;
469 mov cr.iip=r24
470 // OK, now all set to go except for switch to virtual bank0
471 mov r30=r2
472 mov r29=r3
473 #ifdef HANDLE_AR_UNAT
474 mov r28=ar.unat
475 #endif
476 ;;
477 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
478 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
479 ;;
480 bsw.1;;
481 .mem.offset 0,0; st8.spill [r2]=r16,16
482 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
483 .mem.offset 0,0; st8.spill [r2]=r18,16
484 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
485 .mem.offset 0,0; st8.spill [r2]=r20,16
486 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
487 .mem.offset 0,0; st8.spill [r2]=r22,16
488 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
489 .mem.offset 0,0; st8.spill [r2]=r24,16
490 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
491 .mem.offset 0,0; st8.spill [r2]=r26,16
492 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
493 .mem.offset 0,0; st8.spill [r2]=r28,16
494 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
495 .mem.offset 0,0; st8.spill [r2]=r30,16
496 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
497 #ifdef HANDLE_AR_UNAT
498 // r16~r23 are preserved regs in bank0, we need to restore them;
499 // r24~r31 are scratch regs, we don't need to handle their NaT bits,
500 // because the OS handler must assign them before accessing them
501 ld8 r16=[r2],16
502 ld8 r17=[r3],16;;
503 ld8 r18=[r2],16
504 ld8 r19=[r3],16;;
505 ld8 r20=[r2],16
506 ld8 r21=[r3],16;;
507 ld8 r22=[r2],16
508 ld8 r23=[r3],16;;
509 #endif
510 ;;
511 bsw.0 ;;
512 mov r24=ar.unat
513 mov r2=r30
514 mov r3=r29
515 #ifdef HANDLE_AR_UNAT
516 mov ar.unat=r28
517 #endif
518 ;;
519 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
520 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
521 st8 [r25]=r24
522 st4 [r20]=r0
523 fast_tick_reflect_done:
524 mov pr=r31,-1 ;;
525 rfi
526 END(fast_tick_reflect)
528 // reflect domain breaks directly to domain
529 // r16 == cr.isr
530 // r17 == cr.iim
531 // r18 == XSI_PSR_IC
532 // r19 == ipsr.cpl
533 // r31 == pr
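// Reflect a guest break instruction (e.g. a guest-OS syscall) straight
// back to the guest at ivt offset 0x2c00 without entering C; the checks
// below punt to dispatch_break_fault for ipsr.pp and for break numbers
// Xen itself cares about (0, the crash-debug break, the simulator breaks
// 0x80001/0x80002), then save iim in the shared area and fall through
// into fast_reflect.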
534 GLOBAL_ENTRY(fast_break_reflect)
535 #ifndef FAST_BREAK // see beginning of file
536 br.sptk.many dispatch_break_fault ;;
537 #endif
538 mov r30=cr.ipsr
539 mov r29=cr.iip;;
540 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
541 cmp.ne p7,p0=r21,r0
542 (p7) br.spnt.few dispatch_break_fault ;;
543 movl r20=IA64_PSR_CPL ;;
544 and r22=r20,r30 ;;
545 cmp.ne p7,p0=r22,r0
546 (p7) br.spnt.many 1f ;;
547 cmp.eq p7,p0=r17,r0
548 (p7) br.spnt.few dispatch_break_fault ;;
549 #ifdef CRASH_DEBUG
550 movl r21=CDB_BREAK_NUM ;;
551 cmp.eq p7,p0=r17,r21
552 (p7) br.spnt.few dispatch_break_fault ;;
553 #endif
554 1:
555 #if 1 /* special handling in case running on simulator */
556 movl r20=first_break;;
557 ld4 r23=[r20]
558 movl r21=0x80001
559 movl r22=0x80002;;
560 cmp.ne p7,p0=r23,r0
561 (p7) br.spnt.few dispatch_break_fault ;;
562 cmp.eq p7,p0=r21,r17
563 (p7) br.spnt.few dispatch_break_fault ;;
564 cmp.eq p7,p0=r22,r17
565 (p7) br.spnt.few dispatch_break_fault ;;
566 #endif
567 movl r20=0x2c00
568 // save iim in shared_info
569 adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
570 st8 [r21]=r17;;
571 // fall through
572 END(fast_break_reflect)
574 // reflect to domain ivt+r20
575 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
576 // r16 == cr.isr
577 // r18 == XSI_PSR_IC
578 // r20 == offset into ivt
579 // r29 == iip
580 // r30 == ipsr
581 // r31 == pr
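// Common fast delivery: record iip/isr in the shared area, build a
// hypervisor cr.ipsr (DELIVER_PSR_SET/CLR with cpl forced to
// CONFIG_CPL0_EMUL), build the guest's view of ipsr (vpsr.i/ic taken
// from the shared flags), clear the virtual i/ic flags, cover and stash
// cr.ifs in precover_ifs, point cr.iip at the guest's ivt + r20, switch
// to virtual bank 0 and rfi.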
582 ENTRY(fast_reflect)
583 #ifdef FAST_REFLECT_CNT
584 movl r22=PERFC(fast_reflect)
585 shr r23=r20,8-2;;
586 add r22=r22,r23;;
587 ld4 r21=[r22];;
588 adds r21=1,r21;;
589 st4 [r22]=r21;;
590 #endif
591 // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
592 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
593 st8 [r21]=r29,XSI_ISR_OFS-XSI_IIP_OFS;;
594 // set shared_mem isr
595 st8 [r21]=r16 ;;
596 // set cr.ipsr
597 movl r21=THIS_CPU(current_psr_i_addr)
598 mov r29=r30 ;;
599 ld8 r21=[r21]
600 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
601 movl r27=~DELIVER_PSR_CLR;;
602 and r29=r29,r27;;
603 or r29=r29,r28;;
604 // set hpsr_dfh to ipsr
605 adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
606 ld1 r28=[r28];;
607 dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
608 mov cr.ipsr=r29;;
609 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
610 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
611 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
612 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
613 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
614 movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
615 or r30=r30,r28;;
616 and r30=r30,r27
617 // also set shared_mem ipsr.i and ipsr.ic appropriately
618 ld1 r22=[r21]
619 ld4 r24=[r18];;
620 cmp4.eq p6,p7=r24,r0;;
621 (p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
622 (p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1
623 mov r24=r21
624 cmp.ne p6,p7=r22,r0;;
625 (p6) dep r30=0,r30,IA64_PSR_I_BIT,1
626 (p7) dep r30=-1,r30,IA64_PSR_I_BIT,1
627 mov r22=1
628 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
629 adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
630 ld1 r28=[r27];;
631 st1 [r27]=r0
632 dep r30=r28,r30,IA64_PSR_DFH_BIT,1
633 ;;
634 st8 [r21]=r30
635 // set shared_mem interrupt_delivery_enabled to 0
636 // set shared_mem interrupt_collection_enabled to 0
637 st1 [r24]=r22
638 st4 [r18]=r0;;
639 // cover and set shared_mem precover_ifs to cr.ifs
640 // set shared_mem ifs to 0
641 cover ;;
642 mov r24=cr.ifs
643 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
644 st8 [r21]=r0 ;;
645 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
646 st8 [r21]=r24
647 // FIXME: need to save iipa and isr to be arch-compliant
648 // set iip to go to domain IVA break instruction vector
649 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
650 ld8 r22=[r22];;
651 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
652 ld8 r23=[r22];;
653 add r20=r20,r23;;
654 mov cr.iip=r20
655 // OK, now all set to go except for switch to virtual bank0
656 mov r30=r2
657 mov r29=r3
658 #ifdef HANDLE_AR_UNAT
659 mov r28=ar.unat
660 #endif
661 ;;
662 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
663 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
664 ;;
665 bsw.1;;
666 .mem.offset 0,0; st8.spill [r2]=r16,16
667 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
668 .mem.offset 0,0; st8.spill [r2]=r18,16
669 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
670 .mem.offset 0,0; st8.spill [r2]=r20,16
671 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
672 .mem.offset 0,0; st8.spill [r2]=r22,16
673 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
674 .mem.offset 0,0; st8.spill [r2]=r24,16
675 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
676 .mem.offset 0,0; st8.spill [r2]=r26,16
677 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
678 .mem.offset 0,0; st8.spill [r2]=r28,16
679 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
680 .mem.offset 0,0; st8.spill [r2]=r30,16
681 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
682 #ifdef HANDLE_AR_UNAT
683 // r16~r23 are preserved regs in bank0, we need to restore them;
684 // r24~r31 are scratch regs, we don't need to handle their NaT bits,
685 // because the OS handler must assign them before accessing them
686 ld8 r16=[r2],16
687 ld8 r17=[r3],16;;
688 ld8 r18=[r2],16
689 ld8 r19=[r3],16;;
690 ld8 r20=[r2],16
691 ld8 r21=[r3],16;;
692 ld8 r22=[r2],16
693 ld8 r23=[r3],16;;
694 #endif
695 ;;
696 bsw.0 ;;
697 mov r24=ar.unat
698 mov r2=r30
699 mov r3=r29
700 #ifdef HANDLE_AR_UNAT
701 mov ar.unat=r28
702 #endif
703 ;;
704 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
705 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
706 st8 [r25]=r24
707 st4 [r20]=r0
708 mov pr=r31,-1 ;;
709 rfi
710 ;;
711 END(fast_reflect)
713 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
714 // r16 == isr
715 // r17 == ifa
716 // r19 == reflect number (only pass-thru to dispatch_reflection)
717 // r20 == offset into ivt
718 // r31 == pr
719 GLOBAL_ENTRY(fast_access_reflect)
720 #ifndef FAST_ACCESS_REFLECT // see beginning of file
721 br.spnt.few dispatch_reflection ;;
722 #endif
723 mov r30=cr.ipsr
724 mov r29=cr.iip;;
725 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
726 cmp.ne p7,p0=r21,r0
727 (p7) br.spnt.few dispatch_reflection ;;
728 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
729 cmp.eq p7,p0=r21,r0
730 (p7) br.spnt.few dispatch_reflection ;;
731 movl r18=THIS_CPU(current_psr_ic_addr);;
732 ld8 r18=[r18];;
733 ld4 r21=[r18];;
734 cmp.eq p7,p0=r0,r21
735 (p7) br.spnt.few dispatch_reflection ;;
736 // set shared_mem ifa, FIXME: should we validate it?
737 mov r17=cr.ifa
738 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
739 st8 [r21]=r17 ;;
740 // get rr[ifa] and save to itir in shared memory (extra bits ignored)
741 shr.u r22=r17,61
742 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
743 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
744 shladd r22=r22,3,r21;;
745 ld8 r22=[r22];;
746 and r22=~3,r22;;
747 st8 [r23]=r22;;
748 br.cond.sptk.many fast_reflect;;
749 END(fast_access_reflect)
751 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
752 // is as it was at the time of original miss. We want to preserve that
753 // so if we get a nested fault, we can just branch to page_fault
754 GLOBAL_ENTRY(fast_tlb_miss_reflect)
755 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
756 br.spnt.few page_fault ;;
757 #else
758 mov r31=pr
759 mov r30=cr.ipsr
760 mov r29=cr.iip
761 mov r16=cr.isr
762 mov r17=cr.ifa;;
763 // for now, always take slow path for region 0 (e.g. metaphys mode)
764 extr.u r21=r17,61,3;;
765 cmp.eq p7,p0=r0,r21
766 (p7) br.spnt.few page_fault ;;
767 // always take slow path for PL0 (e.g. __copy_from_user)
768 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
769 cmp.eq p7,p0=r21,r0
770 (p7) br.spnt.few page_fault ;;
771 // slow path if strange ipsr or isr bits set
772 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
773 cmp.ne p7,p0=r21,r0
774 (p7) br.spnt.few page_fault ;;
775 movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
776 and r21=r16,r21;;
777 cmp.ne p7,p0=r0,r21
778 (p7) br.spnt.few page_fault ;;
779 // also take slow path if virtual psr.ic=0
780 movl r18=XSI_PSR_IC;;
781 ld4 r21=[r18];;
782 cmp.eq p7,p0=r0,r21
783 (p7) br.spnt.few page_fault ;;
784 // OK, if we get to here, we are doing a fast vcpu_translate. Need to:
785 // 1) look in the virtual TR's (pinned), if not there
786 // 2) look in the 1-entry TLB (pinned), if not there
787 // 3) check the domain VHPT (NOT pinned, accesses domain memory!)
788 // If we find it in any of these places, we need to effectively do
789 // a hyper_itc_i/d
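// In the code below, step 1 is effectively skipped (see the short-term
// hack note that follows: regions 5-7 go the slow way, and all Linux TRs
// live there); step 2 checks the vcpu's one-entry itlb/dtlb and punts to
// page_fault if ifa falls inside it; step 3 computes thash(ifa) by hand,
// reads the guest VHPT entry, and on a present entry branches to
// fast_insert, performing the itc the guest would otherwise have issued.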
791 // short-term hack for now, if in region 5-7, take slow path
792 // since all Linux TRs are in region 5 or 7, we need not check TRs
793 extr.u r21=r17,61,3;;
794 cmp.le p7,p0=5,r21
795 (p7) br.spnt.few page_fault ;;
796 fast_tlb_no_tr_match:
797 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
798 ld8 r27=[r27]
799 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
800 (p6) adds r25=IA64_VCPU_ITLB_OFFSET,r27
801 (p7) adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
802 ld8 r20=[r25],8;;
803 tbit.z p7,p0=r20,VTLB_PTE_P_BIT // present?
804 (p7) br.cond.spnt.few 1f;;
805 // if ifa is in range of tlb, don't bother to check rid, go slow path
806 ld8 r21=[r25],8;;
807 mov r23=1
808 extr.u r21=r21,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;
809 shl r22=r23,r21
810 ld8 r21=[r25],8;;
811 cmp.ltu p7,p0=r17,r21
812 (p7) br.cond.sptk.many 1f;
813 add r21=r22,r21;;
814 cmp.ltu p7,p0=r17,r21
815 (p7) br.cond.spnt.few page_fault;;
817 1: // check the guest VHPT
818 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
819 ld8 r19=[r19]
820 // if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
821 // FIXME: later, we deliver an alt_d/i vector after thash and itir
822 extr.u r25=r17,61,3
823 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
824 shl r25=r25,3;;
825 add r21=r21,r25;;
826 ld8 r22=[r21];;
827 tbit.z p7,p0=r22,0
828 (p7) br.cond.spnt.few page_fault;;
829 tbit.z p7,p0=r19,IA64_PTA_VE_BIT
830 (p7) br.cond.spnt.few page_fault;;
831 tbit.nz p7,p0=r19,IA64_PTA_VF_BIT // long format VHPT
832 (p7) br.cond.spnt.few page_fault;;
834 // compute and save away itir (r22 & RR_PS_MASK)
835 movl r21=IA64_ITIR_PS_MASK;;
836 and r22=r22,r21;;
837 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
838 st8 [r21]=r22;;
840 // save away ifa
841 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
842 st8 [r21]=r17;;
843 // see vcpu_thash to save away iha
844 shr.u r20 = r17, 61
845 addl r25 = 1, r0
846 movl r30 = 0xe000000000000000
847 ;;
848 and r21 = r30, r17 // VHPT_Addr1
849 ;;
850 shladd r28 = r20, 3, r18
851 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
852 ;;
853 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
854 addl r28 = 32767, r0
855 ld8 r24 = [r19] // pta
856 ;;
857 ld8 r23 = [r27] // rrs[vadr>>61]
858 extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
859 ;;
860 extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
861 shl r30 = r25, r26 // pt size
862 ;;
863 shr.u r19 = r17, r22 // ifa pg number
864 shr.u r29 = r24, IA64_PTA_BASE_BIT
865 adds r30 = -1, r30 // pt size mask
866 ;;
867 shladd r27 = r19, 3, r0 // vhpt offset
868 extr.u r26 = r30, 15, 46
869 ;;
870 andcm r24 = r29, r26
871 and r19 = r28, r27
872 shr.u r25 = r27, 15
873 ;;
874 and r23 = r26, r25
875 ;;
876 or r22 = r24, r23
877 ;;
878 dep.z r20 = r22, 15, 46
879 ;;
880 or r30 = r20, r21
881 ;;
882 //or r8 = r19, r30
883 or r19 = r19, r30
884 ;;
885 adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18 ;;
886 st8 [r23]=r19
887 // done with thash, check guest VHPT
889 adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
890 ld8 r24 = [r20];; // pta
891 // avoid recursively walking the VHPT
892 // if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
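// The xor/mask test checks whether the faulting address itself falls in
// the part of the address space that the VHPT (as described by pta)
// occupies; walking the VHPT for such an address could fault
// recursively, so that case is not handled here.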
893 mov r20=-8
894 xor r21=r17,r24
895 extr.u r24=r24,IA64_PTA_SIZE_BIT,IA64_PTA_SIZE_LEN;;
896 shl r20=r20,r24;;
897 shr.u r20=r20,3;;
898 and r21=r20,r21;;
899 cmp.eq p7,p0=r21,r0
900 (p7) br.cond.spnt.few 1f;;
901 // __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
902 // prepare for possible nested dtlb fault
903 mov r29=b0
904 movl r30=guest_vhpt_miss
905 // now go fetch the entry from the guest VHPT
906 ld8 r20=[r19];;
907 // if we wind up here, we successfully loaded the VHPT entry
909 // this VHPT walker aborts on non-present pages instead
910 // of inserting a not-present translation, which allows
911 // vectoring directly to the miss handler
912 tbit.z p7,p0=r20,0
913 (p7) br.cond.spnt.few page_not_present;;
915 #ifdef FAST_REFLECT_CNT
916 movl r21=PERFC(fast_vhpt_translate);;
917 ld4 r22=[r21];;
918 adds r22=1,r22;;
919 st4 [r21]=r22;;
920 #endif
922 // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
923 // r16 == pte
924 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
925 // r18 == XSI_PSR_IC_OFS
926 // r24 == ps
927 // r29 == saved value of b0 in case of recovery
928 // r30 == recovery ip if failure occurs
929 // r31 == pr
930 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
931 (p6) mov r17=1
932 (p7) mov r17=0
933 mov r16=r20
934 mov r29=b0
935 movl r30=recover_and_page_fault
936 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
937 ld8 r24=[r21];;
938 extr.u r24=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
939 // IFA already in PSCB
940 br.cond.sptk.many fast_insert;;
941 END(fast_tlb_miss_reflect)
943 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
944 ENTRY(recover_and_page_fault)
945 #ifdef PERF_COUNTERS
946 movl r21=PERFC(recover_to_page_fault);;
947 ld4 r22=[r21];;
948 adds r22=1,r22;;
949 st4 [r21]=r22;;
950 #endif
951 mov b0=r29
952 br.cond.sptk.many page_fault;;
954 // if we wind up here, we missed in guest VHPT so recover
955 // from nested dtlb fault and reflect a tlb fault to the guest
956 guest_vhpt_miss:
957 mov b0=r29
958 // fault = IA64_VHPT_FAULT
959 mov r20=r0
960 br.cond.sptk.many 1f;
962 // if we get to here, we are ready to reflect
963 // need to set up virtual ifa, iha, itir (fast_reflect handles
964 // virtual isr, iip, ipsr, ifs)
965 // see vcpu_get_itir_on_fault: get ps,rid,(FIXME key) from rr[ifa]
966 page_not_present:
967 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
968 (p6) movl r20=0x400
969 (p7) movl r20=0x800
971 1: extr.u r25=r17,61,3;;
972 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
973 shl r25=r25,3;;
974 add r21=r21,r25;;
975 ld8 r22=[r21];;
976 extr.u r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN;;
977 dep.z r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN
978 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
979 st8 [r23]=r22
981 // fast reflect expects
982 // r16 == cr.isr
983 // r18 == XSI_PSR_IC
984 // r20 == offset into ivt
985 // r29 == iip
986 // r30 == ipsr
987 // r31 == pr
988 //mov r16=cr.isr
989 mov r29=cr.iip
990 mov r30=cr.ipsr
991 br.sptk.many fast_reflect;;
992 #endif
993 END(fast_tlb_miss_reflect)
995 ENTRY(slow_vcpu_rfi)
996 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
997 ld8 r22=[r22];;
998 tbit.z p6,p0=r22,63
999 (p6) br.spnt.few dispatch_break_fault ;;
1000 // If vifs.v is set, we have two IFS to consider:
1001 // * the guest IFS
1002 // * the hypervisor IFS (validated by cover)
1003 // Because IFS is copied to CFM and is used to adjust AR.BSP,
1004 // virtualization of rfi is not easy.
1005 // Previously there was a two-step method (a first rfi jumped to
1006 // a stub which performed a second rfi).
1007 // This new method discards the RS before executing the hypervisor
1008 // cover. After cover, IFS.IFM will be zero. This IFS would simply
1009 // clear CFM without modifying AR.BSP. Therefore the guest IFS can
1010 // be used instead and there is no need for a second rfi.
1011 // Discarding the RS with the following alloc instruction just clears
1012 // CFM, which is safe because rfi will overwrite it.
1013 // There is a drawback: because the RS must be discarded before
1014 // executing C code, emulation of rfi must go through a hyperprivop
1015 // and not through normal instruction decoding.
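// (RS = register stack, CFM = current frame marker, IFS = cr.ifs, the
// frame marker captured by cover.)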
1016 alloc r22=ar.pfs,0,0,0,0
1017 br.spnt.few dispatch_break_fault
1018 ;;
1019 END(slow_vcpu_rfi)
1021 // ensure that, if giving up, registers at entry to fast_hyperprivop are unchanged
1022 ENTRY(hyper_rfi)
1023 #ifndef FAST_RFI
1024 br.spnt.few slow_vcpu_rfi ;;
1025 #endif
1026 // if no interrupts pending, proceed
1027 mov r30=r0
1028 cmp.eq p7,p0=r20,r0
1029 (p7) br.sptk.many 1f
1030 ;;
1031 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
1032 ld8 r21=[r20];; // r21 = vcr.ipsr
1033 extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
1034 mov r30=r22;;
1035 // r30 determines whether we might deliver an immediate extint
1036 #ifndef RFI_TO_INTERRUPT // see beginning of file
1037 cmp.ne p6,p0=r30,r0
1038 (p6) br.cond.spnt.few slow_vcpu_rfi ;;
1039 #endif
1040 1:
1041 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
1042 ld8 r21=[r20];; // r21 = vcr.ipsr
1043 // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
1044 movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
1045 and r22=r20,r21
1046 ;;
1047 cmp.ne p7,p0=r22,r20
1048 (p7) br.spnt.few slow_vcpu_rfi ;;
1049 // if was in metaphys mode, do it the slow way (FIXME later?)
1050 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1051 ld4 r20=[r20];;
1052 cmp.ne p7,p0=r20,r0
1053 (p7) br.spnt.few slow_vcpu_rfi ;;
1054 #if 0
1055 // if domain hasn't already done virtual bank switch
1056 // do it the slow way (FIXME later?)
1057 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1058 ld4 r20=[r20];;
1059 cmp.eq p7,p0=r20,r0
1060 (p7) br.spnt.few slow_vcpu_rfi ;;
1061 #endif
1062 adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
1063 ld8 r22=[r20];;
1064 1: // OK now, let's do an rfi.
1065 #ifdef FAST_HYPERPRIVOP_CNT
1066 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
1067 ld4 r23=[r20];;
1068 adds r23=1,r23;;
1069 st4 [r20]=r23;;
1070 #endif
1071 #ifdef RFI_TO_INTERRUPT
1072 // maybe do an immediate interrupt delivery?
1073 cmp.ne p6,p0=r30,r0
1074 (p6) br.cond.spnt.few rfi_check_extint;;
1075 #endif
1077 just_do_rfi:
1078 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1079 mov cr.iip=r22
1080 extr.u r19=r21,IA64_PSR_CPL0_BIT,2
1081 adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1082 cmp.gtu p7,p0=CONFIG_CPL0_EMUL,r19
1083 ld8 r20=[r20];;
1084 (p7) mov r19=CONFIG_CPL0_EMUL
1085 dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
1086 mov cr.ifs=r20 ;;
1087 // ipsr.cpl = max(vcr.ipsr.cpl, CONFIG_CPL0_EMUL);
1088 movl r20=THIS_CPU(current_psr_i_addr)
1089 dep r21=r19,r21,IA64_PSR_CPL0_BIT,2;;
1090 // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
1091 ld8 r20=[r20]
1092 mov r19=1
1093 extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
1094 cmp.ne p7,p6=r23,r0 ;;
1095 // not done yet
1096 (p7) st1 [r20]=r0
1097 (p6) st1 [r20]=r19;;
1098 extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
1099 cmp.ne p7,p6=r23,r0 ;;
1100 (p7) st4 [r18]=r19;;
1101 (p6) st4 [r18]=r0;;
1102 // force on psr.ic, i, dt, rt, it, bn
1103 movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT| \
1104 IA64_PSR_IT|IA64_PSR_BN)
1105 // keep cr.ipsr.pp and set vPSR.pp = vIPSR.pp
1106 mov r22=cr.ipsr
1107 ;;
1108 or r21=r21,r20
1109 tbit.z p10,p11 = r22, IA64_PSR_PP_BIT
1110 ;;
1111 adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1112 tbit.z p8,p9 = r21, IA64_PSR_DFH_BIT
1113 adds r23=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18
1114 ;;
1115 (p9) mov r27=1;;
1116 (p9) st1 [r20]=r27
1117 dep r21=r22,r21,IA64_PSR_PP_BIT,1
1118 (p10) st1 [r23]=r0
1119 (p11) st1 [r23]=r27
1120 ;;
1121 (p8) st1 [r20]=r0
1122 (p8) adds r20=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1123 (p8) ld1 r27=[r20]
1124 ;;
1125 (p8) dep r21=r27,r21, IA64_PSR_DFH_BIT, 1
1126 ;;
1127 mov cr.ipsr=r21
1128 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1129 ld4 r21=[r20];;
1130 cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
1131 (p7) br.cond.spnt.few 1f;
1132 // OK, now all set to go except for switch to virtual bank1
1133 mov r22=1;;
1134 st4 [r20]=r22
1135 mov r30=r2
1136 mov r29=r3
1137 mov r17=ar.unat;;
1138 adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
1139 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
1140 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
1141 ld8 r16=[r16];;
1142 mov ar.unat=r16;;
1143 bsw.1;;
1144 // FIXME?: ar.unat is not really handled correctly,
1145 // but may not matter if the OS is NaT-clean
1146 .mem.offset 0,0; ld8.fill r16=[r2],16
1147 .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
1148 .mem.offset 0,0; ld8.fill r18=[r2],16
1149 .mem.offset 0,0; ld8.fill r19=[r3],16 ;;
1150 .mem.offset 8,0; ld8.fill r20=[r2],16
1151 .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
1152 .mem.offset 8,0; ld8.fill r22=[r2],16
1153 .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
1154 .mem.offset 8,0; ld8.fill r24=[r2],16
1155 .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
1156 .mem.offset 8,0; ld8.fill r26=[r2],16
1157 .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
1158 .mem.offset 8,0; ld8.fill r28=[r2],16
1159 .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
1160 .mem.offset 8,0; ld8.fill r30=[r2],16
1161 .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
1162 bsw.0 ;;
1163 mov ar.unat=r17
1164 mov r2=r30
1165 mov r3=r29
1166 1: mov pr=r31,-1
1167 ;;
1168 rfi
1169 ;;
1170 END(hyper_rfi)
1172 #ifdef RFI_TO_INTERRUPT
1173 ENTRY(rfi_check_extint)
1174 //br.sptk.many dispatch_break_fault ;;
1176 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1177 // make sure none of these get trashed in case going to just_do_rfi
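// Scan the four 64-bit irr words from irr3 down to irr0 for the highest
// word with a pending bit: r16 ends up pointing at that irr word, r24 at
// the matching insvc word, and r25 holds that word's base vector
// (192, 128, 64 or 0).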
1178 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1179 ld8 r30=[r30];;
1180 adds r24=IA64_VCPU_INSVC3_OFFSET,r30
1181 mov r25=192
1182 adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
1183 ld8 r23=[r16];;
1184 cmp.eq p6,p0=r23,r0;;
1185 (p6) adds r16=-8,r16;;
1186 (p6) adds r24=-8,r24;;
1187 (p6) adds r25=-64,r25;;
1188 (p6) ld8 r23=[r16];;
1189 (p6) cmp.eq p6,p0=r23,r0;;
1190 (p6) adds r16=-8,r16;;
1191 (p6) adds r24=-8,r24;;
1192 (p6) adds r25=-64,r25;;
1193 (p6) ld8 r23=[r16];;
1194 (p6) cmp.eq p6,p0=r23,r0;;
1195 (p6) adds r16=-8,r16;;
1196 (p6) adds r24=-8,r24;;
1197 (p6) adds r25=-64,r25;;
1198 (p6) ld8 r23=[r16];;
1199 cmp.eq p6,p0=r23,r0
1200 (p6) br.cond.spnt.few just_do_rfi; // this is actually an error
1201 // r16 points to non-zero element of irr, r23 has value
1202 // r24 points to corr element of insvc, r25 has elt*64
1203 ld8 r26=[r24];;
1204 cmp.geu p6,p0=r26,r23
1205 (p6) br.cond.spnt.many just_do_rfi;
1207 // not masked by insvc, get vector number
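// Classic highest-set-bit trick: the or/shift cascade smears the top set
// bit of r23 down through all lower positions, the complement then has
// ones only above it, and popcnt of that gives 63 minus the bit index.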
1208 shr.u r26=r23,1;;
1209 or r26=r23,r26;;
1210 shr.u r27=r26,2;;
1211 or r26=r26,r27;;
1212 shr.u r27=r26,4;;
1213 or r26=r26,r27;;
1214 shr.u r27=r26,8;;
1215 or r26=r26,r27;;
1216 shr.u r27=r26,16;;
1217 or r26=r26,r27;;
1218 shr.u r27=r26,32;;
1219 or r26=r26,r27;;
1220 andcm r26=0xffffffffffffffff,r26;;
1221 popcnt r26=r26;;
1222 sub r26=63,r26;;
1223 // r26 now contains the bit index (mod 64)
1224 mov r27=1;;
1225 shl r27=r27,r26;;
1226 // r27 now contains the (within the proper word) bit mask
1227 add r26=r25,r26
1228 // r26 now contains the vector [0..255]
1229 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1230 ld8 r20=[r20] ;;
1231 extr.u r28=r20,16,1
1232 extr.u r29=r20,4,4 ;;
1233 cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi
1234 (p6) br.cond.spnt.few just_do_rfi;;
1235 shl r29=r29,4;;
1236 adds r29=15,r29;;
1237 cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
1238 (p6) br.cond.spnt.few just_do_rfi;;
1239 END(rfi_check_extint)
1241 // this doesn't work yet (dies early after getting to user mode)
1242 // but happens relatively infrequently, so fix it later.
1243 // NOTE that these will be counted incorrectly for now (for privcnt output)
1244 ENTRY(rfi_with_interrupt)
1245 #if 1
1246 br.sptk.many dispatch_break_fault ;;
1247 #endif
1249 // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
1250 // r18 == XSI_PSR_IC
1251 // r21 == vipsr (ipsr in shared_mem)
1252 // r30 == IA64_KR(CURRENT)
1253 // r31 == pr
1254 mov r17=cr.ipsr
1255 mov r16=cr.isr;;
1256 // set shared_mem isr
1257 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
1258 dep r16=r16,r0,IA64_ISR_IR_BIT,1 // insert into cr.isr (rest of bits zero)
1259 extr.u r20=r21,IA64_PSR_RI_BIT,2 ;; // get v(!)psr.ri
1260 dep r16=r20,r16,IA64_PSR_RI_BIT,2 ;; // deposit cr.isr.ei
1261 adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
1262 st8 [r22]=r16;;
1263 movl r22=THIS_CPU(current_psr_i_addr)
1264 // set cr.ipsr (make sure cpl==CONFIG_CPL0_EMUL!)
1265 mov r29=r17
1266 movl r27=~DELIVER_PSR_CLR
1267 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
1268 mov r20=1;;
1269 ld8 r22=[r22]
1270 and r29=r29,r27;;
1271 or r29=r29,r28;;
1272 mov cr.ipsr=r29
1273 // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
1274 // set shared_mem interrupt_delivery_enabled to 0
1275 // set shared_mem interrupt_collection_enabled to 0
1276 st1 [r22]=r20
1277 st4 [r18]=r0;;
1278 // cover and set shared_mem precover_ifs to cr.ifs
1279 // set shared_mem ifs to 0
1280 #if 0
1281 cover ;;
1282 mov r20=cr.ifs
1283 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1284 st8 [r22]=r0 ;;
1285 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1286 st8 [r22]=r20 ;;
1287 // leave cr.ifs alone for later rfi
1288 #else
1289 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1290 ld8 r20=[r22];;
1291 st8 [r22]=r0 ;;
1292 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1293 st8 [r22]=r20 ;;
1294 #endif
1295 // set iip to go to domain IVA break instruction vector
1296 adds r22=IA64_VCPU_IVA_OFFSET,r30;;
1297 ld8 r23=[r22]
1298 movl r24=0x3000;;
1299 add r24=r24,r23;;
1300 mov cr.iip=r24;;
1301 #if 0
1302 // OK, now all set to go except for switch to virtual bank0
1303 mov r30=r2
1304 mov r29=r3;;
1305 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18
1306 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
1307 bsw.1;;
1308 // FIXME: need to handle ar.unat!
1309 .mem.offset 0,0; st8.spill [r2]=r16,16
1310 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
1311 .mem.offset 0,0; st8.spill [r2]=r18,16
1312 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
1313 .mem.offset 0,0; st8.spill [r2]=r20,16
1314 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
1315 .mem.offset 0,0; st8.spill [r2]=r22,16
1316 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
1317 .mem.offset 0,0; st8.spill [r2]=r24,16
1318 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
1319 .mem.offset 0,0; st8.spill [r2]=r26,16
1320 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
1321 .mem.offset 0,0; st8.spill [r2]=r28,16
1322 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
1323 .mem.offset 0,0; st8.spill [r2]=r30,16
1324 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
1325 bsw.0 ;;
1326 mov r2=r30
1327 mov r3=r29;;
1328 #endif
1329 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1330 st4 [r20]=r0
1331 mov pr=r31,-1 ;;
1332 rfi
1333 END(rfi_with_interrupt)
1334 #endif // RFI_TO_INTERRUPT
1336 ENTRY(hyper_cover)
1337 #ifdef FAST_HYPERPRIVOP_CNT
1338 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_COVER);;
1339 ld4 r21=[r20];;
1340 adds r21=1,r21;;
1341 st4 [r20]=r21;;
1342 #endif
1343 mov r24=cr.ipsr
1344 mov r25=cr.iip;;
1345 // skip test for vpsr.ic.. it's a prerequisite for hyperprivops
1346 cover ;;
1347 mov r30=cr.ifs
1348 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
1349 st8 [r22]=r30
1350 mov cr.ifs=r0
1351 // adjust return address to skip over break instruction
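// ipsr.ri selects the slot within the bundle: slots 0 and 1 just advance
// ri, while slot 2 wraps ri to 0 and advances iip to the next 16-byte
// bundle.  (The same sequence appears at the end of most fast handlers
// in this file.)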
1352 extr.u r26=r24,41,2 ;;
1353 cmp.eq p6,p7=2,r26 ;;
1354 (p6) mov r26=0
1355 (p6) adds r25=16,r25
1356 (p7) adds r26=1,r26
1357 ;;
1358 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1359 ;;
1360 mov cr.ipsr=r24
1361 mov cr.iip=r25
1362 mov pr=r31,-1 ;;
1363 rfi
1364 ;;
1365 END(hyper_cover)
1367 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
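// "Metaphysical" mode is Xen/ia64's emulation of the guest running with
// psr.dt off (guest-physical addressing): region register 0 is loaded
// with a special metaphysical RID instead of the guest's saved rr0 value.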
1368 ENTRY(hyper_ssm_dt)
1369 #ifdef FAST_HYPERPRIVOP_CNT
1370 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_DT);;
1371 ld4 r21=[r20];;
1372 adds r21=1,r21;;
1373 st4 [r20]=r21;;
1374 #endif
1375 mov r24=cr.ipsr
1376 mov r25=cr.iip
1377 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1378 ld4 r21=[r20];;
1379 cmp.eq p7,p0=r21,r0 // meta==0?
1380 (p7) br.spnt.many 1f ;; // already in virtual mode
1381 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1382 ld8 r22=[r22];;
1383 adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
1384 ld8 r23=[r22];;
1385 mov rr[r0]=r23;;
1386 srlz.i;;
1387 st4 [r20]=r0
1388 // adjust return address to skip over break instruction
1389 1: extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1390 cmp.eq p6,p7=2,r26 ;;
1391 (p6) mov r26=0
1392 (p6) adds r25=16,r25
1393 (p7) adds r26=1,r26
1394 ;;
1395 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1396 ;;
1397 mov cr.ipsr=r24
1398 mov cr.iip=r25
1399 mov pr=r31,-1 ;;
1400 rfi
1401 ;;
1402 END(hyper_ssm_dt)
1404 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
1405 ENTRY(hyper_rsm_dt)
1406 #ifdef FAST_HYPERPRIVOP_CNT
1407 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RSM_DT);;
1408 ld4 r21=[r20];;
1409 adds r21=1,r21;;
1410 st4 [r20]=r21;;
1411 #endif
1412 mov r24=cr.ipsr
1413 mov r25=cr.iip
1414 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1415 ld4 r21=[r20];;
1416 cmp.ne p7,p0=r21,r0 // meta!=0?
1417 (p7) br.spnt.many 1f ;; // already in metaphysical mode
1418 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1419 ld8 r22=[r22];;
1420 adds r22=IA64_VCPU_META_RID_DT_OFFSET,r22;;
1421 ld8 r23=[r22];;
1422 mov rr[r0]=r23;;
1423 srlz.i;;
1424 adds r21=1,r0 ;;
1425 st4 [r20]=r21
1426 // adjust return address to skip over break instruction
1427 1: extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1428 cmp.eq p6,p7=2,r26 ;;
1429 (p6) mov r26=0
1430 (p6) adds r25=16,r25
1431 (p7) adds r26=1,r26
1432 ;;
1433 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1434 ;;
1435 mov cr.ipsr=r24
1436 mov cr.iip=r25
1437 mov pr=r31,-1 ;;
1438 rfi
1439 ;;
1440 END(hyper_rsm_dt)
1442 ENTRY(hyper_set_itm)
1443 // when we get to here, r20 ~= interrupts pending (evtchn_upcall_pending)
1444 cmp.ne p7,p0=r20,r0
1445 (p7) br.spnt.many dispatch_break_fault ;;
1446 #ifdef FAST_HYPERPRIVOP_CNT
1447 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_ITM);;
1448 ld4 r21=[r20];;
1449 adds r21=1,r21;;
1450 st4 [r20]=r21;;
1451 #endif
1452 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
1453 ld8 r21=[r20];;
1454 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1455 ld8 r20=[r20];;
1456 adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
1457 st8 [r20]=r8
1458 cmp.geu p6,p0=r21,r8;;
1459 (p6) mov r21=r8
1460 // now "safe set" cr.itm=r21
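// Retry loop: after writing cr.itm, re-read ar.itc; if the target has
// already passed, push it forward by an exponentially growing margin
// (r23 doubles each iteration, starting at 100) and try again, so the
// timer is never left armed in the past.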
1461 mov r23=100;;
1462 2: mov cr.itm=r21;;
1463 srlz.d;;
1464 mov r22=ar.itc ;;
1465 cmp.leu p6,p0=r21,r22;;
1466 add r21=r21,r23;;
1467 shl r23=r23,1
1468 (p6) br.cond.spnt.few 2b;;
1469 1: mov r24=cr.ipsr
1470 mov r25=cr.iip;;
1471 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1472 cmp.eq p6,p7=2,r26 ;;
1473 (p6) mov r26=0
1474 (p6) adds r25=16,r25
1475 (p7) adds r26=1,r26
1476 ;;
1477 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1478 ;;
1479 mov cr.ipsr=r24
1480 mov cr.iip=r25
1481 mov pr=r31,-1 ;;
1482 rfi
1483 ;;
1484 END(hyper_set_itm)
1486 ENTRY(hyper_get_psr)
1487 #ifdef FAST_HYPERPRIVOP_CNT
1488 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_PSR);;
1489 ld4 r21=[r20];;
1490 adds r21=1,r21;;
1491 st4 [r20]=r21;;
1492 #endif
1493 mov r24=cr.ipsr
1494 movl r8=0xffffffff | IA64_PSR_MC | IA64_PSR_IT;;
1495 // only return PSR{36:35,31:0}
1496 and r8=r8,r24
1497 // get vpsr.ic
1498 ld4 r21=[r18];;
1499 dep r8=r21,r8,IA64_PSR_IC_BIT,1
1500 // get vpsr.pp
1501 adds r20=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18 ;;
1502 ld1 r21=[r20];;
1503 dep r8=r21,r8,IA64_PSR_PP_BIT,1
1504 // get vpsr.dt
1505 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1506 ld4 r21=[r20];;
1507 cmp.ne p6,p0=r21,r0
1508 ;;
1509 (p6) dep.z r8=r8,IA64_PSR_DT_BIT,1
1510 // get vpsr.i
1511 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;;
1512 ld8 r20=[r20];;
1513 ld1 r21=[r20];;
1514 cmp.eq p8,p9=r0,r21
1515 ;;
1516 (p8) dep r8=-1,r8,IA64_PSR_I_BIT,1
1517 (p9) dep r8=0,r8,IA64_PSR_I_BIT,1
1518 // get vpsr.dfh
1519 adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1520 ld1 r21=[r20];;
1521 dep r8=r21,r8,IA64_PSR_DFH_BIT,1
1522 ;;
1523 mov r25=cr.iip
1524 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1525 cmp.eq p6,p7=2,r26 ;;
1526 (p6) mov r26=0
1527 (p6) adds r25=16,r25
1528 (p7) adds r26=1,r26
1529 ;;
1530 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1531 ;;
1532 mov cr.ipsr=r24
1533 mov cr.iip=r25
1534 mov pr=r31,-1 ;;
1535 rfi
1536 ;;
1537 END(hyper_get_psr)
1540 ENTRY(hyper_get_rr)
1541 #ifdef FAST_HYPERPRIVOP_CNT
1542 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_RR);;
1543 ld4 r21=[r20];;
1544 adds r21=1,r21;;
1545 st4 [r20]=r21;;
1546 #endif
1547 extr.u r25=r8,61,3;;
1548 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1549 shl r25=r25,3;;
1550 add r20=r20,r25;;
1551 ld8 r8=[r20]
1552 1: mov r24=cr.ipsr
1553 mov r25=cr.iip;;
1554 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1555 cmp.eq p6,p7=2,r26 ;;
1556 (p6) mov r26=0
1557 (p6) adds r25=16,r25
1558 (p7) adds r26=1,r26
1559 ;;
1560 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1561 ;;
1562 mov cr.ipsr=r24
1563 mov cr.iip=r25
1564 mov pr=r31,-1 ;;
1565 rfi
1566 ;;
1567 END(hyper_get_rr)
1569 ENTRY(hyper_set_rr)
1570 extr.u r25=r8,61,3;;
1571 cmp.leu p7,p0=7,r25 // punt on setting rr7
1572 (p7) br.spnt.many dispatch_break_fault ;;
1573 #ifdef FAST_HYPERPRIVOP_CNT
1574 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR);;
1575 ld4 r21=[r20];;
1576 adds r21=1,r21;;
1577 st4 [r20]=r21;;
1578 #endif
1579 extr.u r26=r9,IA64_RR_RID,IA64_RR_RID_LEN // r26 = r9.rid
1580 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1581 ld8 r20=[r20];;
1582 adds r22=IA64_VCPU_STARTING_RID_OFFSET,r20
1583 adds r23=IA64_VCPU_ENDING_RID_OFFSET,r20
1584 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20
1585 adds r21=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r20;;
1586 ld4 r22=[r22]
1587 ld4 r23=[r23]
1588 ld1 r21=[r21];;
1589 add r22=r26,r22;;
1590 cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
1591 (p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
1592 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1593 shl r25=r25,3;;
1594 add r20=r20,r25;;
1595 st8 [r20]=r9;; // store away exactly what was passed
1596 // but adjust value actually placed in rr[r8]
1597 // r22 contains adjusted rid, "mangle" it (see regionreg.c)
1598 // and set ps to v->arch.vhpt_pg_shift and ve to 1
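// The byte swap of rid bytes 1 and 3 is the same "mangling" done in
// regionreg.c; presumably it spreads consecutive guest RIDs across the
// machine RID space so VHPT/TLB hashing behaves better.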
1599 extr.u r27=r22,0,8
1600 extr.u r28=r22,8,8
1601 extr.u r29=r22,16,8
1602 dep.z r23=r21,IA64_RR_PS,IA64_RR_PS_LEN;;
1603 dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
1604 dep r23=r27,r23,24,8;;
1605 dep r23=r28,r23,16,8;;
1606 dep r23=r29,r23,8,8
1607 cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
1608 (p6) st8 [r24]=r23
1609 mov rr[r8]=r23;;
1610 // done, mosey on back
1611 1: mov r24=cr.ipsr
1612 mov r25=cr.iip;;
1613 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1614 cmp.eq p6,p7=2,r26 ;;
1615 (p6) mov r26=0
1616 (p6) adds r25=16,r25
1617 (p7) adds r26=1,r26
1618 ;;
1619 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1620 ;;
1621 mov cr.ipsr=r24
1622 mov cr.iip=r25
1623 mov pr=r31,-1 ;;
1624 rfi
1625 ;;
1626 END(hyper_set_rr)
1628 // r8 = val0
1629 // r9 = val1
1630 // r10 = val2
1631 // r11 = val3
1632 // r14 = val4
1633 // mov rr[0x0000000000000000UL] = r8
1634 // mov rr[0x2000000000000000UL] = r9
1635 // mov rr[0x4000000000000000UL] = r10
1636 // mov rr[0x6000000000000000UL] = r11
1637 // mov rr[0x8000000000000000UL] = r14
1638 ENTRY(hyper_set_rr0_to_rr4)
1639 #ifndef FAST_SET_RR0_TO_RR4
1640 br.spnt.few dispatch_break_fault ;;
1641 #endif
1642 #ifdef FAST_HYPERPRIVOP_CNT
1643 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR0_TO_RR4);;
1644 ld4 r21=[r20];;
1645 adds r21=1,r21;;
1646 st4 [r20]=r21;;
1647 #endif
1648 movl r17=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1649 ld8 r17=[r17];;
1651 adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17
1652 adds r22=IA64_VCPU_ENDING_RID_OFFSET,r17
1653 adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r17
1654 ;;
1655 ld4 r21=[r21] // r21 = current->starting_rid
1656 extr.u r26=r8,IA64_RR_RID,IA64_RR_RID_LEN // r26 = r8.rid
1657 extr.u r27=r9,IA64_RR_RID,IA64_RR_RID_LEN // r27 = r9.rid
1658 ld4 r22=[r22] // r22 = current->ending_rid
1659 extr.u r28=r10,IA64_RR_RID,IA64_RR_RID_LEN // r28 = r10.rid
1660 extr.u r29=r11,IA64_RR_RID,IA64_RR_RID_LEN // r29 = r11.rid
1661 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17
1662 extr.u r30=r14,IA64_RR_RID,IA64_RR_RID_LEN // r30 = r14.rid
1663 ld1 r23=[r23] // r23 = current->vhpt_pg_shift
1664 ;;
1665 add r16=r26,r21
1666 add r17=r27,r21
1667 add r19=r28,r21
1668 add r20=r29,r21
1669 add r21=r30,r21
1670 dep.z r23=r23,IA64_RR_PS,IA64_RR_PS_LEN // r23 = rr.ps
1671 ;;
1672 cmp.geu p6,p0=r16,r22 // if r8.rid + starting_rid >= ending_rid
1673 cmp.geu p7,p0=r17,r22 // if r9.rid + starting_rid >= ending_rid
1674 cmp.geu p8,p0=r19,r22 // if r10.rid + starting_rid >= ending_rid
1675 (p6) br.cond.spnt.few 1f // this is an error, but just ignore/return
1676 (p7) br.cond.spnt.few 1f // this is an error, but just ignore/return
1677 cmp.geu p9,p0=r20,r22 // if r11.rid + starting_rid >= ending_rid
1678 (p8) br.cond.spnt.few 1f // this is an error, but just ignore/return
1679 (p9) br.cond.spnt.few 1f // this is an error, but just ignore/return
1680 cmp.geu p10,p0=r21,r22 // if r14.rid + starting_rid >= ending_rid
1681 (p10) br.cond.spnt.few 1f // this is an error, but just ignore/return
1682 dep r23=-1,r23,0,1 // add rr.ve
1683 ;;
1684 mov r25=1
1685 adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1686 ;;
1687 shl r30=r25,61 // r30 = 0x2000000000000000
1689 #if 0
1690 // simple plain version
1691 // rr0
1692 st8 [r22]=r8, 8 // current->rrs[0] = r8
1694 mov r26=0 // r26=0x0000000000000000
1695 extr.u r27=r16,0,8
1696 extr.u r28=r16,8,8
1697 extr.u r29=r16,16,8;;
1698 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1699 dep r25=r28,r25,16,8;;
1700 dep r25=r29,r25,8,8;;
1701 st8 [r24]=r25 // save for metaphysical
1702 mov rr[r26]=r25
1703 dv_serialize_data
1705 // rr1
1706 st8 [r22]=r9, 8 // current->rrs[1] = r9
1707 add r26=r26,r30 // r26 = 0x2000000000000000
1708 extr.u r27=r17,0,8
1709 extr.u r28=r17,8,8
1710 extr.u r29=r17,16,8;;
1711 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1712 dep r25=r28,r25,16,8;;
1713 dep r25=r29,r25,8,8;;
1714 mov rr[r26]=r25
1715 dv_serialize_data
1717 // rr2
1718 st8 [r22]=r10, 8 // current->rrs[2] = r10
1719 add r26=r26,r30 // r26 = 0x4000000000000000
1720 extr.u r27=r19,0,8
1721 extr.u r28=r19,8,8
1722 extr.u r29=r19,16,8;;
1723 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1724 dep r25=r28,r25,16,8;;
1725 dep r25=r29,r25,8,8;;
1726 mov rr[r26]=r25
1727 dv_serialize_data
1729 // rr3
1730 st8 [r22]=r11, 8 // current->rrs[3] = r11
1732 add r26=r26,r30 // r26 = 0x6000000000000000
1733 extr.u r27=r20,0,8
1734 extr.u r28=r20,8,8
1735 extr.u r29=r20,16,8;;
1736 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1737 dep r25=r28,r25,16,8;;
1738 dep r25=r29,r25,8,8;;
1739 mov rr[r26]=r25
1740 dv_serialize_data
1742 // rr4
1743 st8 [r22]=r14 // current->rrs[4] = r14
1745 add r26=r26,r30 // r26 = 0x8000000000000000
1746 extr.u r27=r21,0,8
1747 extr.u r28=r21,8,8
1748 extr.u r29=r21,16,8;;
1749 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1750 dep r25=r28,r25,16,8;;
1751 dep r25=r29,r25,8,8;;
1752 mov rr[r26]=r25
1753 dv_serialize_data
1754 #else
1755 // shuffled version
1756 // rr0
1757 // uses r27, r28, r29 for mangling
1758 // r25 for mangled value
1759 st8 [r22]=r8, 8 // current->rrs[0] = r8
1760 mov r26=0 // r26=0x0000000000000000
1761 extr.u r27=r16,0,8
1762 extr.u r28=r16,8,8
1763 extr.u r29=r16,16,8;;
1764 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1765 dep r25=r28,r25,16,8;;
1766 dep r25=r29,r25,8,8;;
1767 st8 [r24]=r25 // save for metaphysical
1768 mov rr[r26]=r25
1769 dv_serialize_data
1771 // r16, r24, r25 is usable.
1772 // rr1
1773 // uses r25, r28, r29 for mangling
1774 // r25 for mangled value
1775 extr.u r25=r17,0,8
1776 extr.u r28=r17,8,8
1777 st8 [r22]=r9, 8 // current->rrs[1] = r9
1778 extr.u r29=r17,16,8 ;;
1779 add r26=r26,r30 // r26 = 0x2000000000000000
1780 extr.u r24=r19,8,8
1781 extr.u r16=r19,0,8
1782 dep r25=r25,r23,24,8;; // mangling is swapping bytes 1 & 3
1783 dep r25=r28,r25,16,8;;
1784 dep r25=r29,r25,8,8;;
1785 mov rr[r26]=r25
1786 dv_serialize_data
1788 // r16, r17, r24, r25 is usable
1789 // rr2
1790 // uses r16, r24, r29 for mangling
1791 // r17 for mangled value
1792 extr.u r29=r19,16,8
1793 extr.u r27=r20,0,8
1794 st8 [r22]=r10, 8 // current->rrs[2] = r10
1795 add r26=r26,r30 // r26 = 0x4000000000000000
1796 dep r17=r16,r23,24,8;; // mangling is swapping bytes 1 & 3
1797 dep r17=r24,r17,16,8;;
1798 dep r17=r29,r17,8,8;;
1799 mov rr[r26]=r17
1800 dv_serialize_data
1802 // r16, r17, r19, r24, r25 is usable
1803 // rr3
1804 // uses r27, r28, r29 for mangling
1805 // r25 for mangled value
1806 extr.u r28=r20,8,8
1807 extr.u r29=r20,16,8
1808 st8 [r22]=r11, 8 // current->rrs[3] = r11
1809 extr.u r16=r21,0,8
1810 add r26=r26,r30 // r26 = 0x6000000000000000
1811 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1812 dep r25=r28,r25,16,8;;
1813 dep r25=r29,r25,8,8;;
1814 mov rr[r26]=r25
1815 dv_serialize_data
1817 // r16, r17, r19, r20, r24, r25 are usable
1818 // rr4
1819 // uses r16, r17, r24 for mangling
1820 // r25 for mangled value
1821 extr.u r17=r21,8,8
1822 extr.u r24=r21,16,8
1823 st8 [r22]=r14 // current->rrs[4] = r14
1824 add r26=r26,r30 // r26 = 0x8000000000000000
1825 dep r25=r16,r23,24,8;; // mangling is swapping bytes 1 & 3
1826 dep r25=r17,r25,16,8;;
1827 dep r25=r24,r25,8,8;;
1828 mov rr[r26]=r25
1829 dv_serialize_data
1830 #endif
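// What each rrN block above does, as a C sketch (the helper name and exact
// base layout are illustrative, not taken from this file): the base value
// in r23 already carries rr.ve and the page-size field, and the guest-derived
// RID is deposited into rr bits 8..31 with its low and high bytes exchanged
// ("swapping bytes 1 & 3" of the rr).
//
//   unsigned long mangle_rr(unsigned long base, unsigned long rid)
//   {
//       unsigned long b0 = (rid >>  0) & 0xff;
//       unsigned long b1 = (rid >>  8) & 0xff;
//       unsigned long b2 = (rid >> 16) & 0xff;
//       /* rr byte 3 <- rid byte 0, byte 2 <- byte 1, byte 1 <- byte 2 */
//       return (base & ~0xffffff00UL) | (b0 << 24) | (b1 << 16) | (b2 << 8);
//   }
//
// The raw guest values (r8..r11, r14) are what land in current->rrs[];
// the mangled value is what gets loaded into the machine region register
// (and, for rr0, saved in the metaphysical slot).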
1832 // done, mosey on back
1833 1: mov r24=cr.ipsr
1834 mov r25=cr.iip;;
1835 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1836 cmp.eq p6,p7=2,r26 ;;
1837 (p6) mov r26=0
1838 (p6) adds r25=16,r25
1839 (p7) adds r26=1,r26
1840 ;;
1841 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1842 ;;
1843 mov cr.ipsr=r24
1844 mov cr.iip=r25
1845 mov pr=r31,-1 ;;
1846 rfi
1847 ;;
1848 END(hyper_set_rr0_to_rr4)
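// The "done, mosey on back" epilogue above (and the identical ones in the
// fast paths below) just steps the guest past the emulated instruction.
// As a C sketch: ipsr.ri is the slot (0..2) within the current 16-byte
// bundle, so advancing means bumping the slot and, on wrap, moving iip to
// the next bundle before restoring pr and doing rfi.
//
//   if (ipsr.ri == 2) {       /* last slot in the bundle */
//       ipsr.ri = 0;
//       iip += 16;            /* next bundle */
//   } else {
//       ipsr.ri++;
//   }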
1850 ENTRY(hyper_set_kr)
1851 extr.u r25=r8,3,61;;
1852 cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way
1853 (p7) br.spnt.many dispatch_break_fault ;;
1854 #ifdef FAST_HYPERPRIVOP_CNT
1855 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_KR);;
1856 ld4 r21=[r20];;
1857 adds r21=1,r21;;
1858 st4 [r20]=r21;;
1859 #endif
1860 adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18
1861 shl r20=r8,3;;
1862 add r22=r20,r21;;
1863 st8 [r22]=r9;;
1864 cmp.eq p7,p0=r8,r0
1865 adds r8=-1,r8;;
1866 (p7) mov ar0=r9;;
1867 cmp.eq p7,p0=r8,r0
1868 adds r8=-1,r8;;
1869 (p7) mov ar1=r9;;
1870 cmp.eq p7,p0=r8,r0
1871 adds r8=-1,r8;;
1872 (p7) mov ar2=r9;;
1873 cmp.eq p7,p0=r8,r0
1874 adds r8=-1,r8;;
1875 (p7) mov ar3=r9;;
1876 cmp.eq p7,p0=r8,r0
1877 adds r8=-1,r8;;
1878 (p7) mov ar4=r9;;
1879 cmp.eq p7,p0=r8,r0
1880 adds r8=-1,r8;;
1881 (p7) mov ar5=r9;;
1882 cmp.eq p7,p0=r8,r0
1883 adds r8=-1,r8;;
1884 (p7) mov ar6=r9;;
1885 cmp.eq p7,p0=r8,r0
1886 adds r8=-1,r8;;
1887 (p7) mov ar7=r9;;
1888 // done, mosey on back
1889 1: mov r24=cr.ipsr
1890 mov r25=cr.iip;;
1891 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1892 cmp.eq p6,p7=2,r26 ;;
1893 (p6) mov r26=0
1894 (p6) adds r25=16,r25
1895 (p7) adds r26=1,r26
1896 ;;
1897 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1898 ;;
1899 mov cr.ipsr=r24
1900 mov cr.iip=r25
1901 mov pr=r31,-1 ;;
1902 rfi
1903 ;;
1904 END(hyper_set_kr)
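// hyper_set_kr summarized as a C sketch (the shadow-array field name is
// illustrative; ia64_set_kr() stands in for the eight predicated
// mov ar.kN instructions above):
//
//   if (index > 7)
//       goto slow_path;                  /* dispatch_break_fault */
//   vcpu_xsi->krs[index] = val;          /* shadow copy at XSI_KR0 + 8*index */
//   ia64_set_kr(index, val);             /* the matching machine ar.k register */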
1906 // This routine was derived from the compiler's optimized assembly output
1907 // for vcpu_thash, so it is dense and hard to read, but it works.
1908 // On entry:
1909 // r18 == XSI_PSR_IC
1910 // r31 == pr
1911 ENTRY(hyper_thash)
1912 #ifdef FAST_HYPERPRIVOP_CNT
1913 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_THASH);;
1914 ld4 r21=[r20];;
1915 adds r21=1,r21;;
1916 st4 [r20]=r21;;
1917 #endif
1918 shr.u r20 = r8, 61
1919 addl r25 = 1, r0
1920 movl r17 = 0xe000000000000000
1921 ;;
1922 and r21 = r17, r8 // VHPT_Addr1
1923 ;;
1924 shladd r28 = r20, 3, r18
1925 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
1926 ;;
1927 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
1928 addl r28 = 32767, r0
1929 ld8 r24 = [r19] // pta
1930 ;;
1931 ld8 r23 = [r27] // rrs[vadr>>61]
1932 extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
1933 ;;
1934 extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
1935 shl r30 = r25, r26
1936 ;;
1937 shr.u r19 = r8, r22
1938 shr.u r29 = r24, 15
1939 ;;
1940 adds r17 = -1, r30
1941 ;;
1942 shladd r27 = r19, 3, r0
1943 extr.u r26 = r17, 15, 46
1944 ;;
1945 andcm r24 = r29, r26
1946 and r19 = r28, r27
1947 shr.u r25 = r27, 15
1948 ;;
1949 and r23 = r26, r25
1950 ;;
1951 or r22 = r24, r23
1952 ;;
1953 dep.z r20 = r22, 15, 46
1954 ;;
1955 or r16 = r20, r21
1956 ;;
1957 or r8 = r19, r16
1958 // done, update iip/ipsr to next instruction
1959 mov r24=cr.ipsr
1960 mov r25=cr.iip;;
1961 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1962 cmp.eq p6,p7=2,r26 ;;
1963 (p6) mov r26=0
1964 (p6) adds r25=16,r25
1965 (p7) adds r26=1,r26
1966 ;;
1967 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1968 ;;
1969 mov cr.ipsr=r24
1970 mov cr.iip=r25
1971 mov pr=r31,-1 ;;
1972 rfi
1973 ;;
1974 END(hyper_thash)
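// The dense code above computes the guest's VHPT address (thash) from its
// pta and the matching region register. A C sketch of the same computation,
// assuming pta.size >= 15 as the architecture requires (this mirrors
// vcpu_thash rather than quoting it):
//
//   u64 thash(u64 vadr, u64 pta, u64 rr)
//   {
//       u64 ps   = (rr  >> 2) & 0x3f;                 /* rr.ps            */
//       u64 size = (pta >> 2) & 0x3f;                 /* pta.size         */
//       u64 mask = (1UL << size) - 1;                 /* VHPT size, bytes */
//       u64 off  = (vadr >> ps) << 3;                 /* 8-byte entries   */
//       return (vadr & 0xe000000000000000UL)          /* region bits 63:61    */
//            | (pta & 0x1fffffffffff8000UL & ~mask)   /* pta.base above table */
//            | (off & mask);                          /* offset within table  */
//   }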
1976 ENTRY(hyper_ptc_ga)
1977 #ifndef FAST_PTC_GA
1978 br.spnt.few dispatch_break_fault ;;
1979 #endif
1980 // FIXME: validate not flushing Xen addresses
1981 #ifdef FAST_HYPERPRIVOP_CNT
1982 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_PTC_GA);;
1983 ld4 r21=[r20];;
1984 adds r21=1,r21;;
1985 st4 [r20]=r21;;
1986 #endif
1987 movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1988 ld8 r21=[r21];;
1989 adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
1990 mov r28=r8
1991 extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2)
1992 mov r20=1
1993 shr.u r24=r8,61
1994 movl r26=0x8000000000000000 // INVALID_TI_TAG
1995 mov r30=ar.lc
1996 ;;
1997 ld1 r22=[r22] // current->arch.vhpt_pg_shift
1998 shl r19=r20,r19
1999 cmp.eq p7,p0=7,r24
2000 (p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7
2001 ;;
2002 shl r27=r22,2 // vhpt_pg_shift<<2 (for ptc.ga)
2003 shr.u r23=r19,r22 // repeat loop for n pages
2004 cmp.le p7,p0=r19,r0 // skip flush if size<=0
2005 (p7) br.cond.dpnt 2f ;;
2006 shl r24=r23,r22;;
2007 cmp.ne p7,p0=r24,r23 ;;
2008 (p7) adds r23=1,r23 ;; // size not a page multiple: one extra iteration
2009 mov ar.lc=r23
2010 shl r29=r20,r22;; // page_size
2011 1:
2012 thash r25=r28 ;;
2013 adds r25=16,r25 ;;
2014 ld8 r24=[r25] ;;
2015 // FIXME: should check if tag matches, not just blow it away
2016 or r24=r26,r24 ;; // vhpt_entry->ti_tag = 1
2017 st8 [r25]=r24
2018 ptc.ga r28,r27 ;;
2019 srlz.i ;;
2020 add r28=r29,r28
2021 br.cloop.sptk.few 1b
2022 ;;
2023 2:
2024 mov ar.lc=r30 ;;
2025 mov r29=cr.ipsr
2026 mov r30=cr.iip;;
2027 adds r25=IA64_VCPU_DTLB_OFFSET,r21
2028 adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
2029 ld8 r24=[r25]
2030 ld8 r27=[r26] ;;
2031 and r24=-2,r24
2032 and r27=-2,r27 ;;
2033 st8 [r25]=r24 // set 1-entry i/dtlb as not present
2034 st8 [r26]=r27 ;;
2035 // increment to point to next instruction
2036 extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
2037 cmp.eq p6,p7=2,r26 ;;
2038 (p6) mov r26=0
2039 (p6) adds r30=16,r30
2040 (p7) adds r26=1,r26
2041 ;;
2042 dep r29=r26,r29,IA64_PSR_RI_BIT,2
2043 ;;
2044 mov cr.ipsr=r29
2045 mov cr.iip=r30
2046 mov pr=r31,-1 ;;
2047 rfi
2048 ;;
2049 END(hyper_ptc_ga)
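// The flush loop above, as a C sketch (struct and helper names are
// illustrative; ia64_thash/ia64_ptcga/ia64_srlz_i are used in the style of
// the ia64 intrinsics):
//
//   unsigned long n = addr_range >> pg_shift;
//   if ((n << pg_shift) != addr_range)
//       n++;                                 /* remainder: one extra page */
//   while (n--) {
//       struct vhpt_lf_entry *v = (void *)ia64_thash(vadr);
//       v->ti_tag |= INVALID_TI_TAG;         /* FIXME above: no tag check */
//       ia64_ptcga(vadr, pg_shift << 2);     /* global purge, ps in 7:2   */
//       ia64_srlz_i();
//       vadr += 1UL << pg_shift;
//   }
//   /* finally the 1-entry virtual i/dtlb copies are marked not-present */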
2051 // recovery block for hyper_itc metaphysical memory lookup
2052 ENTRY(recover_and_dispatch_break_fault)
2053 #ifdef PERF_COUNTERS
2054 movl r21=PERFC(recover_to_break_fault);;
2055 ld4 r22=[r21];;
2056 adds r22=1,r22;;
2057 st4 [r21]=r22;;
2058 #endif
2059 mov b0=r29 ;;
2060 br.sptk.many dispatch_break_fault;;
2061 END(recover_and_dispatch_break_fault)
2063 // Registers at entry
2064 // r17 = break immediate (HYPERPRIVOP_ITC_D or I)
2065 // r18 == XSI_PSR_IC_OFS
2066 // r31 == pr
2067 ENTRY(hyper_itc)
2068 hyper_itc_i:
2069 // fall through, hyper_itc_d handles both i and d
2070 hyper_itc_d:
2071 #ifndef FAST_ITC
2072 br.sptk.many dispatch_break_fault ;;
2073 #else
2074 // ensure itir.ps >= xen's pagesize
2075 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2076 ld8 r27=[r27];;
2077 adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
2078 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
2079 ld1 r22=[r22]
2080 ld8 r23=[r23];;
2081 extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;; // r24==logps
2082 cmp.gt p7,p0=r22,r24
2083 (p7) br.spnt.many dispatch_break_fault ;;
2084 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2085 ld8 r21=[r21];;
2086 // for now, punt on region0 inserts
2087 extr.u r21=r21,61,3;;
2088 cmp.eq p7,p0=r21,r0
2089 (p7) br.spnt.many dispatch_break_fault ;;
2090 adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
2091 ld8 r27=[r27]
2092 // FIXME: is the global var dom0 always pinned? assume so for now
2093 movl r28=dom0;;
2094 ld8 r28=[r28];;
2095 // FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
2096 cmp.ne p7,p0=r27,r28
2097 (p7) br.spnt.many dispatch_break_fault ;;
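// The three fast-path preconditions just checked, as a C sketch:
//
//   if (itir_ps < current->arch.vhpt_pg_shift)  /* page smaller than Xen's  */
//       goto slow;                              /* dispatch_break_fault     */
//   if ((ifa >> 61) == 0)                       /* punt on region-0 inserts */
//       goto slow;
//   if (current->domain != dom0)                /* fast path is dom0-only   */
//       goto slow;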
2098 #ifdef FAST_HYPERPRIVOP_CNT
2099 cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
2100 (p6) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_D)
2101 (p7) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_I);;
2102 ld4 r21=[r20];;
2103 adds r21=1,r21;;
2104 st4 [r20]=r21;;
2105 #endif
2106 (p6) mov r17=2;;
2107 (p7) mov r17=3;;
2108 mov r29=b0 ;;
2109 movl r30=recover_and_dispatch_break_fault ;;
2110 mov r16=r8;;
2111 // fall through
2112 #endif
2113 END(hyper_itc)
2115 #if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)
2117 // fast_insert(PSCB(ifa),r24=ps,r16=pte)
2118 // r16 == pte
2119 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
2120 // r18 == XSI_PSR_IC_OFS
2121 // r24 == ps
2122 // r29 == saved value of b0 in case of recovery
2123 // r30 == recovery ip if failure occurs
2124 // r31 == pr
2125 ENTRY(fast_insert)
2126 // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
2127 mov r19=1
2128 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2129 shl r20=r19,r24
2130 ld8 r27=[r27];;
2131 adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
2132 adds r20=-1,r20 // r20 == mask
2133 movl r19=_PAGE_PPN_MASK;;
2134 ld1 r23=[r23]
2135 mov r25=-1
2136 and r22=r16,r19;; // r22 == pteval & _PAGE_PPN_MASK
2137 andcm r19=r22,r20
2138 shl r25=r25,r23 // -1 << current->arch.vhpt_pg_shift
2139 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2140 ld8 r21=[r21];;
2141 and r20=r21,r20;;
2142 or r19=r19,r20;; // r19 == mpaddr
2143 // FIXME: for now, just do domain0 and skip mpaddr range checks
2144 and r20=r25,r19
2145 movl r21=PAGE_PHYS ;;
2146 or r20=r20,r21 ;; // r20==return value from lookup_domain_mpa
2147 // r16=pteval,r20=pteval2
2148 movl r19=_PAGE_PPN_MASK
2149 movl r21=_PAGE_PL_PRIV;;
2150 andcm r25=r16,r19 // r25==pteval & ~_PAGE_PPN_MASK
2151 and r22=r20,r19;;
2152 or r22=r22,r21;;
2153 or r22=r22,r25;; // r22==return value from translate_domain_pte
2154 // done with translate_domain_pte
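// A C sketch of the translate_domain_pte() work just done (dom0 only, no
// mpaddr range checks, as the FIXMEs note; variable names illustrative):
//
//   mask    = (1UL << logps) - 1;
//   mpaddr  = (guest_pte & _PAGE_PPN_MASK & ~mask) | (ifa & mask);
//   /* in this fast path dom0's metaphysical addresses are taken 1:1,
//    * so lookup_domain_mpa() collapses to: */
//   machine = (mpaddr & ~((1UL << vhpt_pg_shift) - 1)) | PAGE_PHYS;
//   new_pte = (guest_pte & ~_PAGE_PPN_MASK)     /* keep ar/ma/d/a bits      */
//           | (machine & _PAGE_PPN_MASK)        /* machine page number      */
//           | _PAGE_PL_PRIV;                    /* force Xen's priv level   */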
2155 // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
2156 // FIXME: for now, just domain0 and skip range check
2157 // psr.ic already cleared
2158 // NOTE: r24 still contains ps (from above)
2159 shladd r24=r24,2,r0;;
2160 mov cr.itir=r24
2161 adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2162 ld8 r23=[r23];;
2163 mov cr.ifa=r23
2164 tbit.z p6,p7=r17,0;;
2165 (p6) itc.d r22
2166 (p7) itc.i r22;;
2167 dv_serialize_data
2168 // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
2169 thash r28=r23
2170 or r26=1,r22;;
2171 ttag r21=r23
2172 adds r25=8,r28
2173 mov r19=r28;;
2174 st8 [r25]=r24
2175 adds r20=16,r28;;
2176 st8 [r19]=r26
2177 st8 [r20]=r21;;
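// The vhpt_insert() step above, as a C sketch (long-format VHPT entry:
// page_flags at +0, itir at +8, ti_tag at +16; struct name illustrative):
//
//   struct vhpt_lf_entry *v = (void *)ia64_thash(vaddr);
//   v->itir       = itir;                 /* logps << 2               */
//   v->page_flags = pte | 1;              /* mark the entry present   */
//   v->ti_tag     = ia64_ttag(vaddr);     /* tag for collision checks */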
2178 // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
2179 // TR_ENTRY = {page_flags,itir,addr,rid}
2180 tbit.z p6,p7=r17,0
2181 adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
2182 (p6) adds r27=IA64_VCPU_DTLB_OFFSET,r27
2183 (p7) adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
2184 st8 [r27]=r22,8;; // page_flags: already has pl >= 2 and p==1
2185 st8 [r27]=r24,8 // itir
2186 mov r19=-4096;;
2187 and r23=r23,r19;;
2188 st8 [r27]=r23,8 // ifa & ~0xfff
2189 adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
2190 extr.u r25=r23,61,3;;
2191 shladd r29=r25,3,r29;;
2192 ld8 r29=[r29]
2193 movl r20=IA64_RR_RID_MASK;;
2194 and r29=r29,r20;;
2195 st8 [r27]=r29,-8;; // rid
2196 // if ps > 12
2197 cmp.eq p7,p0=12<<IA64_ITIR_PS,r24
2198 (p7) br.cond.sptk.many 1f;;
2199 // if (ps > 12) {
2200 // trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
2201 extr.u r29=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
2202 mov r28=1;;
2203 shl r26=r28,r29;;
2204 adds r29=-12,r29;;
2205 shl r25=r28,r29;;
2206 mov r29=-1
2207 adds r26=-1,r26
2208 adds r25=-1,r25;;
2209 andcm r26=r29,r26 // ~((1UL<<ps)-1)
2210 andcm r25=r29,r25;; // ~((1UL<<(ps-12))-1)
2211 ld8 r29=[r27];;
2212 and r29=r29,r26;;
2213 st8 [r27]=r29,-16;;
2214 ld8 r29=[r27];;
2215 extr.u r28=r29,12,38;;
2216 movl r26=0xfffc000000000fff;;
2217 and r29=r29,r26
2218 and r28=r28,r25;;
2219 shl r28=r28,12;;
2220 or r29=r29,r28;;
2221 st8 [r27]=r29;;
2222 1: // done with vcpu_set_tr_entry
2223 //PSCBX(vcpu,i/dtlb_pte) = mp_pte
2224 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2225 ld8 r27=[r27];;
2226 tbit.z p6,p7=r17,0;;
2227 (p6) adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
2228 (p7) adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
2229 st8 [r27]=r16;;
2230 // done with vcpu_itc_no_srlz
2232 // if hyper_itc, increment to point to next instruction
2233 tbit.z p7,p0=r17,1
2234 (p7) br.cond.sptk.few no_inc_iip;;
2236 mov r29=cr.ipsr
2237 mov r30=cr.iip;;
2238 extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
2239 cmp.eq p6,p7=2,r26 ;;
2240 (p6) mov r26=0
2241 (p6) adds r30=16,r30
2242 (p7) adds r26=1,r26
2243 ;;
2244 dep r29=r26,r29,IA64_PSR_RI_BIT,2
2245 ;;
2246 mov cr.ipsr=r29
2247 mov cr.iip=r30;;
2249 no_inc_iip:
2250 mov pr=r31,-1 ;;
2251 rfi
2252 ;;
2253 END(fast_insert)
2254 #endif