ia64/xen-unstable: xen/arch/ia64/xen/hyperprivop.S @ 9405:29dfadcc5029

[IA64] Followup to xen time cleanup

Clean-up of the xen time handler. Tristan had #if 0'd some code because it
seemed redundant; it is actually the problematic logic behind an
intermittent timer oops in dom0, so delete it outright now.

Also remove the vcpu_wake call, since waking up the current vcpu
accomplishes nothing and simply wastes cpu cycles.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author		awilliam@xenbuild.aw
date		Mon Mar 27 15:32:08 2006 -0700
parents		2ff0ade58aab
children	27050b1390cf
/*
 * arch/ia64/kernel/hyperprivop.S
 *
 * Copyright (C) 2005 Hewlett-Packard Co
 *	Dan Magenheimer <dan.magenheimer@hp.com>
 */

#include <linux/config.h>

#include <asm/asmmacro.h>
#include <asm/kregs.h>
#include <asm/offsets.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/debugger.h>
#include <public/arch-ia64.h>

#define _PAGE_PPN_MASK 0x0003fffffffff000 //asm/pgtable.h doesn't do assembly
#define PAGE_PHYS 0x0010000000000761 //__pgprot(__DIRTY_BITS|_PAGE_PL_2|_PAGE_AR_RWX)
#define _PAGE_PL_2 (2<<7)

#if 1 // change to 0 to turn off all fast paths
#define FAST_HYPERPRIVOPS
#define FAST_HYPERPRIVOP_CNT
#define FAST_REFLECT_CNT
//#define FAST_TICK // mostly working (unat problems) but default off for now
//#define FAST_TLB_MISS_REFLECT // mostly working but default off for now
//#define FAST_ITC // working but default off for now
#define FAST_BREAK
#define FAST_ACCESS_REFLECT
#define FAST_RFI
#define FAST_SSM_I
#define FAST_PTC_GA
#undef RFI_TO_INTERRUPT // not working yet
#endif

#define XEN_HYPER_RFI 0x1
#define XEN_HYPER_RSM_DT 0x2
#define XEN_HYPER_SSM_DT 0x3
#define XEN_HYPER_COVER 0x4
#define XEN_HYPER_ITC_D 0x5
#define XEN_HYPER_ITC_I 0x6
#define XEN_HYPER_SSM_I 0x7
#define XEN_HYPER_GET_IVR 0x8
#define XEN_HYPER_GET_TPR 0x9
#define XEN_HYPER_SET_TPR 0xa
#define XEN_HYPER_EOI 0xb
#define XEN_HYPER_SET_ITM 0xc
#define XEN_HYPER_THASH 0xd
#define XEN_HYPER_PTC_GA 0xe
#define XEN_HYPER_ITR_D 0xf
#define XEN_HYPER_GET_RR 0x10
#define XEN_HYPER_SET_RR 0x11
#define XEN_HYPER_SET_KR 0x12

#ifdef CONFIG_SMP
#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
#undef FAST_PTC_GA
#endif

// FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
#define HANDLE_AR_UNAT

// FIXME: This is defined in include/asm-ia64/hw_irq.h but this
// doesn't appear to be include'able from assembly?
#define IA64_TIMER_VECTOR 0xef

// Should be included from common header file (also in process.c)
// NO PSR_CLR IS DIFFERENT! (CPL)
#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
#define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
// note IA64_PSR_PK removed from following, why is this necessary?
#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
			IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
			IA64_PSR_IT | IA64_PSR_BN)

#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
			IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
			IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
			IA64_PSR_MC | IA64_PSR_IS | \
			IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
			IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
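// Every fast reflection below derives the new cr.ipsr from the interrupted
// one by the same pattern; in C (a sketch, the function name is
// illustrative only):
//	u64 ipsr_for_delivery(u64 ipsr)
//	{
//		return (ipsr | DELIVER_PSR_SET) & ~DELIVER_PSR_CLR;
//	}
// (cpl is then patched separately; CPL1 set with CPL0 clear yields ring 2.)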

// Note: not hand-scheduled for now
//  Registers at entry
//	r16 == cr.isr
//	r17 == cr.iim
//	r18 == XSI_PSR_IC_OFS
//	r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
//	r31 == pr
GLOBAL_ENTRY(fast_hyperprivop)
#ifndef FAST_HYPERPRIVOPS // see beginning of file
	br.sptk.many dispatch_break_fault ;;
#endif
	// HYPERPRIVOP_SSM_I?
	// assumes domain interrupts pending, so just do it
	cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
(p7)	br.sptk.many hyper_ssm_i;;

	// FIXME. This algorithm gives up (goes to the slow path) if there
	// are ANY interrupts pending, even if they are currently
	// undeliverable.  This should be improved later...
	adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r20=[r20] ;;
	cmp.eq p7,p0=r0,r20
(p7)	br.cond.sptk.many 1f
	movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r20=[r20];;
	adds r21=IA64_VCPU_IRR0_OFFSET,r20;
	adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
	ld8 r23=[r21],16; ld8 r24=[r22],16;;
	ld8 r21=[r21]; ld8 r22=[r22];;
	or r23=r23,r24; or r21=r21,r22;;
	or r20=r23,r21;;
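	// A rough C sketch of what r20 holds after this block:
	//	u64 pending = irr[0] | irr[1] | irr[2] | irr[3];
	// i.e. nonzero iff any bit is posted in the vcpu's 256-bit irr
	// (the four 64-bit words at IA64_VCPU_IRR0_OFFSET).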
1:	// when we get to here r20=~=interrupts pending

	// HYPERPRIVOP_RFI?
	cmp.eq p7,p6=XEN_HYPER_RFI,r17
(p7)	br.sptk.many hyper_rfi;;

	// HYPERPRIVOP_GET_IVR?
	cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
(p7)	br.sptk.many hyper_get_ivr;;

	cmp.ne p7,p0=r20,r0
(p7)	br.spnt.many dispatch_break_fault ;;

	// HYPERPRIVOP_COVER?
	cmp.eq p7,p6=XEN_HYPER_COVER,r17
(p7)	br.sptk.many hyper_cover;;

	// HYPERPRIVOP_SSM_DT?
	cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
(p7)	br.sptk.many hyper_ssm_dt;;

	// HYPERPRIVOP_RSM_DT?
	cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
(p7)	br.sptk.many hyper_rsm_dt;;

	// HYPERPRIVOP_GET_TPR?
	cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
(p7)	br.sptk.many hyper_get_tpr;;

	// HYPERPRIVOP_SET_TPR?
	cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
(p7)	br.sptk.many hyper_set_tpr;;

	// HYPERPRIVOP_EOI?
	cmp.eq p7,p6=XEN_HYPER_EOI,r17
(p7)	br.sptk.many hyper_eoi;;

	// HYPERPRIVOP_SET_ITM?
	cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
(p7)	br.sptk.many hyper_set_itm;;

	// HYPERPRIVOP_SET_RR?
	cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
(p7)	br.sptk.many hyper_set_rr;;

	// HYPERPRIVOP_GET_RR?
	cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
(p7)	br.sptk.many hyper_get_rr;;

	// HYPERPRIVOP_PTC_GA?
	cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
(p7)	br.sptk.many hyper_ptc_ga;;

	// HYPERPRIVOP_ITC_D?
	cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
(p7)	br.sptk.many hyper_itc_d;;

	// HYPERPRIVOP_ITC_I?
	cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
(p7)	br.sptk.many hyper_itc_i;;

	// HYPERPRIVOP_THASH?
	cmp.eq p7,p6=XEN_HYPER_THASH,r17
(p7)	br.sptk.many hyper_thash;;

	// HYPERPRIVOP_SET_KR?
	cmp.eq p7,p6=XEN_HYPER_SET_KR,r17
(p7)	br.sptk.many hyper_set_kr;;

	// if not one of the above, give up for now and do it the slow way
	br.sptk.many dispatch_break_fault ;;

// give up for now if: ipsr.be==1, ipsr.pp==1
// from reflect_interruption, don't need to:
//  - printf first extint (debug only)
//  - check for interrupt collection enabled (routine will force on)
//  - set ifa (not valid for extint)
//  - set iha (not valid for extint)
//  - set itir (not valid for extint)
// DO need to
//  - increment the HYPER_SSM_I fast_hyperprivop counter
//  - set shared_mem iip to instruction after HYPER_SSM_I
//  - set cr.iip to guest iva+0x3000
//  - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
//	be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
//	i = shared_mem interrupt_delivery_enabled
//	ic = shared_mem interrupt_collection_enabled
//	ri = instruction after HYPER_SSM_I
//	all other bits unchanged from real cr.ipsr
//  - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
//  - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
//	and isr.ri to cr.isr.ri (all other bits zero)
//  - cover and set shared_mem precover_ifs to cr.ifs
//		^^^ MISSED THIS FOR fast_break??
//  - set shared_mem ifs and incomplete_regframe to 0
//  - set shared_mem interrupt_delivery_enabled to 0
//  - set shared_mem interrupt_collection_enabled to 0
//  - set r31 to SHAREDINFO_ADDR
//  - virtual bank switch 0
// maybe implement later
//  - verify that there really IS a deliverable interrupt pending
//  - set shared_mem iva
// needs to be done but not implemented (in reflect_interruption)
//  - set shared_mem iipa
// don't know for sure
//  - set shared_mem unat
//	r16 == cr.isr
//	r17 == cr.iim
//	r18 == XSI_PSR_IC
//	r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
//	r31 == pr
ENTRY(hyper_ssm_i)
#ifndef FAST_SSM_I
	br.spnt.few dispatch_break_fault ;;
#endif
	// give up for now if: ipsr.be==1, ipsr.pp==1
	mov r30=cr.ipsr;;
	mov r29=cr.iip;;
	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
	cmp.ne p7,p0=r21,r0
(p7)	br.sptk.many dispatch_break_fault ;;
	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
	cmp.ne p7,p0=r21,r0
(p7)	br.sptk.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	// set shared_mem iip to instruction after HYPER_SSM_I
	extr.u r20=r30,41,2 ;;
	cmp.eq p6,p7=2,r20 ;;
(p6)	mov r20=0
(p6)	adds r29=16,r29
(p7)	adds r20=1,r20 ;;
	dep r30=r20,r30,41,2;;	// adjust cr.ipsr.ri but don't save yet
	adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r29 ;;
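	// The ri/iip adjustment above is the usual IA-64 step-past-the-
	// current-instruction rule; roughly, in C:
	//	if (ipsr.ri == 2) { ipsr.ri = 0; iip += 16; }	// next bundle
	//	else ipsr.ri++;					// next slot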
	// set shared_mem isr
	extr.u r16=r16,38,1;;	// grab cr.isr.ir bit
	dep r16=r16,r0,38,1 ;;	// insert into cr.isr (rest of bits zero)
	dep r16=r20,r16,41,2 ;;	// deposit cr.isr.ei
	adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r16 ;;
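	// In C terms, the guest isr built above is just
	//	isr = (cr_isr & (1UL << 38)) | ((u64)new_ri << 41);
	// i.e. isr.ir preserved, isr.ei = slot of the next instruction,
	// every other bit zero.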
	// set cr.ipsr
	mov r29=r30 ;;
	movl r28=DELIVER_PSR_SET;;
	movl r27=~DELIVER_PSR_CLR;;
	or r29=r29,r28;;
	and r29=r29,r27;;
	mov cr.ipsr=r29;;
	// set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
	extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
	cmp.eq p6,p7=3,r29;;
(p6)	dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
(p7)	dep r30=0,r30,IA64_PSR_CPL0_BIT,2
	;;
	// FOR SSM_I ONLY, also turn on psr.i and psr.ic
	movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
//	movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
	movl r27=~(IA64_PSR_BE|IA64_PSR_BN);;
	or r30=r30,r28;;
	and r30=r30,r27;;
	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r30 ;;
	// set shared_mem interrupt_delivery_enabled to 0
	// set shared_mem interrupt_collection_enabled to 0
	st8 [r18]=r0;;
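	// The XSI_PSR_IC word packs vpsr.ic in its low 32 bits and vpsr.i
	// in its high 32 bits (cf. r19 at entry), so the single 8-byte
	// store of zero above clears both flags at once.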
	// cover and set shared_mem precover_ifs to cr.ifs
	// set shared_mem ifs and incomplete_regframe to 0
	cover ;;
	mov r20=cr.ifs;;
	adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r21]=r0 ;;
	adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r0 ;;
	adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r20 ;;
	// leave cr.ifs alone for later rfi
	// set iip to go to domain IVA break instruction vector
	movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r22=[r22];;
	adds r22=IA64_VCPU_IVA_OFFSET,r22;;
	ld8 r23=[r22];;
	movl r24=0x3000;;
	add r24=r24,r23;;
	mov cr.iip=r24;;
	// OK, now all set to go except for switch to virtual bank0
	mov r30=r2; mov r29=r3;;
	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
	bsw.1;;
	// FIXME?: ar.unat is not really handled correctly,
	// but may not matter if the OS is NaT-clean
	.mem.offset 0,0; st8.spill [r2]=r16,16;
	.mem.offset 8,0; st8.spill [r3]=r17,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r18,16;
	.mem.offset 8,0; st8.spill [r3]=r19,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r20,16;
	.mem.offset 8,0; st8.spill [r3]=r21,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r22,16;
	.mem.offset 8,0; st8.spill [r3]=r23,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r24,16;
	.mem.offset 8,0; st8.spill [r3]=r25,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r26,16;
	.mem.offset 8,0; st8.spill [r3]=r27,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r28,16;
	.mem.offset 8,0; st8.spill [r3]=r29,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r30,16;
	.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
	movl r31=XSI_IPSR;;
	bsw.0 ;;
	mov r2=r30; mov r3=r29;;
	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r20]=r0 ;;
	mov pr=r31,-1 ;;
	rfi
	;;

// reflect domain clock interrupt
//	r31 == pr
//	r30 == cr.ivr
//	r29 == rp
GLOBAL_ENTRY(fast_tick_reflect)
#ifndef FAST_TICK // see beginning of file
	br.cond.sptk.many rp;;
#endif
	mov r28=IA64_TIMER_VECTOR;;
	cmp.ne p6,p0=r28,r30
(p6)	br.cond.spnt.few rp;;
	movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
	ld8 r26=[r20];;
	mov r27=ar.itc;;
	adds r27=200,r27;;	// safety margin
	cmp.ltu p6,p0=r26,r27
(p6)	br.cond.spnt.few rp;;
	mov r17=cr.ipsr;;
	// slow path if: ipsr.be==1, ipsr.pp==1
	extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
	cmp.ne p6,p0=r21,r0
(p6)	br.cond.spnt.few rp;;
	extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
	cmp.ne p6,p0=r21,r0
(p6)	br.cond.spnt.few rp;;
	// definitely have a domain tick
	mov cr.eoi=r0;;
	mov rp=r29;;
	mov cr.itm=r26;;	// ensure next tick
#ifdef FAST_REFLECT_CNT
	movl r20=fast_reflect_count+((0x3000>>8)*8);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	// vcpu_pend_timer(current)
	movl r18=XSI_PSR_IC;;
	adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r20=[r20];;
	cmp.eq p6,p0=r20,r0	// if cr.itv==0 done
(p6)	br.cond.spnt.few fast_tick_reflect_done;;
	tbit.nz p6,p0=r20,16;;	// check itv.m (discard) bit
(p6)	br.cond.spnt.few fast_tick_reflect_done;;
	extr.u r27=r20,0,6	// r27 has low 6 bits of itv.vector
	extr.u r26=r20,6,2;;	// r26 has irr index of itv.vector
	movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r19=[r19];;
	adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
	adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
	ld8 r24=[r22];;
	ld8 r23=[r23];;
	cmp.eq p6,p0=r23,r24	// skip if this tick already delivered
(p6)	br.cond.spnt.few fast_tick_reflect_done;;
	// set irr bit
	adds r21=IA64_VCPU_IRR0_OFFSET,r19;
	shl r26=r26,3;;
	add r21=r21,r26;;
	mov r25=1;;
	shl r22=r25,r27;;
	ld8 r23=[r21];;
	or r22=r22,r23;;
	st8 [r21]=r22;;
	// set PSCB(pending_interruption)!
	adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r20]=r25;;
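	// Net effect of the pend above, sketched in C (vec = itv.vector,
	// the low 8 bits of cr.itv):
	//	vcpu->irr[vec >> 6] |= 1UL << (vec & 63);
	//	PSCB(vcpu, pending_interruption) = 1;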

	// if interrupted at pl0, we're done
	extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
	cmp.eq p6,p0=r16,r0;;
(p6)	br.cond.spnt.few fast_tick_reflect_done;;
	// if guest vpsr.i is off, we're done
	adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r21=[r21];;
	cmp.eq p6,p0=r21,r0
(p6)	br.cond.spnt.few fast_tick_reflect_done;;

	// OK, we have a clock tick to deliver to the active domain!
	// so deliver to iva+0x3000
	//	r17 == cr.ipsr
	//	r18 == XSI_PSR_IC
	//	r19 == IA64_KR(CURRENT)
	//	r31 == pr
	mov r16=cr.isr;;
	mov r29=cr.iip;;
	adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r29 ;;
	// set shared_mem isr
	extr.u r16=r16,38,1;;	// grab cr.isr.ir bit
	dep r16=r16,r0,38,1 ;;	// insert into cr.isr (rest of bits zero)
	extr.u r20=r17,41,2 ;;	// get ipsr.ri
	dep r16=r20,r16,41,2 ;;	// deposit cr.isr.ei
	adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r16 ;;
	// set cr.ipsr (make sure cpl==2!)
	mov r29=r17 ;;
	movl r28=DELIVER_PSR_SET;;
	movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
	or r29=r29,r28;;
	and r29=r29,r27;;
	mov cr.ipsr=r29;;
	// set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
	extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
	cmp.eq p6,p7=3,r29;;
(p6)	dep r17=-1,r17,IA64_PSR_CPL0_BIT,2
(p7)	dep r17=0,r17,IA64_PSR_CPL0_BIT,2
	;;
	movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
	movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
	dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
	or r17=r17,r28;;
	and r17=r17,r27;;
	ld4 r16=[r18],4;;
	cmp.ne p6,p0=r16,r0;;
(p6)	dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
	ld4 r16=[r18],-4;;
	cmp.ne p6,p0=r16,r0;;
(p6)	dep r17=-1,r17,IA64_PSR_I_BIT,1 ;;
	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r17 ;;
	// set shared_mem interrupt_delivery_enabled to 0
	// set shared_mem interrupt_collection_enabled to 0
	st8 [r18]=r0;;
	// cover and set shared_mem precover_ifs to cr.ifs
	// set shared_mem ifs and incomplete_regframe to 0
	cover ;;
	mov r20=cr.ifs;;
	adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r21]=r0 ;;
	adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r0 ;;
	adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r20 ;;
	// leave cr.ifs alone for later rfi
	// set iip to go to domain IVA break instruction vector
	adds r22=IA64_VCPU_IVA_OFFSET,r19;;
	ld8 r23=[r22];;
	movl r24=0x3000;;
	add r24=r24,r23;;
	mov cr.iip=r24;;
	// OK, now all set to go except for switch to virtual bank0
	mov r30=r2; mov r29=r3;;
#ifdef HANDLE_AR_UNAT
	mov r28=ar.unat;
#endif
	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
	bsw.1;;
	.mem.offset 0,0; st8.spill [r2]=r16,16;
	.mem.offset 8,0; st8.spill [r3]=r17,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r18,16;
	.mem.offset 8,0; st8.spill [r3]=r19,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r20,16;
	.mem.offset 8,0; st8.spill [r3]=r21,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r22,16;
	.mem.offset 8,0; st8.spill [r3]=r23,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r24,16;
	.mem.offset 8,0; st8.spill [r3]=r25,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r26,16;
	.mem.offset 8,0; st8.spill [r3]=r27,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r28,16;
	.mem.offset 8,0; st8.spill [r3]=r29,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r30,16;
	.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
#ifdef HANDLE_AR_UNAT
	// r16~r23 are preserved regs in bank0; we need to restore them.
	// r24~r31 are scratch regs, so we need not handle their NaT bits,
	// because the OS handler must assign them before accessing them.
	ld8 r16=[r2],16;
	ld8 r17=[r3],16;;
	ld8 r18=[r2],16;
	ld8 r19=[r3],16;;
	ld8 r20=[r2],16;
	ld8 r21=[r3],16;;
	ld8 r22=[r2],16;
	ld8 r23=[r3],16;;
#endif
	movl r31=XSI_IPSR;;
	bsw.0 ;;
	mov r24=ar.unat;
	mov r2=r30; mov r3=r29;;
#ifdef HANDLE_AR_UNAT
	mov ar.unat=r28;
#endif
	adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;
	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r25]=r24;
	st4 [r20]=r0 ;;
fast_tick_reflect_done:
	mov pr=r31,-1 ;;
	rfi
END(fast_tick_reflect)

// reflect domain breaks directly to domain
//	r16 == cr.isr
//	r17 == cr.iim
//	r18 == XSI_PSR_IC
//	r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
//	r31 == pr
GLOBAL_ENTRY(fast_break_reflect)
#ifndef FAST_BREAK // see beginning of file
	br.sptk.many dispatch_break_fault ;;
#endif
	mov r30=cr.ipsr;;
	mov r29=cr.iip;;
	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
	cmp.ne p7,p0=r21,r0 ;;
(p7)	br.spnt.few dispatch_break_fault ;;
	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
	cmp.ne p7,p0=r21,r0 ;;
(p7)	br.spnt.few dispatch_break_fault ;;
	movl r20=IA64_PSR_CPL ;;
	and r22=r20,r30 ;;
	cmp.ne p7,p0=r22,r0
(p7)	br.spnt.many 1f ;;
	cmp.eq p7,p0=r17,r0
(p7)	br.spnt.few dispatch_break_fault ;;
#ifdef CRASH_DEBUG
	movl r21=CDB_BREAK_NUM ;;
	cmp.eq p7,p0=r17,r21
(p7)	br.spnt.few dispatch_break_fault ;;
#endif
1:
#if 1 /* special handling in case running on simulator */
	movl r20=first_break;;
	ld4 r23=[r20];;
	movl r21=0x80001;
	movl r22=0x80002;;
	cmp.ne p7,p0=r23,r0;;
(p7)	br.spnt.few dispatch_break_fault ;;
	cmp.eq p7,p0=r21,r17;
(p7)	br.spnt.few dispatch_break_fault ;;
	cmp.eq p7,p0=r22,r17;
(p7)	br.spnt.few dispatch_break_fault ;;
#endif
	movl r20=0x2c00;
	// save iim in shared_info
	adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r17;;
	// fall through

// reflect to domain ivt+r20
// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
//	r16 == cr.isr
//	r18 == XSI_PSR_IC
//	r20 == offset into ivt
//	r29 == iip
//	r30 == ipsr
//	r31 == pr
ENTRY(fast_reflect)
#ifdef FAST_REFLECT_CNT
	movl r22=fast_reflect_count;
	shr r23=r20,5;;
	add r22=r22,r23;;
	ld8 r21=[r22];;
	adds r21=1,r21;;
	st8 [r22]=r21;;
#endif
	// save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
	adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r29;;
	// set shared_mem isr
	adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r16 ;;
	// set cr.ipsr
	mov r29=r30 ;;
	movl r28=DELIVER_PSR_SET;;
	movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
	or r29=r29,r28;;
	and r29=r29,r27;;
	mov cr.ipsr=r29;;
	// set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
	extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
	cmp.eq p6,p7=3,r29;;
(p6)	dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
(p7)	dep r30=0,r30,IA64_PSR_CPL0_BIT,2
	;;
	movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
	movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
	or r30=r30,r28;;
	and r30=r30,r27;;
	// also set shared_mem ipsr.i and ipsr.ic appropriately
	ld8 r24=[r18];;
	extr.u r22=r24,32,32
	cmp4.eq p6,p7=r24,r0;;
(p6)	dep r30=0,r30,IA64_PSR_IC_BIT,1
(p7)	dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
	cmp4.eq p6,p7=r22,r0;;
(p6)	dep r30=0,r30,IA64_PSR_I_BIT,1
(p7)	dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r30 ;;
	// set shared_mem interrupt_delivery_enabled to 0
	// set shared_mem interrupt_collection_enabled to 0
	st8 [r18]=r0;;
	// cover and set shared_mem precover_ifs to cr.ifs
	// set shared_mem ifs and incomplete_regframe to 0
	cover ;;
	mov r24=cr.ifs;;
	adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r21]=r0 ;;
	adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r0 ;;
	adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r24 ;;
	// vpsr.i = vpsr.ic = 0 on delivery of interruption
	st8 [r18]=r0;;
	// FIXME: need to save iipa and isr to be arch-compliant
	// set iip to go to domain IVA break instruction vector
	movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r22=[r22];;
	adds r22=IA64_VCPU_IVA_OFFSET,r22;;
	ld8 r23=[r22];;
	add r20=r20,r23;;
	mov cr.iip=r20;;
	// OK, now all set to go except for switch to virtual bank0
	mov r30=r2; mov r29=r3;;
#ifdef HANDLE_AR_UNAT
	mov r28=ar.unat;
#endif
	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
	bsw.1;;
	.mem.offset 0,0; st8.spill [r2]=r16,16;
	.mem.offset 8,0; st8.spill [r3]=r17,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r18,16;
	.mem.offset 8,0; st8.spill [r3]=r19,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r20,16;
	.mem.offset 8,0; st8.spill [r3]=r21,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r22,16;
	.mem.offset 8,0; st8.spill [r3]=r23,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r24,16;
	.mem.offset 8,0; st8.spill [r3]=r25,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r26,16;
	.mem.offset 8,0; st8.spill [r3]=r27,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r28,16;
	.mem.offset 8,0; st8.spill [r3]=r29,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r30,16;
	.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
#ifdef HANDLE_AR_UNAT
	// r16~r23 are preserved regs in bank0; we need to restore them.
	// r24~r31 are scratch regs, so we need not handle their NaT bits,
	// because the OS handler must assign them before accessing them.
	ld8 r16=[r2],16;
	ld8 r17=[r3],16;;
	ld8 r18=[r2],16;
	ld8 r19=[r3],16;;
	ld8 r20=[r2],16;
	ld8 r21=[r3],16;;
	ld8 r22=[r2],16;
	ld8 r23=[r3],16;;
#endif
	movl r31=XSI_IPSR;;
	bsw.0 ;;
	mov r24=ar.unat;
	mov r2=r30; mov r3=r29;;
#ifdef HANDLE_AR_UNAT
	mov ar.unat=r28;
#endif
	adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;
	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r25]=r24;
	st4 [r20]=r0 ;;
	mov pr=r31,-1 ;;
	rfi
	;;

// reflect access faults (0x2400,0x2800,0x5300) directly to domain
//	r16 == isr
//	r17 == ifa
//	r19 == reflect number (only pass-thru to dispatch_reflection)
//	r20 == offset into ivt
//	r31 == pr
GLOBAL_ENTRY(fast_access_reflect)
#ifndef FAST_ACCESS_REFLECT // see beginning of file
	br.spnt.few dispatch_reflection ;;
#endif
	mov r30=cr.ipsr;;
	mov r29=cr.iip;;
	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
	cmp.ne p7,p0=r21,r0
(p7)	br.spnt.few dispatch_reflection ;;
	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
	cmp.ne p7,p0=r21,r0
(p7)	br.spnt.few dispatch_reflection ;;
	extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
	cmp.eq p7,p0=r21,r0
(p7)	br.spnt.few dispatch_reflection ;;
	movl r18=XSI_PSR_IC;;
	ld8 r21=[r18];;
	cmp.eq p7,p0=r0,r21
(p7)	br.spnt.few dispatch_reflection ;;
	// set shared_mem ifa, FIXME: should we validate it?
	mov r17=cr.ifa;;
	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r17 ;;
	// get rr[ifa] and save to itir in shared memory (extra bits ignored)
	shr.u r22=r17,61
	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
	adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
	shladd r22=r22,3,r21;;
	ld8 r22=[r22];;
	st8 [r23]=r22;;
	br.cond.sptk.many fast_reflect;;

// when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
// is as it was at the time of original miss.  We want to preserve that
// so if we get a nested fault, we can just branch to page_fault
GLOBAL_ENTRY(fast_tlb_miss_reflect)
#ifndef FAST_TLB_MISS_REFLECT // see beginning of file
	br.spnt.few page_fault ;;
#endif
	mov r31=pr
	mov r30=cr.ipsr
	mov r29=cr.iip
	mov r16=cr.isr
	mov r17=cr.ifa;;
	// for now, always take slow path for region 0 (e.g. metaphys mode)
	extr.u r21=r17,61,3;;
	cmp.eq p7,p0=r0,r21
(p7)	br.spnt.few page_fault ;;
	// always take slow path for PL0 (e.g. __copy_from_user)
	extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
	cmp.eq p7,p0=r21,r0
(p7)	br.spnt.few page_fault ;;
	// slow path if strange ipsr or isr bits set
	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
	cmp.ne p7,p0=r21,r0
(p7)	br.spnt.few page_fault ;;
	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
	cmp.ne p7,p0=r21,r0
(p7)	br.spnt.few page_fault ;;
	movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
	and r21=r16,r21;;
	cmp.ne p7,p0=r0,r21
(p7)	br.spnt.few page_fault ;;
	// also take slow path if virtual psr.ic=0
	movl r18=XSI_PSR_IC;;
	ld4 r21=[r18];;
	cmp.eq p7,p0=r0,r21
(p7)	br.spnt.few page_fault ;;
	// OK, if we get to here, we are doing a fast vcpu_translate.  Need to:
	// 1) look in the virtual TR's (pinned), if not there
	// 2) look in the 1-entry TLB (pinned), if not there
	// 3) check the domain VHPT (NOT pinned, accesses domain memory!)
	// If we find it in any of these places, we need to effectively do
	// a hyper_itc_i/d

	// short-term hack for now, if in region 5-7, take slow path
	// since all Linux TRs are in region 5 or 7, we need not check TRs
	extr.u r21=r17,61,3;;
	cmp.le p7,p0=5,r21
(p7)	br.spnt.few page_fault ;;
fast_tlb_no_tr_match:
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
(p6)	adds r25=IA64_VCPU_ITLB_OFFSET,r27;;
(p7)	adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
	ld8 r20=[r25],8;;
	tbit.z p7,p0=r20,0;;	// present?
(p7)	br.cond.spnt.few 1f;;
	// if ifa is in range of tlb, don't bother to check rid, go slow path
	ld8 r21=[r25],8;;
	mov r23=1
	extr.u r21=r21,2,6;;
	shl r22=r23,r21
	ld8 r21=[r25],8;;
	cmp.ltu p7,p0=r17,r21
(p7)	br.cond.sptk.many 1f;
	add r21=r22,r21;;
	cmp.ltu p7,p0=r17,r21
(p7)	br.cond.spnt.few page_fault;;

1:	// check the guest VHPT
	adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
	ld8 r19=[r19];;
	tbit.nz p7,p0=r19,IA64_PTA_VF_BIT;;	// long format VHPT
(p7)	br.cond.spnt.few page_fault;;
	// if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
	// FIXME: later, we deliver an alt_d/i vector after thash and itir
	tbit.z p7,p0=r19,IA64_PTA_VE_BIT;;
(p7)	br.cond.spnt.few page_fault;;
	extr.u r25=r17,61,3;;
	adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
	shl r25=r25,3;;
	add r21=r21,r25;;
	ld8 r22=[r21];;
	tbit.z p7,p0=r22,0
(p7)	br.cond.spnt.few page_fault;;

	// compute and save away itir (r22 & RR_PS_MASK)
	movl r21=0xfc;;
	and r22=r22,r21;;
	adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r22;;

	// save away ifa
	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r21]=r17;;
	// see vcpu_thash to save away iha
	shr.u r20 = r17, 61
	addl r25 = 1, r0
	movl r30 = 0xe000000000000000
	;;
	and r21 = r30, r17	// VHPT_Addr1
	;;
	shladd r28 = r20, 3, r18
	adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
	;;
	adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
	addl r28 = 32767, r0
	ld8 r24 = [r19]		// pta
	;;
	ld8 r23 = [r27]		// rrs[vadr>>61]
	extr.u r26 = r24, 2, 6
	;;
	extr.u r22 = r23, 2, 6
	shl r30 = r25, r26
	;;
	shr.u r19 = r17, r22
	shr.u r29 = r24, 15
	;;
	adds r30 = -1, r30
	;;
	shladd r27 = r19, 3, r0
	extr.u r26 = r30, 15, 46
	;;
	andcm r24 = r29, r26
	and r19 = r28, r27
	shr.u r25 = r27, 15
	;;
	and r23 = r26, r25
	;;
	or r22 = r24, r23
	;;
	dep.z r20 = r22, 15, 46
	;;
	or r30 = r20, r21
	;;
	//or r8 = r19, r30
	or r19 = r19, r30
	;;
	adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r23]=r19;;
	// done with thash, check guest VHPT

	adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
	ld8 r24 = [r20];;	// pta
	// avoid recursively walking the VHPT
	// if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
	mov r20=-8
	xor r21=r17,r24
	extr.u r24=r24,2,6;;
	shl r20=r20,r24;;
	shr.u r20=r20,3;;
	and r21=r20,r21;;
	cmp.eq p7,p0=r21,r0
(p7)	br.cond.spnt.few 1f;;
	// __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
	// prepare for possible nested dtlb fault
	mov r29=b0
	movl r30=guest_vhpt_miss;;
	// now go fetch the entry from the guest VHPT
	ld8 r20=[r19];;
	// if we wind up here, we successfully loaded the VHPT entry

	// this VHPT walker aborts on non-present pages instead
	// of inserting a not-present translation, this allows
	// vectoring directly to the miss handler
	tbit.z p7,p0=r20,0
(p7)	br.cond.spnt.few page_not_present;;

#ifdef FAST_REFLECT_CNT
	movl r21=fast_vhpt_translate_count;;
	ld8 r22=[r21];;
	adds r22=1,r22;;
	st8 [r21]=r22;;
#endif

	// prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
	//	r16 == pte
	//	r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
	//	r18 == XSI_PSR_IC_OFS
	//	r24 == ps
	//	r29 == saved value of b0 in case of recovery
	//	r30 == recovery ip if failure occurs
	//	r31 == pr
	tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
(p6)	mov r17=1;;
(p7)	mov r17=0;;
	mov r16=r20
	mov r29=b0 ;;
	movl r30=recover_and_page_fault ;;
	adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r24=[r21];;
	extr.u r24=r24,2,6;;
	// IFA already in PSCB
	br.cond.sptk.many fast_insert;;

// we get here if fast_insert fails (e.g. due to metaphysical lookup)
ENTRY(recover_and_page_fault)
#ifdef FAST_REFLECT_CNT
	movl r21=recover_to_page_fault_count;;
	ld8 r22=[r21];;
	adds r22=1,r22;;
	st8 [r21]=r22;;
#endif
	mov b0=r29;;
	br.cond.sptk.many page_fault;;

// if we wind up here, we missed in guest VHPT so recover
// from nested dtlb fault and reflect a tlb fault to the guest
guest_vhpt_miss:
	mov b0=r29;;
	// fault = IA64_VHPT_FAULT
	mov r20=r0
	br.cond.sptk.many 1f;

// if we get to here, we are ready to reflect
// need to set up virtual ifa, iha, itir (fast_reflect handles
// virtual isr, iip, ipsr, ifs)
// see vcpu_get_itir_on_fault: get ps,rid,(FIXME: key) from rr[ifa]
page_not_present:
	tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
(p6)	movl r20=0x400;;
(p7)	movl r20=0x800;;

1:	extr.u r25=r17,61,3;;
	adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
	shl r25=r25,3;;
	add r21=r21,r25;;
	ld8 r22=[r21];;
	extr.u r22=r22,2,30;;
	dep.z r22=r22,2,30;;
	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r23]=r22;;

	// fast reflect expects
	//	r16 == cr.isr
	//	r18 == XSI_PSR_IC
	//	r20 == offset into ivt
	//	r29 == iip
	//	r30 == ipsr
	//	r31 == pr
	//mov r16=cr.isr
	mov r29=cr.iip
	mov r30=cr.ipsr
	br.sptk.many fast_reflect;;
END(fast_tlb_miss_reflect)

// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
ENTRY(hyper_rfi)
#ifndef FAST_RFI
	br.spnt.few dispatch_break_fault ;;
#endif
	// if no interrupts pending, proceed
	mov r30=r0
	cmp.eq p7,p0=r20,r0
(p7)	br.sptk.many 1f
	;;
	adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r21=[r20];;		// r21 = vcr.ipsr
	extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
	mov r30=r22
	// r30 determines whether we might deliver an immediate extint
1:
	adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r21=[r20];;		// r21 = vcr.ipsr
	extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
	// if turning on psr.be, give up for now and do it the slow way
	cmp.ne p7,p0=r22,r0
(p7)	br.spnt.few dispatch_break_fault ;;
	// if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
	movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
	and r22=r20,r21
	;;
	cmp.ne p7,p0=r22,r20
(p7)	br.spnt.few dispatch_break_fault ;;
	// if was in metaphys mode, do it the slow way (FIXME later?)
	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r20=[r20];;
	cmp.ne p7,p0=r20,r0
(p7)	br.spnt.few dispatch_break_fault ;;
	// if domain hasn't already done virtual bank switch
	//  do it the slow way (FIXME later?)
#if 0
	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r20=[r20];;
	cmp.eq p7,p0=r20,r0
(p7)	br.spnt.few dispatch_break_fault ;;
#endif
	// validate vcr.iip, if in Xen range, do it the slow way
	adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r22=[r20];;
	movl r23=XEN_VIRT_SPACE_LOW
	movl r24=XEN_VIRT_SPACE_HIGH ;;
	cmp.ltu p0,p7=r22,r23 ;;	// if !(iip<low) &&
(p7)	cmp.geu p0,p7=r22,r24 ;;	//    !(iip>=high)
(p7)	br.spnt.few dispatch_break_fault ;;
#ifndef RFI_TO_INTERRUPT // see beginning of file
	cmp.ne p6,p0=r30,r0
(p6)	br.cond.spnt.few dispatch_break_fault ;;
#endif

1:	// OK now, let's do an rfi.
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
	ld8 r23=[r20];;
	adds r23=1,r23;;
	st8 [r20]=r23;;
#endif
#ifdef RFI_TO_INTERRUPT
	// maybe do an immediate interrupt delivery?
	cmp.ne p6,p0=r30,r0
(p6)	br.cond.spnt.few rfi_check_extint;;
#endif

just_do_rfi:
	// r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
	mov cr.iip=r22;;
	adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r20]=r0 ;;
	adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r20=[r20];;
	dep r20=0,r20,38,25;;	// ensure ifs has no reserved bits set
	mov cr.ifs=r20 ;;
	// ipsr.cpl = (vcr.ipsr.cpl == 0) ? 2 : 3;
	dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
	// vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
	mov r19=r0 ;;
	extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
	cmp.ne p7,p6=r23,r0 ;;
	// not done yet
(p7)	dep r19=-1,r19,32,1
	extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
	cmp.ne p7,p6=r23,r0 ;;
(p7)	dep r19=-1,r19,0,1 ;;
	st8 [r18]=r19 ;;
	// force on psr.ic, i, dt, rt, it, bn
	movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
	;;
	or r21=r21,r20
	;;
	mov cr.ipsr=r21
	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r21=[r20];;
	cmp.ne p7,p0=r21,r0	// domain already did "bank 1 switch?"
(p7)	br.cond.spnt.few 1f;
	// OK, now all set to go except for switch to virtual bank1
	mov r22=1;; st4 [r20]=r22;
	mov r30=r2; mov r29=r3;;
	mov r17=ar.unat;;
	adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
	ld8 r16=[r16];;
	mov ar.unat=r16;;
	bsw.1;;
	// FIXME?: ar.unat is not really handled correctly,
	// but may not matter if the OS is NaT-clean
	.mem.offset 0,0; ld8.fill r16=[r2],16 ;
	.mem.offset 8,0; ld8.fill r17=[r3],16 ;;
	.mem.offset 0,0; ld8.fill r18=[r2],16 ;
	.mem.offset 0,0; ld8.fill r19=[r3],16 ;;
	.mem.offset 8,0; ld8.fill r20=[r2],16 ;
	.mem.offset 8,0; ld8.fill r21=[r3],16 ;;
	.mem.offset 8,0; ld8.fill r22=[r2],16 ;
	.mem.offset 8,0; ld8.fill r23=[r3],16 ;;
	.mem.offset 8,0; ld8.fill r24=[r2],16 ;
	.mem.offset 8,0; ld8.fill r25=[r3],16 ;;
	.mem.offset 8,0; ld8.fill r26=[r2],16 ;
	.mem.offset 8,0; ld8.fill r27=[r3],16 ;;
	.mem.offset 8,0; ld8.fill r28=[r2],16 ;
	.mem.offset 8,0; ld8.fill r29=[r3],16 ;;
	.mem.offset 8,0; ld8.fill r30=[r2],16 ;
	.mem.offset 8,0; ld8.fill r31=[r3],16 ;;
	bsw.0 ;;
	mov ar.unat=r17;;
	mov r2=r30; mov r3=r29;;
1:	mov pr=r31,-1
	;;
	rfi
	;;

#ifdef RFI_TO_INTERRUPT
GLOBAL_ENTRY(rfi_check_extint)
	//br.sptk.many dispatch_break_fault ;;

	// r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
	// make sure none of these get trashed in case going to just_do_rfi
	movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r30=[r30];;
	adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
	mov r25=192
	adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
	ld8 r23=[r16];;
	cmp.eq p6,p0=r23,r0;;
(p6)	adds r16=-8,r16;;
(p6)	adds r24=-8,r24;;
(p6)	adds r25=-64,r25;;
(p6)	ld8 r23=[r16];;
(p6)	cmp.eq p6,p0=r23,r0;;
(p6)	adds r16=-8,r16;;
(p6)	adds r24=-8,r24;;
(p6)	adds r25=-64,r25;;
(p6)	ld8 r23=[r16];;
(p6)	cmp.eq p6,p0=r23,r0;;
(p6)	adds r16=-8,r16;;
(p6)	adds r24=-8,r24;;
(p6)	adds r25=-64,r25;;
(p6)	ld8 r23=[r16];;
(p6)	cmp.eq p6,p0=r23,r0;;
	cmp.eq p6,p0=r23,r0
(p6)	br.cond.spnt.few just_do_rfi;	// this is actually an error
	// r16 points to non-zero element of irr, r23 has value
	// r24 points to corr element of insvc, r25 has elt*64
	ld8 r26=[r24];;
	cmp.geu p6,p0=r26,r23
(p6)	br.cond.spnt.many just_do_rfi;

	// not masked by insvc, get vector number
	shr.u r26=r23,1;;
	or r26=r23,r26;;
	shr.u r27=r26,2;;
	or r26=r26,r27;;
	shr.u r27=r26,4;;
	or r26=r26,r27;;
	shr.u r27=r26,8;;
	or r26=r26,r27;;
	shr.u r27=r26,16;;
	or r26=r26,r27;;
	shr.u r27=r26,32;;
	or r26=r26,r27;;
	andcm r26=0xffffffffffffffff,r26;;
	popcnt r26=r26;;
	sub r26=63,r26;;
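	// The shift/or cascade above is a branch-free highest-set-bit (fls):
	// smear the top bit down, then count the zeros above it.  In C:
	//	x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
	//	x |= x >> 8;  x |= x >> 16; x |= x >> 32;
	//	bit = 63 - popcount(~x);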
	// r26 now contains the bit index (mod 64)
	mov r27=1;;
	shl r27=r27,r26;;
	// r27 now contains the (within the proper word) bit mask
	add r26=r25,r26
	// r26 now contains the vector [0..255]
	adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r20=[r20] ;;
	extr.u r28=r20,16,1
	extr.u r29=r20,4,4 ;;
	cmp.ne p6,p0=r28,r0	// if tpr.mmi is set, just rfi
(p6)	br.cond.spnt.few just_do_rfi;;
	shl r29=r29,4;;
	adds r29=15,r29;;
	cmp.ge p6,p0=r29,r26	// if tpr masks interrupt, just rfi
(p6)	br.cond.spnt.few just_do_rfi;;
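	// I.e., deliver only if the vector beats the virtual TPR; in C:
	//	if (tpr.mmi || vec <= ((tpr.mic << 4) | 0xf))
	//		goto just_do_rfi;	// masked: plain rfi instead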

// this doesn't work yet (dies early after getting to user mode)
// but happens relatively infrequently, so fix it later.
// NOTE that these will be counted incorrectly for now (for privcnt output)
GLOBAL_ENTRY(rfi_with_interrupt)
#if 1
	br.sptk.many dispatch_break_fault ;;
#endif

	// OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
	//	r18 == XSI_PSR_IC
	//	r21 == vipsr (ipsr in shared_mem)
	//	r30 == IA64_KR(CURRENT)
	//	r31 == pr
	mov r17=cr.ipsr;;
	mov r16=cr.isr;;
	// set shared_mem isr
	extr.u r16=r16,38,1;;	// grab cr.isr.ir bit
	dep r16=r16,r0,38,1 ;;	// insert into cr.isr (rest of bits zero)
	extr.u r20=r21,41,2 ;;	// get v(!)psr.ri
	dep r16=r20,r16,41,2 ;;	// deposit cr.isr.ei
	adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r22]=r16 ;;
	// set cr.ipsr (make sure cpl==2!)
	mov r29=r17 ;;
	movl r28=DELIVER_PSR_SET;;
	movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
	or r29=r29,r28;;
	and r29=r29,r27;;
	mov cr.ipsr=r29;;
	// v.ipsr and v.iip are already set (and v.iip validated) as rfi target
	// set shared_mem interrupt_delivery_enabled to 0
	// set shared_mem interrupt_collection_enabled to 0
	st8 [r18]=r0;;
	// cover and set shared_mem precover_ifs to cr.ifs
	// set shared_mem ifs and incomplete_regframe to 0
#if 0
	cover ;;
	mov r20=cr.ifs;;
	adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r22]=r0 ;;
	adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r22]=r0 ;;
	adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r22]=r20 ;;
	// leave cr.ifs alone for later rfi
#else
	adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r22]=r0 ;;
	adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r20=[r22];;
	st8 [r22]=r0 ;;
	adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
	st8 [r22]=r20 ;;
#endif
	// set iip to go to domain IVA break instruction vector
	adds r22=IA64_VCPU_IVA_OFFSET,r30;;
	ld8 r23=[r22];;
	movl r24=0x3000;;
	add r24=r24,r23;;
	mov cr.iip=r24;;
#if 0
	// OK, now all set to go except for switch to virtual bank0
	mov r30=r2; mov r29=r3;;
	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
	bsw.1;;
	// FIXME: need to handle ar.unat!
	.mem.offset 0,0; st8.spill [r2]=r16,16;
	.mem.offset 8,0; st8.spill [r3]=r17,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r18,16;
	.mem.offset 8,0; st8.spill [r3]=r19,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r20,16;
	.mem.offset 8,0; st8.spill [r3]=r21,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r22,16;
	.mem.offset 8,0; st8.spill [r3]=r23,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r24,16;
	.mem.offset 8,0; st8.spill [r3]=r25,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r26,16;
	.mem.offset 8,0; st8.spill [r3]=r27,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r28,16;
	.mem.offset 8,0; st8.spill [r3]=r29,16 ;;
	.mem.offset 0,0; st8.spill [r2]=r30,16;
	.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
	movl r31=XSI_IPSR;;
	bsw.0 ;;
	mov r2=r30; mov r3=r29;;
#else
	bsw.1;;
	movl r31=XSI_IPSR;;
	bsw.0 ;;
#endif
	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
	st4 [r20]=r0 ;;
	mov pr=r31,-1 ;;
	rfi
#endif // RFI_TO_INTERRUPT

ENTRY(hyper_cover)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	// skip test for vpsr.ic.. it's a prerequisite for hyperprivops
	cover ;;
	adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
	mov r30=cr.ifs;;
	adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
	ld4 r21=[r20] ;;
	cmp.eq p6,p7=r21,r0 ;;
(p6)	st8 [r22]=r30;;
(p7)	st4 [r20]=r0;;
	mov cr.ifs=r0;;
	// adjust return address to skip over break instruction
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;

// return from metaphysical mode (meta=1) to virtual mode (meta=0)
ENTRY(hyper_ssm_dt)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r21=[r20];;
	cmp.eq p7,p0=r21,r0	// meta==0?
(p7)	br.spnt.many 1f ;;	// already in virtual mode
	movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r22=[r22];;
	adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
	ld8 r23=[r22];;
	mov rr[r0]=r23;;
	srlz.i;;
	st4 [r20]=r0 ;;
	// adjust return address to skip over break instruction
1:	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;

// go to metaphysical mode (meta=1) from virtual mode (meta=0)
ENTRY(hyper_rsm_dt)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld4 r21=[r20];;
	cmp.ne p7,p0=r21,r0	// meta==0?
(p7)	br.spnt.many 1f ;;	// already in metaphysical mode
	movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r22=[r22];;
	adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
	ld8 r23=[r22];;
	mov rr[r0]=r23;;
	srlz.i;;
	adds r21=1,r0 ;;
	st4 [r20]=r21 ;;
	// adjust return address to skip over break instruction
1:	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;

ENTRY(hyper_get_tpr)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r8=[r20];;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_get_tpr)

// if we get to here, there are no interrupts pending so we
// can change virtual tpr to any value without fear of provoking
// (or accidentally missing) delivering an interrupt
ENTRY(hyper_set_tpr)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	movl r27=0xff00;;
	adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
	andcm r8=r8,r27;;
	st8 [r20]=r8;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_set_tpr)

ENTRY(hyper_get_ivr)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
	ld8 r21=[r22];;
	adds r21=1,r21;;
	st8 [r22]=r21;;
#endif
	mov r8=15;;
	// when we get to here r20=~=interrupts pending
	cmp.eq p7,p0=r20,r0;;
(p7)	adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
(p7)	st4 [r20]=r0;;
(p7)	br.spnt.many 1f ;;
	movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r30=[r30];;
	adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
	mov r25=192
	adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
	ld8 r23=[r22];;
	cmp.eq p6,p0=r23,r0;;
(p6)	adds r22=-8,r22;;
(p6)	adds r24=-8,r24;;
(p6)	adds r25=-64,r25;;
(p6)	ld8 r23=[r22];;
(p6)	cmp.eq p6,p0=r23,r0;;
(p6)	adds r22=-8,r22;;
(p6)	adds r24=-8,r24;;
(p6)	adds r25=-64,r25;;
(p6)	ld8 r23=[r22];;
(p6)	cmp.eq p6,p0=r23,r0;;
(p6)	adds r22=-8,r22;;
(p6)	adds r24=-8,r24;;
(p6)	adds r25=-64,r25;;
(p6)	ld8 r23=[r22];;
(p6)	cmp.eq p6,p0=r23,r0;;
	cmp.eq p6,p0=r23,r0
(p6)	br.cond.spnt.few 1f;	// this is actually an error
	// r22 points to non-zero element of irr, r23 has value
	// r24 points to corr element of insvc, r25 has elt*64
	ld8 r26=[r24];;
	cmp.geu p6,p0=r26,r23
(p6)	br.cond.spnt.many 1f;
	// not masked by insvc, get vector number
	shr.u r26=r23,1;;
	or r26=r23,r26;;
	shr.u r27=r26,2;;
	or r26=r26,r27;;
	shr.u r27=r26,4;;
	or r26=r26,r27;;
	shr.u r27=r26,8;;
	or r26=r26,r27;;
	shr.u r27=r26,16;;
	or r26=r26,r27;;
	shr.u r27=r26,32;;
	or r26=r26,r27;;
	andcm r26=0xffffffffffffffff,r26;;
	popcnt r26=r26;;
	sub r26=63,r26;;
	// r26 now contains the bit index (mod 64)
	mov r27=1;;
	shl r27=r27,r26;;
	// r27 now contains the (within the proper word) bit mask
	add r26=r25,r26
	// r26 now contains the vector [0..255]
	adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r20=[r20] ;;
	extr.u r28=r20,16,1
	extr.u r29=r20,4,4 ;;
	cmp.ne p6,p0=r28,r0	// if tpr.mmi is set, return SPURIOUS
(p6)	br.cond.spnt.few 1f;
	shl r29=r29,4;;
	adds r29=15,r29;;
	cmp.ge p6,p0=r29,r26
(p6)	br.cond.spnt.few 1f;
	// OK, have an unmasked vector to process/return
	ld8 r25=[r24];;
	or r25=r25,r27;;
	st8 [r24]=r25;;
	ld8 r25=[r22];;
	andcm r25=r25,r27;;
	st8 [r22]=r25;;
	mov r8=r26;;
	// if it's a clock tick, remember itm to avoid delivering it twice
	adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r20=[r20];;
	extr.u r20=r20,0,8;;
	cmp.eq p6,p0=r20,r8
	adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30
	adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;;
	ld8 r23=[r23];;
(p6)	st8 [r22]=r23;;
	// all done
1:	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_get_ivr)

ENTRY(hyper_eoi)
	// when we get to here r20=~=interrupts pending
	cmp.ne p7,p0=r20,r0
(p7)	br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r22=[r22];;
	adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
	ld8 r23=[r22];;
	cmp.eq p6,p0=r23,r0;;
(p6)	adds r22=-8,r22;;
(p6)	ld8 r23=[r22];;
(p6)	cmp.eq p6,p0=r23,r0;;
(p6)	adds r22=-8,r22;;
(p6)	ld8 r23=[r22];;
(p6)	cmp.eq p6,p0=r23,r0;;
(p6)	adds r22=-8,r22;;
(p6)	ld8 r23=[r22];;
(p6)	cmp.eq p6,p0=r23,r0;;
	cmp.eq p6,p0=r23,r0
(p6)	br.cond.spnt.few 1f;	// this is actually an error
	// r22 points to non-zero element of insvc, r23 has value
	shr.u r24=r23,1;;
	or r24=r23,r24;;
	shr.u r25=r24,2;;
	or r24=r24,r25;;
	shr.u r25=r24,4;;
	or r24=r24,r25;;
	shr.u r25=r24,8;;
	or r24=r24,r25;;
	shr.u r25=r24,16;;
	or r24=r24,r25;;
	shr.u r25=r24,32;;
	or r24=r24,r25;;
	andcm r24=0xffffffffffffffff,r24;;
	popcnt r24=r24;;
	sub r24=63,r24;;
	// r24 now contains the bit index
	mov r25=1;;
	shl r25=r25,r24;;
	andcm r23=r23,r25;;
	st8 [r22]=r23;;
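	// Same highest-set-bit trick as hyper_get_ivr above; the EOI in C:
	//	insvc[w] &= ~(1UL << msb);	// retire highest in-service
	// for w = highest nonzero insvc word, msb = its top set bit.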
1:	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_eoi)

ENTRY(hyper_set_itm)
	// when we get to here r20=~=interrupts pending
	cmp.ne p7,p0=r20,r0
(p7)	br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
	ld8 r21=[r20];;
	movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r20=[r20];;
	adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
	st8 [r20]=r8;;
	cmp.geu p6,p0=r21,r8;;
(p6)	mov r21=r8;;
	// now "safe set" cr.itm=r21
	mov r23=100;;
2:	mov cr.itm=r21;;
	srlz.d;;
	mov r22=ar.itc ;;
	cmp.leu p6,p0=r21,r22;;
	add r21=r21,r23;;
	shl r23=r23,1;;
(p6)	br.cond.spnt.few 2b;;
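	// "Safe set": if the new match value has already passed by the time
	// cr.itm is written, the timer would never fire, so retry with an
	// exponentially growing margin.  Roughly, in C:
	//	for (u64 margin = 100; ; margin <<= 1) {
	//		ia64_set_itm(next);
	//		if (ia64_get_itc() < next) break;  // still in future
	//		next += margin;
	//	}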
1:	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_set_itm)

ENTRY(hyper_get_rr)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	extr.u r25=r8,61,3;;
	adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
	shl r25=r25,3;;
	add r20=r20,r25;;
	ld8 r8=[r20];;
1:	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_get_rr)

ENTRY(hyper_set_rr)
	extr.u r25=r8,61,3;;
	cmp.leu p7,p0=7,r25	// punt on setting rr7
(p7)	br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	extr.u r26=r9,8,24	// r26 = r9.rid
	movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r20=[r20];;
	adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
	ld4 r22=[r21];;
	adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
	ld4 r23=[r21];;
	adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
	add r22=r26,r22;;
	cmp.geu p6,p0=r22,r23	// if r9.rid + starting_rid >= ending_rid
(p6)	br.cond.spnt.few 1f;	// this is an error, but just ignore/return
	// r21=starting_rid
	adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
	shl r25=r25,3;;
	add r20=r20,r25;;
	st8 [r20]=r9;;		// store away exactly what was passed
	// but adjust value actually placed in rr[r8]
	// r22 contains adjusted rid, "mangle" it (see regionreg.c)
	// and set ps to PAGE_SHIFT and ve to 1
	extr.u r27=r22,0,8
	extr.u r28=r22,8,8
	extr.u r29=r22,16,8;;
	dep.z r23=PAGE_SHIFT,2,6;;
	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
	dep r23=r27,r23,24,8;;
	dep r23=r28,r23,16,8;;
	dep r23=r29,r23,8,8
	cmp.eq p6,p0=r25,r0;;	// if rr0, save for metaphysical
(p6)	st8 [r24]=r23
	mov rr[r8]=r23;;
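	// The "mangle" above swaps the low and high bytes of the 24-bit rid
	// (bytes 1 and 3 of the rr image, per the comment) and rebuilds the
	// region register value; a C sketch:
	//	u64 rrv = 1				// ve
	//	        | (PAGE_SHIFT << 2)		// ps
	//	        | ((rid & 0x0000ffUL) << 24)
	//	        | ((rid & 0x00ff00UL) << 8)
	//	        | ((rid & 0xff0000UL) >> 8);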
1738 // done, mosey on back
1739 1: mov r24=cr.ipsr
1740 mov r25=cr.iip;;
1741 extr.u r26=r24,41,2 ;;
1742 cmp.eq p6,p7=2,r26 ;;
1743 (p6) mov r26=0
1744 (p6) adds r25=16,r25
1745 (p7) adds r26=1,r26
1746 ;;
1747 dep r24=r26,r24,41,2
1748 ;;
1749 mov cr.ipsr=r24
1750 mov cr.iip=r25
1751 mov pr=r31,-1 ;;
1752 rfi
1753 ;;
1754 END(hyper_set_rr)
ENTRY(hyper_set_kr)
	extr.u r25=r8,3,61;;
	cmp.ne p7,p0=r0,r25	// if kr# > 7, go slow way
(p7)	br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18 ;;
	shl r20=r8,3;;
	add r22=r20,r21;;
	st8 [r22]=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar0=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar1=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar2=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar3=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar4=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar5=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar6=r9;;
	cmp.eq p7,p0=r8,r0
	adds r8=-1,r8;;
(p7)	mov ar7=r9;;
	// done, mosey on back
1:	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_set_kr)
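
// C sketch of the fast path above: the XSI shadow copy is updated and
// the matching physical kernel register is written (names approximate):
//	XSI(krs[n]) = val;
//	switch (n) { case 0: ia64_set_kr(0, val); break; /* ... 7 */ }
// The unrolled cmp/adds ladder decrements r8 so that exactly one of
// the predicated moves fires, selecting ar.k0..ar.k7.
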
// this routine was derived from optimized assembly output from
// vcpu_thash so it is dense and difficult to read but it works
// On entry:
//	r18 == XSI_PSR_IC
//	r31 == pr
GLOBAL_ENTRY(hyper_thash)
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	shr.u r20 = r8, 61
	addl r25 = 1, r0
	movl r17 = 0xe000000000000000
	;;
	and r21 = r17, r8	// VHPT_Addr1
	;;
	shladd r28 = r20, 3, r18
	adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
	;;
	adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
	addl r28 = 32767, r0
	ld8 r24 = [r19]		// pta
	;;
	ld8 r23 = [r27]		// rrs[vadr>>61]
	extr.u r26 = r24, 2, 6
	;;
	extr.u r22 = r23, 2, 6
	shl r30 = r25, r26
	;;
	shr.u r19 = r8, r22
	shr.u r29 = r24, 15
	;;
	adds r17 = -1, r30
	;;
	shladd r27 = r19, 3, r0
	extr.u r26 = r17, 15, 46
	;;
	andcm r24 = r29, r26
	and r19 = r28, r27
	shr.u r25 = r27, 15
	;;
	and r23 = r26, r25
	;;
	or r22 = r24, r23
	;;
	dep.z r20 = r22, 15, 46
	;;
	or r16 = r20, r21
	;;
	or r8 = r19, r16
	// done, update iip/ipsr to next instruction
	mov r24=cr.ipsr
	mov r25=cr.iip;;
	extr.u r26=r24,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r25=16,r25
(p7)	adds r26=1,r26
	;;
	dep r24=r26,r24,41,2
	;;
	mov cr.ipsr=r24
	mov cr.iip=r25
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_thash)
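
// What the dense code above computes, as a C sketch of vcpu_thash()
// (assuming the usual long-format VHPT and pta.size >= 15):
//	ps   = rr[vadr >> 61].ps;		// bits 2-7 of the rr
//	mask = (1UL << pta.size) - 1;
//	off  = ((vadr >> ps) << 3) & mask;	// 8-byte entries
//	thash = (vadr & 0xe000000000000000UL)	// region bits
//	      | (pta & ~0x7fffUL & ~mask)	// VHPT base
//	      | off;
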
ENTRY(hyper_ptc_ga)
#ifndef FAST_PTC_GA
	br.spnt.few dispatch_break_fault ;;
#endif
	// FIXME: validate not flushing Xen addresses
#ifdef FAST_HYPERPRIVOP_CNT
	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	mov r28=r8
	extr.u r19=r9,2,6	// addr_range=1<<((r9&0xfc)>>2)
	mov r20=1
	shr.u r24=r8,61
	addl r27=56,r0		// PAGE_SHIFT<<2 (for ptc.ga)
	movl r26=0x8000000000000000	// INVALID_TI_TAG
	mov r30=ar.lc
	;;
	shl r19=r20,r19
	cmp.eq p7,p0=7,r24
(p7)	br.spnt.many dispatch_break_fault ;;	// slow way for rr7
	;;
	cmp.le p7,p0=r19,r0	// skip flush if size<=0
(p7)	br.cond.dpnt 2f ;;
	extr.u r24=r19,0,PAGE_SHIFT
	shr.u r23=r19,PAGE_SHIFT ;;	// repeat loop for n pages
	cmp.ne p7,p0=r24,r0 ;;
(p7)	adds r23=1,r23 ;;	// size not page-aligned? extra iteration
	mov ar.lc=r23
	movl r29=PAGE_SIZE;;
1:
	thash r25=r28 ;;
	adds r25=16,r25 ;;
	ld8 r24=[r25] ;;
	// FIXME: should check if tag matches, not just blow it away
	or r24=r26,r24 ;;	// vhpt_entry->ti_tag = 1
	st8 [r25]=r24
	ptc.ga r28,r27 ;;
	srlz.i ;;
	add r28=r29,r28
	br.cloop.sptk.few 1b
	;;
2:
	mov ar.lc=r30 ;;
	mov r29=cr.ipsr
	mov r30=cr.iip;;
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	adds r25=IA64_VCPU_DTLB_OFFSET,r27
	adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
	ld8 r24=[r25]
	ld8 r27=[r26] ;;
	and r24=-2,r24
	and r27=-2,r27 ;;
	st8 [r25]=r24		// set 1-entry i/dtlb as not present
	st8 [r26]=r27 ;;
	// increment to point to next instruction
	extr.u r26=r29,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r30=16,r30
(p7)	adds r26=1,r26
	;;
	dep r29=r26,r29,41,2
	;;
	mov cr.ipsr=r29
	mov cr.iip=r30
	mov pr=r31,-1 ;;
	rfi
	;;
END(hyper_ptc_ga)
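
// C sketch of what the loop above does for each page in the range
// (dom0 fast path, no range checks; vhpt_entry() stands in for the
// thash lookup and is not a real helper):
//	for each page at addr in [vadr, vadr + size):
//		vhpt_entry(addr)->ti_tag |= INVALID_TI_TAG; // invalidate
//		ia64_ptcga(addr, PAGE_SHIFT << 2);	// global purge
//		ia64_srlz_i();
// afterwards the vcpu's 1-entry itlb/dtlb shadows are marked
// not-present by clearing their p bits.
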
// recovery block for hyper_itc metaphysical memory lookup
ENTRY(recover_and_dispatch_break_fault)
#ifdef FAST_REFLECT_CNT
	movl r21=recover_to_break_fault_count;;
	ld8 r22=[r21];;
	adds r22=1,r22;;
	st8 [r21]=r22;;
#endif
	mov b0=r29 ;;
	br.sptk.many dispatch_break_fault;;

// Registers at entry
//	r17 == break immediate (XEN_HYPER_ITC_D or I)
//	r18 == XSI_PSR_IC_OFS
//	r31 == pr
GLOBAL_ENTRY(hyper_itc)
ENTRY(hyper_itc_i)
	// fall through, hyper_itc_d handles both i and d
ENTRY(hyper_itc_d)
#ifndef FAST_ITC
	br.sptk.many dispatch_break_fault ;;
#endif
	// ensure itir.ps >= xen's pagesize
	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r23=[r23];;
	extr.u r24=r23,2,6;;	// r24==logps
	cmp.gt p7,p0=PAGE_SHIFT,r24
(p7)	br.spnt.many dispatch_break_fault ;;
	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r21=[r21];;
	// for now, punt on region0 inserts
	extr.u r21=r21,61,3;;
	cmp.eq p7,p0=r21,r0
(p7)	br.spnt.many dispatch_break_fault ;;
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
	ld8 r27=[r27];;
	// FIXME: is the global var dom0 always pinned? assume so for now
	movl r28=dom0;;
	ld8 r28=[r28];;
	// FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
	cmp.ne p7,p0=r27,r28
(p7)	br.spnt.many dispatch_break_fault ;;
	// set p6/p7 outside the #ifdef: both are consumed below even
	// when the counters are compiled out
	cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
#ifdef FAST_HYPERPRIVOP_CNT
(p6)	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);;
(p7)	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);;
	ld8 r21=[r20];;
	adds r21=1,r21;;
	st8 [r20]=r21;;
#endif
	// encode r17 for fast_insert: bit1=1 (this is an itc, so iip
	// must be advanced), bit0=0 for data / 1 for inst
(p6)	mov r17=2;;
(p7)	mov r17=3;;
	mov r29=b0 ;;
	movl r30=recover_and_dispatch_break_fault ;;
	mov r16=r8;;
	// fall through

// fast_insert(PSCB(ifa),r24=ps,r16=pte)
//	r16 == pte
//	r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
//	r18 == XSI_PSR_IC_OFS
//	r24 == ps
//	r29 == saved value of b0 in case of recovery
//	r30 == recovery ip if failure occurs
//	r31 == pr
GLOBAL_ENTRY(fast_insert)
	// translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
	mov r19=1;;
	shl r20=r19,r24;;
	adds r20=-1,r20;;	// r20 == mask
	movl r19=_PAGE_PPN_MASK;;
	and r22=r16,r19;;	// r22 == pteval & _PAGE_PPN_MASK
	andcm r19=r22,r20;;
	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r21=[r21];;
	and r20=r21,r20;;
	or r19=r19,r20;;	// r19 == mpaddr
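	// i.e., as a C sketch: the guest pte supplies the bits above the
	// mapping's page size and the faulting address the bits below:
	//	mask   = (1UL << ps) - 1;
	//	mpaddr = (pteval & _PAGE_PPN_MASK & ~mask) | (ifa & mask);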
	// FIXME: for now, just do domain0 and skip mpaddr range checks
	dep r20=r0,r19,0,PAGE_SHIFT
	movl r21=PAGE_PHYS ;;
	or r20=r20,r21 ;;	// r20==return value from lookup_domain_mpa
	// r16=pteval,r20=pteval2
	movl r19=_PAGE_PPN_MASK
	movl r21=_PAGE_PL_2;;
	andcm r25=r16,r19;;	// r25==pteval & ~_PAGE_PPN_MASK
	and r22=r20,r19;;
	or r22=r22,r21;;
	or r22=r22,r25;;	// r22==return value from translate_domain_pte
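	// C sketch of the pte rewrite just performed: keep the guest's
	// flag bits, substitute the machine ppn, and force privilege
	// level 2:
	//	pte = (pteval & ~_PAGE_PPN_MASK)
	//	    | (pteval2 & _PAGE_PPN_MASK) | _PAGE_PL_2;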
	// done with translate_domain_pte
	// now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
	// FIXME: for now, just domain0 and skip range check
	// psr.ic already cleared
	// NOTE: r24 still contains ps (from above)
	shladd r24=r24,2,r0;;
	mov cr.itir=r24;;
	adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
	ld8 r23=[r23];;
	mov cr.ifa=r23;;
	tbit.z p6,p7=r17,0;;
(p6)	itc.d r22;;
(p7)	itc.i r22;;
	dv_serialize_data
	// FIXME: how do I make assembler warnings go away here?
	// vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
	thash r28=r23
	or r26=1,r22;;
	ttag r21=r23
	adds r25=8,r28
	mov r19=r28;;
	st8 [r25]=r24
	adds r20=16,r28;;
	st8 [r19]=r26
	st8 [r20]=r21;;
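	// C sketch of the long-format VHPT insert above (entry layout
	// assumed: page_flags at +0, itir at +8, ti_tag at +16):
	//	v = (struct vhpt_lf_entry *)ia64_thash(vaddr);
	//	v->itir = logps << 2;
	//	v->page_flags = pte | 1;	// present bit
	//	v->ti_tag = ia64_ttag(vaddr);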
	// vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
	// TR_ENTRY = {page_flags,itir,addr,rid}
	tbit.z p6,p7=r17,0;;
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
(p6)	adds r27=IA64_VCPU_DTLB_OFFSET,r27
(p7)	adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
	st8 [r27]=r22,8;;	// page_flags: already has pl >= 2 and p==1
	st8 [r27]=r24,8;;	// itir
	mov r19=-4096;;
	and r23=r23,r19;;
	st8 [r27]=r23,8;;	// ifa & ~0xfff
	adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
	extr.u r25=r23,61,3;;
	shladd r29=r25,3,r29;;
	ld8 r29=[r29];;
	movl r20=0xffffff00;;
	and r29=r29,r20;;
	st8 [r27]=r29,-8;;	// rid
	// if ps == 12, the truncation below is a no-op, so skip it
	cmp.eq p7,p0=12<<2,r24
(p7)	br.cond.sptk.many 1f;;
	// if (ps > 12) {
	//   trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
	extr.u r29=r24,2,6
	mov r28=1;;
	shl r26=r28,r29;;
	adds r29=-12,r29;;
	shl r25=r28,r29;;
	mov r29=-1
	adds r26=-1,r26
	adds r25=-1,r25;;
	andcm r26=r29,r26	// ~((1UL<<ps)-1)
	andcm r25=r29,r25;;	// ~((1UL<<(ps-12))-1)
	ld8 r29=[r27];;
	and r29=r29,r26;;
	st8 [r27]=r29,-16;;
	ld8 r29=[r27];;
	extr.u r28=r29,12,38;;
	movl r26=0xfffc000000000fff;;
	and r29=r29,r26
	and r28=r28,r25;;
	shl r28=r28,12;;
	or r29=r29,r28;;
	st8 [r27]=r29;;
1:	// done with vcpu_set_tr_entry
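	// C sketch of the vcpu_set_tr_entry() work above (TR_ENTRY
	// layout as in the comment: page_flags, itir, vadr, rid):
	//	trp->page_flags = pte | 1;
	//	trp->itir = itir;
	//	trp->vadr = ifa & ~0xfffUL;
	//	trp->rid  = rr[ifa >> 61] & 0xffffff00UL;
	// with trp->vadr/ppn further truncated to the mapping's page
	// size when ps > 12, per the commented C fragment above.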
	//PSCBX(vcpu,i/dtlb_pte) = mp_pte
	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
	ld8 r27=[r27];;
	tbit.z p6,p7=r17,0;;
(p6)	adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
(p7)	adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
	st8 [r27]=r16;;
	// done with vcpu_itc_no_srlz

	// if hyper_itc, increment to point to next instruction
	tbit.z p7,p0=r17,1
(p7)	br.cond.sptk.few no_inc_iip;;

	mov r29=cr.ipsr
	mov r30=cr.iip;;
	extr.u r26=r29,41,2 ;;
	cmp.eq p6,p7=2,r26 ;;
(p6)	mov r26=0
(p6)	adds r30=16,r30
(p7)	adds r26=1,r26
	;;
	dep r29=r26,r29,41,2
	;;
	mov cr.ipsr=r29
	mov cr.iip=r30;;

no_inc_iip:
	mov pr=r31,-1 ;;
	rfi
	;;
END(fast_insert)