ia64/xen-unstable
changeset 5796:89d92ce10924
Add fast path for thash hyperprivop
Add fast path for ptc.ga hyperprivop
Add fast path for iaccess/daccess reflect
author | djm@kirby.fc.hp.com |
date | Sat Jul 09 07:37:13 2005 -0700 (2005-07-09) |
parents | 0e7741276468 |
children | ca44d2dbb273 |
files | xen/arch/ia64/asm-offsets.c xen/arch/ia64/hyperprivop.S xen/arch/ia64/ivt.S |
line diff
1.1 --- a/xen/arch/ia64/asm-offsets.c Sat Jul 09 07:36:13 2005 -0700 1.2 +++ b/xen/arch/ia64/asm-offsets.c Sat Jul 09 07:37:13 2005 -0700 1.3 @@ -46,6 +46,8 @@ void foo(void) 1.4 DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.interrupt_collection_enabled))); 1.5 DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, arch.interrupt_delivery_enabled)); 1.6 DEFINE(XSI_IIP_OFS, offsetof(vcpu_info_t, arch.iip)); 1.7 + DEFINE(XSI_IFA_OFS, offsetof(vcpu_info_t, arch.ifa)); 1.8 + DEFINE(XSI_ITIR_OFS, offsetof(vcpu_info_t, arch.itir)); 1.9 DEFINE(XSI_IPSR, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.ipsr))); 1.10 DEFINE(XSI_IPSR_OFS, offsetof(vcpu_info_t, arch.ipsr)); 1.11 DEFINE(XSI_IFS_OFS, offsetof(vcpu_info_t, arch.ifs)); 1.12 @@ -61,6 +63,7 @@ void foo(void) 1.13 DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption)); 1.14 DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0])); 1.15 DEFINE(XSI_TPR_OFS, offsetof(vcpu_info_t, arch.tpr)); 1.16 + DEFINE(XSI_PTA_OFS, offsetof (vcpu_info_t, arch.pta)); 1.17 DEFINE(XSI_ITV_OFS, offsetof(vcpu_info_t, arch.itv)); 1.18 //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); 1.19 //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); 1.20 @@ -85,6 +88,8 @@ void foo(void) 1.21 DEFINE(IA64_VCPU_ENDING_RID_OFFSET, offsetof (struct vcpu, arch.ending_rid)); 1.22 DEFINE(IA64_VCPU_DOMAIN_ITM_OFFSET, offsetof (struct vcpu, arch.domain_itm)); 1.23 DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, arch.domain_itm_last)); 1.24 + DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb)); 1.25 + DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb)); 1.26 1.27 BLANK(); 1.28 DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next));
2.1 --- a/xen/arch/ia64/hyperprivop.S Sat Jul 09 07:36:13 2005 -0700 2.2 +++ b/xen/arch/ia64/hyperprivop.S Sat Jul 09 07:37:13 2005 -0700 2.3 @@ -125,6 +125,10 @@ 1: // when we get to here r20=~=interrup 2.4 cmp.eq p7,p6=XEN_HYPER_ITC_I,r17 2.5 (p7) br.sptk.many hyper_itc_i;; 2.6 2.7 + // HYPERPRIVOP_THASH? 2.8 + cmp.eq p7,p6=XEN_HYPER_THASH,r17 2.9 +(p7) br.sptk.many hyper_thash;; 2.10 + 2.11 // if not one of the above, give up for now and do it the slow way 2.12 br.sptk.many dispatch_break_fault ;; 2.13 2.14 @@ -440,7 +444,6 @@ fast_tick_reflect_done: 2.15 END(fast_tick_reflect) 2.16 2.17 // reflect domain breaks directly to domain 2.18 -// FIXME: DOES NOT WORK YET 2.19 // r16 == cr.isr 2.20 // r17 == cr.iim 2.21 // r18 == XSI_PSR_IC 2.22 @@ -471,15 +474,30 @@ GLOBAL_ENTRY(fast_break_reflect) 2.23 cmp.eq p7,p0=r22,r17; 2.24 (p7) br.spnt.few dispatch_break_fault ;; 2.25 #endif 2.26 -#ifdef FAST_REFLECT_CNT 2.27 - movl r20=fast_reflect_count+((0x2c00>>8)*8);; 2.28 - ld8 r21=[r20];; 2.29 - adds r21=1,r21;; 2.30 - st8 [r20]=r21;; 2.31 -#endif 2.32 + movl r20=0x2c00; 2.33 // save iim in shared_info 2.34 adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;; 2.35 st8 [r21]=r17;; 2.36 + // fall through 2.37 + 2.38 + 2.39 +// reflect to domain ivt+r20 2.40 +// sets up isr,iip,ipsr,ifs (FIXME: do iipa too) 2.41 +// r16 == cr.isr 2.42 +// r18 == XSI_PSR_IC 2.43 +// r20 == offset into ivt 2.44 +// r29 == iip 2.45 +// r30 == ipsr 2.46 +// r31 == pr 2.47 +ENTRY(fast_reflect) 2.48 +#ifdef FAST_REFLECT_CNT 2.49 + movl r22=fast_reflect_count; 2.50 + shr r23=r20,5;; 2.51 + add r22=r22,r23;; 2.52 + ld8 r21=[r22];; 2.53 + adds r21=1,r21;; 2.54 + st8 [r22]=r21;; 2.55 +#endif 2.56 // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!) 
2.57 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; 2.58 st8 [r21]=r29;; 2.59 @@ -504,9 +522,9 @@ GLOBAL_ENTRY(fast_break_reflect) 2.60 or r30=r30,r28;; 2.61 and r30=r30,r27;; 2.62 // also set shared_mem ipsr.i and ipsr.ic appropriately 2.63 - ld8 r20=[r18];; 2.64 - extr.u r22=r20,32,32 2.65 - cmp4.eq p6,p7=r20,r0;; 2.66 + ld8 r24=[r18];; 2.67 + extr.u r22=r24,32,32 2.68 + cmp4.eq p6,p7=r24,r0;; 2.69 (p6) dep r30=0,r30,IA64_PSR_IC_BIT,1 2.70 (p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;; 2.71 cmp4.eq p6,p7=r22,r0;; 2.72 @@ -520,13 +538,13 @@ GLOBAL_ENTRY(fast_break_reflect) 2.73 // cover and set shared_mem precover_ifs to cr.ifs 2.74 // set shared_mem ifs and incomplete_regframe to 0 2.75 cover ;; 2.76 - mov r20=cr.ifs;; 2.77 + mov r24=cr.ifs;; 2.78 adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; 2.79 st4 [r21]=r0 ;; 2.80 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; 2.81 st8 [r21]=r0 ;; 2.82 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; 2.83 - st8 [r21]=r20 ;; 2.84 + st8 [r21]=r24 ;; 2.85 // vpsr.i = vpsr.ic = 0 on delivery of interruption 2.86 st8 [r18]=r0;; 2.87 // FIXME: need to save iipa and isr to be arch-compliant 2.88 @@ -534,22 +552,30 @@ GLOBAL_ENTRY(fast_break_reflect) 2.89 mov r22=IA64_KR(CURRENT);; 2.90 adds r22=IA64_VCPU_IVA_OFFSET,r22;; 2.91 ld8 r23=[r22];; 2.92 - movl r24=0x2c00;; 2.93 - add r24=r24,r23;; 2.94 - mov cr.iip=r24;; 2.95 + add r20=r20,r23;; 2.96 + mov cr.iip=r20;; 2.97 // OK, now all set to go except for switch to virtual bank0 2.98 mov r30=r2; mov r29=r3;; 2.99 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; 2.100 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; 2.101 bsw.1;; 2.102 - st8 [r2]=r16,16; st8 [r3]=r17,16 ;; 2.103 - st8 [r2]=r18,16; st8 [r3]=r19,16 ;; 2.104 - st8 [r2]=r20,16; st8 [r3]=r21,16 ;; 2.105 - st8 [r2]=r22,16; st8 [r3]=r23,16 ;; 2.106 - st8 [r2]=r24,16; st8 [r3]=r25,16 ;; 2.107 - st8 [r2]=r26,16; st8 [r3]=r27,16 ;; 2.108 - st8 [r2]=r28,16; st8 [r3]=r29,16 ;; 2.109 - st8 [r2]=r30,16; st8 [r3]=r31,16 ;; 2.110 + // FIXME: need to 
handle ar.unat! 2.111 + .mem.offset 0,0; st8.spill [r2]=r16,16; 2.112 + .mem.offset 8,0; st8.spill [r3]=r17,16 ;; 2.113 + .mem.offset 0,0; st8.spill [r2]=r18,16; 2.114 + .mem.offset 8,0; st8.spill [r3]=r19,16 ;; 2.115 + .mem.offset 0,0; st8.spill [r2]=r20,16; 2.116 + .mem.offset 8,0; st8.spill [r3]=r21,16 ;; 2.117 + .mem.offset 0,0; st8.spill [r2]=r22,16; 2.118 + .mem.offset 8,0; st8.spill [r3]=r23,16 ;; 2.119 + .mem.offset 0,0; st8.spill [r2]=r24,16; 2.120 + .mem.offset 8,0; st8.spill [r3]=r25,16 ;; 2.121 + .mem.offset 0,0; st8.spill [r2]=r26,16; 2.122 + .mem.offset 8,0; st8.spill [r3]=r27,16 ;; 2.123 + .mem.offset 0,0; st8.spill [r2]=r28,16; 2.124 + .mem.offset 8,0; st8.spill [r3]=r29,16 ;; 2.125 + .mem.offset 0,0; st8.spill [r2]=r30,16; 2.126 + .mem.offset 8,0; st8.spill [r3]=r31,16 ;; 2.127 movl r31=XSI_IPSR;; 2.128 bsw.0 ;; 2.129 mov r2=r30; mov r3=r29;; 2.130 @@ -559,6 +585,41 @@ GLOBAL_ENTRY(fast_break_reflect) 2.131 rfi 2.132 ;; 2.133 2.134 +// reflect access faults (0x2400,0x2800,0x5300) directly to domain 2.135 +// r16 == isr 2.136 +// r17 == ifa 2.137 +// r19 == reflect number (only pass-thru to dispatch_reflection) 2.138 +// r20 == offset into ivt 2.139 +// r31 == pr 2.140 +GLOBAL_ENTRY(fast_access_reflect) 2.141 + mov r30=cr.ipsr;; 2.142 + mov r29=cr.iip;; 2.143 + extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; 2.144 + cmp.ne p7,p0=r21,r0 2.145 +(p7) br.spnt.few dispatch_reflection ;; 2.146 + extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; 2.147 + cmp.ne p7,p0=r21,r0 2.148 +(p7) br.spnt.few dispatch_reflection ;; 2.149 + extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;; 2.150 + cmp.eq p7,p0=r21,r0 2.151 +(p7) br.spnt.few dispatch_reflection ;; 2.152 + movl r18=XSI_PSR_IC;; 2.153 + ld8 r21=[r18];; 2.154 + cmp.eq p7,p0=r0,r21 2.155 +(p7) br.spnt.few dispatch_reflection ;; 2.156 + // set shared_mem ifa, FIXME: should we validate it? 
2.157 + mov r17=cr.ifa;; 2.158 + adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; 2.159 + st8 [r21]=r17 ;; 2.160 + // get rr[ifa] and save to itir in shared memory (extra bits ignored) 2.161 + shr.u r22=r17,61 2.162 + adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 2.163 + adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; 2.164 + shladd r22=r22,3,r21;; 2.165 + ld8 r22=[r22];; 2.166 + st8 [r23]=r22;; 2.167 + br.cond.sptk.many fast_reflect;; 2.168 + 2.169 2.170 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged 2.171 ENTRY(hyper_rfi) 2.172 @@ -1312,8 +1373,146 @@ 1: mov r24=cr.ipsr 2.173 ;; 2.174 END(hyper_set_rr) 2.175 2.176 +// this routine was derived from optimized assembly output from 2.177 +// vcpu_thash so it is dense and difficult to read but it works 2.178 +// On entry: 2.179 +// r18 == XSI_PSR_IC 2.180 +// r31 == pr 2.181 +GLOBAL_ENTRY(hyper_thash) 2.182 +#ifdef FAST_HYPERPRIVOP_CNT 2.183 + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);; 2.184 + ld8 r21=[r20];; 2.185 + adds r21=1,r21;; 2.186 + st8 [r20]=r21;; 2.187 +#endif 2.188 + shr.u r20 = r8, 61 2.189 + addl r25 = 1, r0 2.190 + movl r17 = 0xe000000000000000 2.191 + ;; 2.192 + and r21 = r17, r8 // VHPT_Addr1 2.193 + ;; 2.194 + shladd r28 = r20, 3, r18 2.195 + adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18 2.196 + ;; 2.197 + adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28 2.198 + addl r28 = 32767, r0 2.199 + ld8 r24 = [r19] // pta 2.200 + ;; 2.201 + ld8 r23 = [r27] // rrs[vadr>>61] 2.202 + extr.u r26 = r24, 2, 6 2.203 + ;; 2.204 + extr.u r22 = r23, 2, 6 2.205 + shl r30 = r25, r26 2.206 + ;; 2.207 + shr.u r19 = r8, r22 2.208 + shr.u r29 = r24, 15 2.209 + ;; 2.210 + adds r17 = -1, r30 2.211 + ;; 2.212 + shladd r27 = r19, 3, r0 2.213 + extr.u r26 = r17, 15, 46 2.214 + ;; 2.215 + andcm r24 = r29, r26 2.216 + and r19 = r28, r27 2.217 + shr.u r25 = r27, 15 2.218 + ;; 2.219 + and r23 = r26, r25 2.220 + ;; 2.221 + or r22 = r24, r23 2.222 + ;; 2.223 + dep.z r20 = r22, 15, 46 2.224 + ;; 2.225 + or r16 = r20, r21 
2.226 + ;; 2.227 + or r8 = r19, r16 2.228 + // done, update iip/ipsr to next instruction 2.229 + mov r24=cr.ipsr 2.230 + mov r25=cr.iip;; 2.231 + extr.u r26=r24,41,2 ;; 2.232 + cmp.eq p6,p7=2,r26 ;; 2.233 +(p6) mov r26=0 2.234 +(p6) adds r25=16,r25 2.235 +(p7) adds r26=1,r26 2.236 + ;; 2.237 + dep r24=r26,r24,41,2 2.238 + ;; 2.239 + mov cr.ipsr=r24 2.240 + mov cr.iip=r25 2.241 + mov pr=r31,-1 ;; 2.242 + rfi 2.243 + ;; 2.244 +END(hyper_thash) 2.245 + 2.246 ENTRY(hyper_ptc_ga) 2.247 - br.spnt.many dispatch_break_fault ;; 2.248 +#ifdef CONFIG_SMP 2.249 +FIXME: ptc.ga instruction requires spinlock for SMP 2.250 +#endif 2.251 + // FIXME: validate not flushing Xen addresses 2.252 +#ifdef FAST_HYPERPRIVOP_CNT 2.253 + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);; 2.254 + ld8 r21=[r20];; 2.255 + adds r21=1,r21;; 2.256 + st8 [r20]=r21;; 2.257 +#endif 2.258 + mov r28=r8 2.259 + extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2) 2.260 + mov r20=1 2.261 + shr.u r24=r8,61 2.262 + addl r27=56,r0 // PAGE_SHIFT<<2 (for ptc.ga) 2.263 + movl r26=0x8000000000000000 // INVALID_TI_TAG 2.264 + mov r30=ar.lc 2.265 + ;; 2.266 + shl r19=r20,r19 2.267 + cmp.eq p7,p0=7,r24 2.268 +(p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7 2.269 + ;; 2.270 + cmp.le p7,p0=r19,r0 // skip flush if size<=0 2.271 +(p7) br.cond.dpnt 2f ;; 2.272 + extr.u r24=r19,0,PAGE_SHIFT 2.273 + shr.u r23=r19,PAGE_SHIFT ;; // repeat loop for n pages 2.274 + cmp.ne p7,p0=r24,r0 ;; 2.275 +(p7) adds r23=1,r23 ;; // n_pages<size<n_pages+1? 
extra iter 2.276 + mov ar.lc=r23 2.277 + movl r29=PAGE_SIZE;; 2.278 +1: 2.279 + thash r25=r28 ;; 2.280 + adds r25=16,r25 ;; 2.281 + ld8 r24=[r25] ;; 2.282 + // FIXME: should check if tag matches, not just blow it away 2.283 + or r24=r26,r24 ;; // vhpt_entry->ti_tag = 1 2.284 + st8 [r25]=r24 2.285 + ptc.ga r28,r27 ;; 2.286 + srlz.i ;; 2.287 + add r28=r29,r28 2.288 + br.cloop.sptk.few 1b 2.289 + ;; 2.290 +2: 2.291 + mov ar.lc=r30 ;; 2.292 + mov r29=cr.ipsr 2.293 + mov r30=cr.iip;; 2.294 + mov r27=IA64_KR(CURRENT);; 2.295 + adds r25=IA64_VCPU_DTLB_OFFSET,r27 2.296 + adds r26=IA64_VCPU_ITLB_OFFSET,r27;; 2.297 + ld8 r24=[r25] 2.298 + ld8 r27=[r26] ;; 2.299 + and r24=-2,r24 2.300 + and r27=-2,r27 ;; 2.301 + st8 [r25]=r24 // set 1-entry i/dtlb as not present 2.302 + st8 [r26]=r27 ;; 2.303 + // increment to point to next instruction 2.304 + extr.u r26=r29,41,2 ;; 2.305 + cmp.eq p6,p7=2,r26 ;; 2.306 +(p6) mov r26=0 2.307 +(p6) adds r30=16,r30 2.308 +(p7) adds r26=1,r26 2.309 + ;; 2.310 + dep r29=r26,r29,41,2 2.311 + ;; 2.312 + mov cr.ipsr=r29 2.313 + mov cr.iip=r30 2.314 + mov pr=r31,-1 ;; 2.315 + rfi 2.316 + ;; 2.317 END(hyper_ptc_ga) 2.318 2.319 ENTRY(hyper_itc_d) 2.320 @@ -1323,5 +1522,3 @@ END(hyper_itc_d) 2.321 ENTRY(hyper_itc_i) 2.322 br.spnt.many dispatch_break_fault ;; 2.323 END(hyper_itc_i) 2.324 - 2.325 -// ignore me
3.1 --- a/xen/arch/ia64/ivt.S Sat Jul 09 07:36:13 2005 -0700 3.2 +++ b/xen/arch/ia64/ivt.S Sat Jul 09 07:37:13 2005 -0700 3.3 @@ -666,7 +666,12 @@ END(dirty_bit) 3.4 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) 3.5 ENTRY(iaccess_bit) 3.6 #ifdef XEN 3.7 - REFLECT(9) 3.8 + mov r31=pr; 3.9 + mov r16=cr.isr 3.10 + mov r17=cr.ifa 3.11 + mov r19=9 3.12 + movl r20=0x2400 3.13 + br.sptk.many fast_access_reflect;; 3.14 #endif 3.15 DBG_FAULT(9) 3.16 // Like Entry 8, except for instruction access 3.17 @@ -734,7 +739,12 @@ END(iaccess_bit) 3.18 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) 3.19 ENTRY(daccess_bit) 3.20 #ifdef XEN 3.21 - REFLECT(10) 3.22 + mov r31=pr; 3.23 + mov r16=cr.isr 3.24 + mov r17=cr.ifa 3.25 + mov r19=10 3.26 + movl r20=0x2800 3.27 + br.sptk.many fast_access_reflect;; 3.28 #endif 3.29 DBG_FAULT(10) 3.30 // Like Entry 8, except for data access 3.31 @@ -1395,7 +1405,12 @@ END(iaccess_rights) 3.32 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) 3.33 ENTRY(daccess_rights) 3.34 #ifdef XEN 3.35 - REFLECT(23) 3.36 + mov r31=pr; 3.37 + mov r16=cr.isr 3.38 + mov r17=cr.ifa 3.39 + mov r19=23 3.40 + movl r20=0x5300 3.41 + br.sptk.many fast_access_reflect;; 3.42 #endif 3.43 DBG_FAULT(23) 3.44 mov r16=cr.ifa 3.45 @@ -1821,7 +1836,7 @@ END(ia32_interrupt) 3.46 3.47 #ifdef XEN 3.48 .org ia64_ivt+0x8000 3.49 -ENTRY(dispatch_reflection) 3.50 +GLOBAL_ENTRY(dispatch_reflection) 3.51 /* 3.52 * Input: 3.53 * psr.ic: off