ia64/xen-unstable

changeset 17196:43a87df9a11e

[IA64] Hand optimize for hyperprivop

This patch slightly optimizes hyperprivop emulation, especially hyper_rfi.
It makes the fstat system call about 2% faster on dom0.

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
author Alex Williamson <alex.williamson@hp.com>
date Fri Mar 07 13:11:35 2008 -0700 (2008-03-07)
parents 6225df3ff209
children 7619c93e6028
files xen/arch/ia64/xen/hyperprivop.S
line diff
     1.1 --- a/xen/arch/ia64/xen/hyperprivop.S	Fri Mar 07 13:09:47 2008 -0700
     1.2 +++ b/xen/arch/ia64/xen/hyperprivop.S	Fri Mar 07 13:11:35 2008 -0700
     1.3 @@ -67,19 +67,18 @@
     1.4  //	r19 == ipsr.cpl
     1.5  //	r31 == pr
     1.6  GLOBAL_ENTRY(fast_hyperprivop)
     1.7 +	adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18
     1.8  	// HYPERPRIVOP_SSM_I?
     1.9  	// assumes domain interrupts pending, so just do it
    1.10  	cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
    1.11  (p7)	br.sptk.many hyper_ssm_i;;
    1.12  
    1.13  	// Check pending event indication
    1.14 -	adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS, r18;;
    1.15 -	ld8 r20=[r20]
    1.16 +	ld8 r20=[r20]		// interrupt_mask_addr
    1.17  	;;
    1.18  	ld1 r22=[r20],-1	// evtchn_upcall_mask
    1.19  	;;
    1.20  	ld1 r20=[r20]		// evtchn_upcall_pending
    1.21 -	;;
    1.22  
    1.23  	// HYPERPRIVOP_RFI?
    1.24  	cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
    1.25 @@ -210,9 +209,8 @@ ENTRY(hyper_ssm_i)
    1.26  	// give up for now if: ipsr.be==1, ipsr.pp==1
    1.27  	mov r30=cr.ipsr
    1.28  	mov r29=cr.iip;;
    1.29 -	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
    1.30 -	cmp.ne p7,p0=r21,r0
    1.31 -(p7)	br.sptk.many dispatch_break_fault ;;
    1.32 +	tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
    1.33 +(p7)	br.spnt.many dispatch_break_fault ;;
    1.34  #ifdef FAST_HYPERPRIVOP_CNT
    1.35  	movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
    1.36  	ld4 r21=[r20];;
    1.37 @@ -220,8 +218,7 @@ ENTRY(hyper_ssm_i)
    1.38  	st4 [r20]=r21;;
    1.39  #endif
    1.40  	// set shared_mem iip to instruction after HYPER_SSM_I
    1.41 -	extr.u r20=r30,IA64_PSR_RI_BIT,2 ;;
    1.42 -	cmp.eq p6,p7=2,r20 ;;
    1.43 +	tbit.nz p6,p7=r30,IA64_PSR_RI_BIT+1 ;;	// cr.ipsr.ri >= 2 ?
    1.44  (p6)	mov r20=0
    1.45  (p6)	adds r29=16,r29
    1.46  (p7)	adds r20=1,r20 ;;
    1.47 @@ -346,8 +343,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
    1.48  (p6)	br.cond.spnt.few rp;;
    1.49  	mov r17=cr.ipsr;;
    1.50  	// slow path if: ipsr.pp==1
    1.51 -	extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
    1.52 -	cmp.ne p6,p0=r21,r0
    1.53 +	tbit.nz p6,p0=r17,IA64_PSR_PP_BIT
    1.54  (p6)	br.cond.spnt.few rp;;
    1.55  	// definitely have a domain tick
    1.56  	mov cr.eoi=r0
    1.57 @@ -537,8 +533,7 @@ GLOBAL_ENTRY(fast_break_reflect)
    1.58  #endif
    1.59  	mov r30=cr.ipsr
    1.60  	mov r29=cr.iip;;
    1.61 -	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
    1.62 -	cmp.ne p7,p0=r21,r0
    1.63 +	tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
    1.64  (p7)	br.spnt.few dispatch_break_fault ;;
    1.65          movl r20=IA64_PSR_CPL ;; 
    1.66          and r22=r20,r30 ;;
    1.67 @@ -722,8 +717,7 @@ GLOBAL_ENTRY(fast_access_reflect)
    1.68  #endif
    1.69  	mov r30=cr.ipsr
    1.70  	mov r29=cr.iip;;
    1.71 -	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
    1.72 -	cmp.ne p7,p0=r21,r0
    1.73 +	tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
    1.74  (p7)	br.spnt.few dispatch_reflection ;;
    1.75  	extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
    1.76  	cmp.eq p7,p0=r21,r0
    1.77 @@ -769,8 +763,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
    1.78  	cmp.eq p7,p0=r21,r0
    1.79  (p7)	br.spnt.few page_fault ;;
    1.80  	// slow path if strange ipsr or isr bits set
    1.81 -	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
    1.82 -	cmp.ne p7,p0=r21,r0
    1.83 +	tbit.nz p7,p0=r30,IA64_PSR_PP_BIT	// PP bit of r30 set?
    1.84  (p7)	br.spnt.few page_fault ;;
    1.85  	movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
    1.86  	and r21=r16,r21;;
    1.87 @@ -1023,45 +1016,27 @@ ENTRY(hyper_rfi)
    1.88  #ifndef FAST_RFI
    1.89  	br.spnt.few slow_vcpu_rfi ;;
    1.90  #endif
    1.91 -	// if no interrupts pending, proceed
    1.92 -	mov r30=r0
    1.93 -	cmp.eq p7,p0=r20,r0
    1.94 -(p7)	br.sptk.many 1f
    1.95 -	;;
    1.96 -	adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
    1.97 -	ld8 r21=[r20];;		// r21 = vcr.ipsr
    1.98 -	extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
    1.99 -	mov r30=r22;;
   1.100 -	// r30 determines whether we might deliver an immediate extint
   1.101 -#ifndef RFI_TO_INTERRUPT // see beginning of file
   1.102 -	cmp.ne p6,p0=r30,r0
   1.103 -(p6)	br.cond.spnt.few slow_vcpu_rfi ;;
   1.104 -#endif
   1.105 -1:
   1.106 -	adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
   1.107 -	ld8 r21=[r20];;		// r21 = vcr.ipsr
   1.108 +	// if interrupts pending and vcr.ipsr.i=1, do it the slow way
   1.109 +	adds r19=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
   1.110 +	adds r23=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18
   1.111 +	cmp.ne p8,p0=r20,r0;;	// evtchn_upcall_pending != 0
   1.112  	// if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
   1.113 -	movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
   1.114 -	and r22=r20,r21
   1.115 -	;;
   1.116 -	cmp.ne p7,p0=r22,r20
   1.117 -(p7)	br.spnt.few slow_vcpu_rfi ;;
   1.118 +	ld8 r21=[r19],XSI_IIP_OFS-XSI_IPSR_OFS // r21=vcr.ipsr
   1.119 +	movl r20=~(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
   1.120 +	or r20=r20,r21
   1.121 +	// p8 determines whether we might deliver an immediate extint
   1.122 +(p8)	tbit.nz p8,p0=r21,IA64_PSR_I_BIT;;
   1.123 +	cmp.ne p7,p0=-1,r20
   1.124 +	ld4 r23=[r23]	// r23=metaphysical_mode
   1.125 +#ifndef RFI_TO_INTERRUPT	// see beginning of file
   1.126 +(p8)	br.cond.spnt.few slow_vcpu_rfi
   1.127 +#endif
   1.128 +(p7)	br.spnt.few slow_vcpu_rfi;;
   1.129  	// if was in metaphys mode, do it the slow way (FIXME later?)
   1.130 -	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
   1.131 -	ld4 r20=[r20];;
   1.132 -	cmp.ne p7,p0=r20,r0
   1.133 -(p7)	br.spnt.few slow_vcpu_rfi ;;
   1.134 -#if 0
   1.135 -	// if domain hasn't already done virtual bank switch
   1.136 -	//  do it the slow way (FIXME later?)
   1.137 -	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
   1.138 -	ld4 r20=[r20];;
   1.139 -	cmp.eq p7,p0=r20,r0
   1.140 -(p7)	br.spnt.few slow_vcpu_rfi ;;
   1.141 -#endif
   1.142 -	adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
   1.143 -	ld8 r22=[r20];;
   1.144 -1:	// OK now, let's do an rfi.
   1.145 +	cmp.ne p7,p0=r23,r0
   1.146 +	ld8 r22=[r19]	// r22=vcr.iip
   1.147 +(p7)	br.spnt.few slow_vcpu_rfi;;
   1.148 +	// OK now, let's do an rfi.
   1.149  #ifdef FAST_HYPERPRIVOP_CNT
   1.150  	movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
   1.151  	ld4 r23=[r20];;
   1.152 @@ -1070,8 +1045,7 @@ 1:	// OK now, let's do an rfi.
   1.153  #endif
   1.154  #ifdef RFI_TO_INTERRUPT
   1.155  	// maybe do an immediate interrupt delivery?
   1.156 -	cmp.ne p6,p0=r30,r0
   1.157 -(p6)	br.cond.spnt.few rfi_check_extint;;
   1.158 +(p8)	br.cond.spnt.few rfi_check_extint;;
   1.159  #endif
   1.160  
   1.161  just_do_rfi:
   1.162 @@ -1090,15 +1064,13 @@ just_do_rfi:
   1.163  	// vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
   1.164  	ld8 r20=[r20]
   1.165  	mov r19=1 
   1.166 -	extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
   1.167 -	cmp.ne p7,p6=r23,r0 ;;
   1.168 +	tbit.nz p7,p6=r21,IA64_PSR_I_BIT
   1.169 +	tbit.nz p9,p8=r21,IA64_PSR_IC_BIT;;
   1.170  	// not done yet
   1.171  (p7)	st1 [r20]=r0
   1.172 -(p6)	st1 [r20]=r19;;
   1.173 -	extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
   1.174 -	cmp.ne p7,p6=r23,r0 ;;
   1.175 -(p7)	st4 [r18]=r19;;
   1.176 -(p6)	st4 [r18]=r0;;
   1.177 +(p6)	st1 [r20]=r19
   1.178 +(p9)	st4 [r18]=r19
   1.179 +(p8)	st4 [r18]=r0
   1.180  	// force on psr.ic, i, dt, rt, it, bn
   1.181  	movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT| \
   1.182  	          IA64_PSR_IT|IA64_PSR_BN)
   1.183 @@ -1228,9 +1200,8 @@ ENTRY(rfi_check_extint)
   1.184  	// r26 now contains the vector [0..255]
   1.185  	adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
   1.186  	ld8 r20=[r20] ;;
   1.187 -	extr.u r28=r20,16,1
   1.188 -	extr.u r29=r20,4,4 ;;
   1.189 -	cmp.ne p6,p0=r28,r0	// if tpr.mmi is set, just rfi
   1.190 +	extr.u r29=r20,4,4
   1.191 +	tbit.nz p6,p0=r20,16	// if tpr.mmi is set, just rfi
   1.192  (p6)	br.cond.spnt.few just_do_rfi;;
   1.193  	shl r29=r29,4;;
   1.194  	adds r29=15,r29;;