ia64/xen-unstable

changeset 7331:18390e605e19

Fast hyperprivop for itc.i and itc.d (works, but default off for stability)
author djm@kirby.fc.hp.com
date Wed Oct 12 10:56:14 2005 -0600 (2005-10-12)
parents c8eb9d82dd4b
children 54b112b314fe
files xen/arch/ia64/asm-offsets.c xen/arch/ia64/xen/hyperprivop.S
line diff
     1.1 --- a/xen/arch/ia64/asm-offsets.c	Wed Oct 12 09:26:23 2005 -0600
     1.2 +++ b/xen/arch/ia64/asm-offsets.c	Wed Oct 12 10:56:14 2005 -0600
     1.3 @@ -82,10 +82,13 @@ void foo(void)
     1.4  	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct vcpu, arch._thread.ksp));
     1.5  	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack));
     1.6  
     1.7 +	DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain));
     1.8  	DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0));
     1.9  	DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0));
    1.10  	DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm));
    1.11  	DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
    1.12 +	DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, arch.dtlb_pte));
    1.13 +	DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, arch.itlb_pte));
    1.14  	DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0]));
    1.15  	DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3]));
    1.16  	DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3]));
     2.1 --- a/xen/arch/ia64/xen/hyperprivop.S	Wed Oct 12 09:26:23 2005 -0600
     2.2 +++ b/xen/arch/ia64/xen/hyperprivop.S	Wed Oct 12 10:56:14 2005 -0600
     2.3 @@ -14,6 +14,10 @@
     2.4  #include <asm/system.h>
     2.5  #include <public/arch-ia64.h>
     2.6  
     2.7 +#define	_PAGE_PPN_MASK	0x0003fffffffff000 // PTE bits 12..49; duplicated here because asm/pgtable.h doesn't do assembly
     2.8 +#define PAGE_PHYS	0x0010000000000761 //__pgprot(__DIRTY_BITS|_PAGE_PL_2|_PAGE_AR_RWX)
     2.9 +#define _PAGE_PL_2	(2<<7) // value 2 placed at bit 7 of the PTE; or'ed into the translated pte in hyper_itc
    2.10 +
    2.11  #if 1	 // change to 0 to turn off all fast paths
    2.12  #define FAST_HYPERPRIVOPS
    2.13  #define FAST_HYPERPRIVOP_CNT
    2.14 @@ -24,6 +28,7 @@
    2.15  #define FAST_RFI
    2.16  #define FAST_SSM_I
    2.17  #define FAST_PTC_GA
    2.18 +#undef FAST_ITC	// working but default off for now
    2.19  #undef RFI_TO_INTERRUPT // not working yet
    2.20  #endif
    2.21  
    2.22 @@ -1663,10 +1668,159 @@ 2:
    2.23  	;;
    2.24  END(hyper_ptc_ga)
    2.25  
    2.26 +//  Registers at entry (r8 is also read below as the pte value to insert)
    2.27 +//	r17 = break immediate (XEN_HYPER_ITC_D or I)
    2.28 +//	r18 == XSI_PSR_IC_OFS
    2.29 +//	r31 == pr
    2.30 +GLOBAL_ENTRY(hyper_itc)
    2.31 +ENTRY(hyper_itc_i)
    2.32 +	// fall through, hyper_itc_d handles both i and d
    2.33  ENTRY(hyper_itc_d)
    2.34 +#ifndef FAST_ITC
    2.35  	br.spnt.many dispatch_break_fault ;;	// FAST_ITC undefined: always hand off to dispatch_break_fault
    2.36 -END(hyper_itc_d)
    2.37 +#endif
    2.38 +	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;	// r23 = address of the itir slot, relative to r18
    2.39 +	ld8 r23=[r23];;	// r23 = itir value
    2.40 +	extr.u r24=r23,2,6;;		// r24==logps (itir bits 2..7)
    2.41 +	cmp.gt p7,p0=PAGE_SHIFT,r24	// p7 = (PAGE_SHIFT > logps)
    2.42 +(p7)	br.spnt.many dispatch_break_fault ;;	// page size below PAGE_SHIFT is not handled here
    2.43 +	// translate_domain_pte(r8=pteval,PSCB(ifa)=address,r24=itir)
    2.44 +	mov r19=1;;
    2.45 +	shl r20=r19,r24;;	// r20 = 1<<logps
    2.46 +	adds r20=-1,r20;;	// r20 == mask == (1<<logps)-1
    2.47 +	movl r19=_PAGE_PPN_MASK;;
    2.48 +	and r22=r8,r19;;	// r22 == pteval & _PAGE_PPN_MASK
    2.49 +	andcm r19=r22,r20;;	// r19 = r22 & ~mask
    2.50 +	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;	// r21 = address of the ifa slot, relative to r18
    2.51 +	ld8 r21=[r21];;	// r21 = ifa
    2.52 +	and r20=r21,r20;;	// r20 = ifa & mask
    2.53 +	or r19=r19,r20;;	// r19 == mpaddr
    2.54 +	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
    2.55 +	ld8 r27=[r27];;	// r27 = current vcpu pointer
    2.56 +	adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
    2.57 +	ld8 r27=[r27];;	// r27 = vcpu->domain
    2.58 +// FIXME: is the global var dom0 always pinned? assume so for now
    2.59 +	movl r28=dom0;;
    2.60 +	ld8 r28=[r28];;	// r28 = value of the global dom0
    2.61 +// FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
    2.62 +	cmp.ne p7,p0=r27,r28	// p7 = (current domain != dom0)
    2.63 +(p7)	br.spnt.many dispatch_break_fault ;;
    2.64 +	// if region 6, go slow way -- NOTE(review): no region check is actually implemented here
    2.65 +#ifdef FAST_HYPERPRIVOP_CNT
    2.66 +	cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
    2.67 +(p6)	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);;
    2.68 +(p7)	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);;
    2.69 +	ld8 r21=[r20];;
    2.70 +	adds r21=1,r21;;
    2.71 +	st8 [r20]=r21;;	// bump the 8-byte counter slot selected above (D or I)
    2.72 +#endif
    2.73 +// FIXME: for now, just do domain0 and skip mpaddr range checks
    2.74 +	dep r20=r0,r19,0,PAGE_SHIFT	// r20 = mpaddr with the low PAGE_SHIFT bits zeroed
    2.75 +	movl r21=PAGE_PHYS ;;
    2.76 +	or r20=r20,r21 ;;	// r20==return value from lookup_domain_mpa
    2.77 +	// r8=pteval,r20=pteval2
    2.78 +	movl r19=_PAGE_PPN_MASK
    2.79 +	movl r21=_PAGE_PL_2;;
    2.80 +	andcm r25=r8,r19;;	// r25==pteval & ~_PAGE_PPN_MASK
    2.81 +	and r22=r20,r19;;	// r22 = pteval2 & _PAGE_PPN_MASK
    2.82 +	or r22=r22,r21;;	// or in _PAGE_PL_2
    2.83 +	or r22=r22,r25;;	// r22==return value from translate_domain_pte
    2.84 +	// done with translate_domain_pte
    2.85 +	// now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r8=mppte,r24=logps)
    2.86 +// FIXME: for now, just domain0 and skip range check
    2.87 +	// psr.ic already cleared
    2.88 +	// NOTE: r24 still contains ps (from above)
    2.89 +	shladd r24=r24,2,r0;;	// r24 = logps<<2
    2.90 +	mov cr.itir=r24;;
    2.91 +	adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
    2.92 +	ld8 r23=[r23];;	// r23 = ifa (reloaded)
    2.93 +	mov cr.ifa=r23;;
    2.94 +	cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;	// p6 = data insert, p7 = instruction insert
    2.95 +(p6)	itc.d r22;;
    2.96 +(p7)	itc.i r22;;
    2.97 +	dv_serialize_data
    2.98 +	// FIXME: how do I make assembler warnings go away here?
    2.99 +	// vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
   2.100 +	thash r28=r23	// r28 = thash(vaddr), used below as the entry base address
   2.101 +	or r26=1,r22;;	// r26 = pte | 1
   2.102 +	ttag r21=r23	// r21 = ttag(vaddr)
   2.103 +	adds r25=8,r28
   2.104 +	mov r19=r28;;
   2.105 +	st8 [r25]=r24	// entry+8  = logps<<2
   2.106 +	adds r20=16,r28;;
   2.107 +	st8 [r19]=r26	// entry+0  = pte | 1
   2.108 +	st8 [r20]=r21;;	// entry+16 = tag
   2.109 +	// vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa) -- NOTE(review): the |1 was applied to r26 above, r22 is stored unchanged
   2.110 +	// TR_ENTRY = {page_flags,itir,addr,rid}
   2.111 +	cmp.eq p6,p7=XEN_HYPER_ITC_D,r17
   2.112 +	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
   2.113 +	ld8 r27=[r27];;	// r27 = current vcpu pointer
   2.114 +	adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27	// NOTE(review): r28 is not read before it is overwritten below
   2.115 +(p6)	adds r27=IA64_VCPU_DTLB_OFFSET,r27
   2.116 +(p7)	adds r27=IA64_VCPU_ITLB_OFFSET,r27;;	// r27 = trp (dtlb or itlb entry, per p6/p7)
   2.117 +	st8 [r27]=r22,8;;	// page_flags: already has pl >= 2 and p==1
   2.118 +	st8 [r27]=r24,8;;	// itir
   2.119 +	mov r19=-4096;;	// r19 = ~0xfff
   2.120 +	and r23=r23,r19;;
   2.121 +	st8 [r27]=r23,8;;	// ifa & ~0xfff
   2.122 +// ?? is virtualize_rid(v,get_rr(ifa))==vcpu_get_rr(ifa)?? YES!!
   2.123 +	adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
   2.124 +	extr.u r25=r23,61,3;;	// r25 = ifa bits 61..63 (region index)
   2.125 +	shladd r29=r25,3,r29;;	// r29 = rr0 slot address + 8*region
   2.126 +	ld8 r29=[r29];;
   2.127 +	movl r20=0xffffff00;;
   2.128 +	and r29=r29,r20;;	// keep bits 8..31 only
   2.129 +	st8 [r27]=r29,-8;;		// rid; r27 then steps back to the addr field
   2.130 +	// skip the fixup below when ps == 12 (r24 holds logps<<2)
   2.131 +	cmp.eq p7,p0=12<<2,r24
   2.132 +(p7)	br.cond.sptk.many 1f;;
   2.133 +	// if (ps > 12) {
   2.134 +	// trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
   2.135 +	extr.u r29=r24,2,6	// r29 = ps
   2.136 +	mov r28=1;;
   2.137 +	shl r26=r28,r29;;	// r26 = 1UL<<ps
   2.138 +	adds r29=-12,r29;;	// r29 = ps-12
   2.139 +	shl r25=r28,r29;;	// r25 = 1UL<<(ps-12)
   2.140 +	mov r29=-1
   2.141 +	adds r26=-1,r26
   2.142 +	adds r25=-1,r25;;
   2.143 +	andcm r26=r29,r26	// ~((1UL<<ps)-1)
   2.144 +	andcm r25=r29,r25;;	// ~((1UL<<(ps-12))-1)
   2.145 +	ld8 r29=[r27];;	// r29 = addr field stored above
   2.146 +	and r29=r29,r26;;
   2.147 +	st8 [r27]=r29,-16;;	// write back masked addr; r27 steps back to page_flags
   2.148 +	ld8 r29=[r27];;	// r29 = page_flags stored above
   2.149 +	extr.u r28=r29,12,38;;	// r28 = bits 12..49 of page_flags (the ppn)
   2.150 +	movl r26=0xfffc000000000fff;;	// complement of _PAGE_PPN_MASK
   2.151 +	and r29=r29,r26	// r29 = page_flags with the ppn bits cleared
   2.152 +	and r28=r28,r25;;	// r28 = ppn & ~((1UL<<(ps-12))-1)
   2.153 +	shl r28=r28,12;;
   2.154 +	or r29=r29,r28;;
   2.155 +	st8 [r27]=r29;;	// write back page_flags with the masked ppn
   2.156 +1:	// done with vcpu_set_tr_entry
   2.157 +	//PSCBX(vcpu,i/dtlb_pte) = mp_pte
   2.158 +	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
   2.159 +	ld8 r27=[r27];;
   2.160 +	cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
   2.161 +(p6)	adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
   2.162 +(p7)	adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
   2.163 +	st8 [r27]=r8;;	// r8 is still the pteval received at entry
   2.164 +	// done with vcpu_itc_no_srlz
   2.165  
   2.166 -ENTRY(hyper_itc_i)
   2.167 -	br.spnt.many dispatch_break_fault ;;
   2.168 -END(hyper_itc_i)
   2.169 +	// done, increment to point to next instruction
   2.170 +	mov r29=cr.ipsr
   2.171 +	mov r30=cr.iip;;
   2.172 +	extr.u r26=r29,41,2 ;;	// r26 = ipsr bits 41..42 (instruction slot within the bundle)
   2.173 +	cmp.eq p6,p7=2,r26 ;;	// p6 = currently at slot 2
   2.174 +(p6)	mov r26=0
   2.175 +(p6)	adds r30=16,r30	// slot 2: move iip forward 16 bytes and restart at slot 0
   2.176 +(p7)	adds r26=1,r26	// otherwise just advance the slot number
   2.177 +	;;
   2.178 +	dep r29=r26,r29,41,2	// write the new slot number back into the ipsr image
   2.179 +	;;
   2.180 +	mov cr.ipsr=r29
   2.181 +	mov cr.iip=r30
   2.182 +	mov pr=r31,-1 ;;	// restore predicates from r31 (pr at entry)
   2.183 +	rfi
   2.184 +	;;
   2.185 +END(hyper_itc_d)