ia64/xen-unstable

changeset 9993:ea181d857712

[IA64] update xenivt.S and xenentry.S

Update xenentry.S and xenivt.S for Linux 2.6.16.13.

Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
author awilliam@xenbuild.aw
date Wed May 10 17:07:06 2006 -0600 (2006-05-10)
parents 00141f6d15e0
children 08bc4e5ac625
files linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S	Wed May 10 15:58:36 2006 -0600
     1.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S	Wed May 10 17:07:06 2006 -0600
     1.3 @@ -83,11 +83,7 @@ GLOBAL_ENTRY(ia64_switch_to)
     1.4  	mov r8=1
     1.5  	;;
     1.6  	st4 [r27]=r8			// psr.ic back on
     1.7 -	;;
     1.8  #else
     1.9 -(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
    1.10 -	;;
    1.11 -(p6)	srlz.d
    1.12  	ld8 sp=[r21]			// load kernel stack pointer of new task
    1.13  	mov IA64_KR(CURRENT)=in0	// update "current" application register
    1.14  #endif
    1.15 @@ -136,6 +132,11 @@ GLOBAL_ENTRY(ia64_switch_to)
    1.16  #endif
    1.17  	;;
    1.18  	itr.d dtr[r25]=r23		// wire in new mapping...
    1.19 +#ifndef CONFIG_XEN
    1.20 +	ssm psr.ic			// reenable the psr.ic bit
    1.21 +	;;
    1.22 +	srlz.d
    1.23 +#endif
    1.24  	br.cond.sptk .done
    1.25  #ifdef CONFIG_XEN
    1.26  END(xen_switch_to)
    1.27 @@ -216,7 +217,9 @@ GLOBAL_ENTRY(ia64_trace_syscall)
    1.28  .mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
    1.29  .mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
    1.30  	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
    1.31 -.ret3:	br.cond.sptk .work_pending_syscall_end
    1.32 +.ret3:
    1.33 +(pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
    1.34 +	br.cond.sptk .work_pending_syscall_end
    1.35  
    1.36  strace_error:
    1.37  	ld8 r3=[r2]				// load pt_regs.r8
    1.38 @@ -246,7 +249,7 @@ END(ia64_trace_syscall)
    1.39   *	      r8-r11: restored (syscall return value(s))
    1.40   *		 r12: restored (user-level stack pointer)
    1.41   *		 r13: restored (user-level thread pointer)
    1.42 - *		 r14: cleared
    1.43 + *		 r14: set to __kernel_syscall_via_epc
    1.44   *		 r15: restored (syscall #)
    1.45   *	     r16-r17: cleared
    1.46   *		 r18: user-level b6
    1.47 @@ -267,7 +270,7 @@ END(ia64_trace_syscall)
    1.48   *		  pr: restored (user-level pr)
    1.49   *		  b0: restored (user-level rp)
    1.50   *	          b6: restored
    1.51 - *		  b7: cleared
    1.52 + *		  b7: set to __kernel_syscall_via_epc
    1.53   *	     ar.unat: restored (user-level ar.unat)
    1.54   *	      ar.pfs: restored (user-level ar.pfs)
    1.55   *	      ar.rsc: restored (user-level ar.rsc)
    1.56 @@ -331,20 +334,20 @@ ENTRY(ia64_leave_syscall)
    1.57  	;;
    1.58  (p6)	ld4 r31=[r18]				// load current_thread_info()->flags
    1.59  	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
    1.60 -	mov b7=r0		// clear b7
    1.61 +	nop.i 0
    1.62  	;;
    1.63 -	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
    1.64 +	mov r16=ar.bsp				// M2  get existing backing store pointer
    1.65  	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
    1.66  (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
    1.67  	;;
    1.68 -	mov r16=ar.bsp				// M2  get existing backing store pointer
    1.69 +	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
    1.70  (p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
    1.71  (p6)	br.cond.spnt .work_pending_syscall
    1.72  	;;
    1.73  	// start restoring the state saved on the kernel stack (struct pt_regs):
    1.74  	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
    1.75  	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
    1.76 -	mov f6=f0		// clear f6
    1.77 +(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
    1.78  	;;
    1.79  	invala			// M0|1 invalidate ALAT
    1.80  #ifdef CONFIG_XEN
    1.81 @@ -358,57 +361,68 @@ ENTRY(ia64_leave_syscall)
    1.82  	st4	[r29]=r0	// note: clears both vpsr.i and vpsr.ic!
    1.83  	;;
    1.84  #else
    1.85 -	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
    1.86 +	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
    1.87  #endif
    1.88 -	mov f9=f0		// clear f9
    1.89 +	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
    1.90  
    1.91 -	ld8 r29=[r2],16		// load cr.ipsr
    1.92 -	ld8 r28=[r3],16			// load cr.iip
    1.93 -	mov f8=f0		// clear f8
    1.94 +	ld8 r29=[r2],16		// M0|1 load cr.ipsr
    1.95 +	ld8 r28=[r3],16		// M0|1 load cr.iip
    1.96 +	mov r22=r0		// A    clear r22
    1.97  	;;
    1.98  	ld8 r30=[r2],16		// M0|1 load cr.ifs
    1.99 -	mov.m ar.ssd=r0		// M2 clear ar.ssd
   1.100 -	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
   1.101 -	;;
   1.102  	ld8 r25=[r3],16		// M0|1 load ar.unat
   1.103 -	mov.m ar.csd=r0		// M2 clear ar.csd
   1.104 -	mov r22=r0		// clear r22
   1.105 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
   1.106  	;;
   1.107  	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
   1.108 -(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
   1.109 -	mov f10=f0		// clear f10
   1.110 +(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
   1.111 +	nop 0
   1.112  	;;
   1.113 -	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
   1.114 -	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
   1.115 -	mov f11=f0		// clear f11
   1.116 +	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
   1.117 +	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
   1.118 +	mov f6=f0			// F    clear f6
   1.119  	;;
   1.120 -	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
   1.121 -	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
   1.122 -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
   1.123 +	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
   1.124 +	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
   1.125 +	mov f7=f0				// F    clear f7
   1.126  	;;
   1.127 -	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
   1.128 -	ld8.fill r1=[r3],16	// load r1
   1.129 -(pUStk) mov r17=1
   1.130 +	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
   1.131 +	ld8.fill r1=[r3],16			// M0|1 load r1
   1.132 +(pUStk) mov r17=1				// A
   1.133 +	;;
   1.134 +(pUStk) st1 [r14]=r17				// M2|3
   1.135 +	ld8.fill r13=[r3],16			// M0|1
   1.136 +	mov f8=f0				// F    clear f8
   1.137  	;;
   1.138 -	srlz.d			// M0  ensure interruption collection is off
   1.139 -	ld8.fill r13=[r3],16
   1.140 -	mov f7=f0		// clear f7
   1.141 -	;;
   1.142 -	ld8.fill r12=[r2]	// restore r12 (sp)
   1.143 -	ld8.fill r15=[r3]	// restore r15
   1.144 -	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
   1.145 +	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
   1.146 +	ld8.fill r15=[r3]			// M0|1 restore r15
   1.147 +	mov b6=r18				// I0   restore b6
   1.148 +
   1.149 +	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
   1.150 +	mov f9=f0					// F    clear f9
   1.151 +(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
   1.152 +
   1.153 +	srlz.d				// M0   ensure interruption collection is off (for cover)
   1.154 +	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
   1.155 +#ifdef CONFIG_XEN
   1.156 +	XEN_HYPER_COVER;
   1.157 +#else
   1.158 +	cover				// B    add current frame into dirty partition & set cr.ifs
   1.159 +#endif
   1.160  	;;
   1.161 -(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
   1.162 -(pUStk) st1 [r14]=r17
   1.163 -	mov b6=r18		// I0  restore b6
   1.164 +(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
   1.165 +	mov r19=ar.bsp			// M2   get new backing store pointer
   1.166 +	mov f10=f0			// F    clear f10
   1.167 +
   1.168 +	nop.m 0
   1.169 +	movl r14=__kernel_syscall_via_epc // X
   1.170  	;;
   1.171 -	mov r14=r0		// clear r14
   1.172 -	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
   1.173 -(pKStk) br.cond.dpnt.many skip_rbs_switch
   1.174 +	mov.m ar.csd=r0			// M2   clear ar.csd
   1.175 +	mov.m ar.ccv=r0			// M2   clear ar.ccv
   1.176 +	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
   1.177  
   1.178 -	mov.m ar.ccv=r0		// clear ar.ccv
   1.179 -(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
   1.180 -	br.cond.sptk.many rbs_switch
   1.181 +	mov.m ar.ssd=r0			// M2   clear ar.ssd
   1.182 +	mov f11=f0			// F    clear f11
   1.183 +	br.cond.sptk.many rbs_switch	// B
   1.184  #ifdef CONFIG_XEN
   1.185  END(xen_leave_syscall)
   1.186  #else
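
Note: the reordered epilogue above still derives the size of the dirty register-stack partition from the saved ar.rsc value loaded into r19 ("load ar.rsc value for loadrs"). On IA64, the loadrs field occupies bits 29:16 of ar.rsc, so the handler's "shr.u r18=r19,16" yields the byte count that rbs_switch later subtracts from the old bsp. A minimal C sketch of that extraction (the explicit mask is the only addition; the assembly relies on nothing being set above the field):

    #include <stdint.h>
    #include <stdio.h>

    /* ar.rsc.loadrs sits in bits 29:16 (14 bits); the handler's plain
     * "shr.u r18=r19,16" works because the saved value has nothing set
     * above the loadrs field. */
    static uint64_t dirty_partition_bytes(uint64_t saved_rsc)
    {
        return (saved_rsc >> 16) & 0x3fff;
    }

    int main(void)
    {
        /* e.g. a loadrs of 0xa8 means 168 dirty bytes to reload */
        printf("%llu\n", (unsigned long long)dirty_partition_bytes(0xa8ull << 16));
        return 0;
    }
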
   1.187 @@ -546,7 +560,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
   1.188  	ldf.fill f7=[r2],PT(F11)-PT(F7)
   1.189  	ldf.fill f8=[r3],32
   1.190  	;;
   1.191 -	srlz.i			// ensure interruption collection is off
   1.192 +	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
   1.193  	mov ar.ccv=r15
   1.194  	;;
   1.195  	ldf.fill f11=[r2]
   1.196 @@ -556,29 +570,29 @@ GLOBAL_ENTRY(ia64_leave_kernel)
   1.197  	movl r2=XSI_BANK1_R16
   1.198  	movl r3=XSI_BANK1_R16+8
   1.199  	;;
   1.200 -	st8.spill [r2]=r16,16
   1.201 -	st8.spill [r3]=r17,16
   1.202 +.mem.offset 0,0; st8.spill [r2]=r16,16
   1.203 +.mem.offset 8,0; st8.spill [r3]=r17,16
   1.204  	;;
   1.205 -	st8.spill [r2]=r18,16
   1.206 -	st8.spill [r3]=r19,16
   1.207 +.mem.offset 0,0; st8.spill [r2]=r18,16
   1.208 +.mem.offset 8,0; st8.spill [r3]=r19,16
   1.209  	;;
   1.210 -	st8.spill [r2]=r20,16
   1.211 -	st8.spill [r3]=r21,16
   1.212 +.mem.offset 0,0; st8.spill [r2]=r20,16
   1.213 +.mem.offset 8,0; st8.spill [r3]=r21,16
   1.214  	;;
   1.215 -	st8.spill [r2]=r22,16
   1.216 -	st8.spill [r3]=r23,16
   1.217 +.mem.offset 0,0; st8.spill [r2]=r22,16
   1.218 +.mem.offset 8,0; st8.spill [r3]=r23,16
   1.219  	;;
   1.220 -	st8.spill [r2]=r24,16
   1.221 -	st8.spill [r3]=r25,16
   1.222 +.mem.offset 0,0; st8.spill [r2]=r24,16
   1.223 +.mem.offset 8,0; st8.spill [r3]=r25,16
   1.224  	;;
   1.225 -	st8.spill [r2]=r26,16
   1.226 -	st8.spill [r3]=r27,16
   1.227 +.mem.offset 0,0; st8.spill [r2]=r26,16
   1.228 +.mem.offset 8,0; st8.spill [r3]=r27,16
   1.229  	;;
   1.230 -	st8.spill [r2]=r28,16
   1.231 -	st8.spill [r3]=r29,16
   1.232 +.mem.offset 0,0; st8.spill [r2]=r28,16
   1.233 +.mem.offset 8,0; st8.spill [r3]=r29,16
   1.234  	;;
   1.235 -	st8.spill [r2]=r30,16
   1.236 -	st8.spill [r3]=r31,16
   1.237 +.mem.offset 0,0; st8.spill [r2]=r30,16
   1.238 +.mem.offset 8,0; st8.spill [r3]=r31,16
   1.239  	;;
   1.240  	movl r2=XSI_BANKNUM;;
   1.241  	st4 [r2]=r0;
   1.242 @@ -641,7 +655,6 @@ GLOBAL_ENTRY(ia64_leave_kernel)
   1.243  	 */
   1.244  (pNonSys) br.cond.dpnt dont_preserve_current_frame
   1.245  
   1.246 -rbs_switch:
   1.247  #ifdef CONFIG_XEN
   1.248  	XEN_HYPER_COVER;
   1.249  #else
   1.250 @@ -649,6 +662,7 @@ rbs_switch:
   1.251  #endif
   1.252  	;;
   1.253  	mov r19=ar.bsp			// get new backing store pointer
   1.254 +rbs_switch:
   1.255  	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
   1.256  	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
   1.257  	;;
   1.258 @@ -723,14 +737,14 @@ rse_clear_invalid:
   1.259  	mov loc5=0
   1.260  	mov loc6=0
   1.261  	mov loc7=0
   1.262 -(pRecurse) br.call.sptk.few b0=rse_clear_invalid
   1.263 +(pRecurse) br.call.dptk.few b0=rse_clear_invalid
   1.264  	;;
   1.265  	mov loc8=0
   1.266  	mov loc9=0
   1.267  	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
   1.268  	mov loc10=0
   1.269  	mov loc11=0
   1.270 -(pReturn) br.ret.sptk.many b0
   1.271 +(pReturn) br.ret.dptk.many b0
   1.272  #endif /* !CONFIG_ITANIUM */
   1.273  #	undef pRecurse
   1.274  #	undef pReturn
     2.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S	Wed May 10 15:58:36 2006 -0600
     2.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S	Wed May 10 17:07:06 2006 -0600
     2.3 @@ -87,16 +87,17 @@ ENTRY(vhpt_miss)
     2.4  	 * (the "original") TLB miss, which may either be caused by an instruction
     2.5  	 * fetch or a data access (or non-access).
     2.6  	 *
     2.7 -	 * What we do here is normal TLB miss handing for the _original_ miss, followed
     2.8 -	 * by inserting the TLB entry for the virtual page table page that the VHPT
     2.9 -	 * walker was attempting to access.  The latter gets inserted as long
    2.10 -	 * as both L1 and L2 have valid mappings for the faulting address.
    2.11 -	 * The TLB entry for the original miss gets inserted only if
    2.12 -	 * the L3 entry indicates that the page is present.
     2.13 +	 * What we do here is normal TLB miss handling for the _original_
     2.14 +	 * miss, followed by inserting the TLB entry for the virtual page table
     2.15 +	 * page that the VHPT walker was attempting to access.  The latter gets
     2.16 +	 * inserted as long as the page table entries above the pte level have
     2.17 +	 * valid mappings for the faulting address.  The TLB entry for the
     2.18 +	 * original miss gets inserted only if the pte entry indicates that the
     2.19 +	 * page is present.
    2.20  	 *
    2.21  	 * do_page_fault gets invoked in the following cases:
    2.22  	 *	- the faulting virtual address uses unimplemented address bits
    2.23 -	 *	- the faulting virtual address has no L1, L2, or L3 mapping
    2.24 +	 *	- the faulting virtual address has no valid page table mapping
    2.25  	 */
    2.26  #ifdef CONFIG_XEN
    2.27  	movl r16=XSI_IFA
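
Note: for readers following the rewritten comment, here is a rough userspace C rendition of the walk it describes: descend pgd, then pud only under CONFIG_PGTABLE_4, then pmd, then pte, bailing to the page-fault path as soon as a level has no valid entry. The fan-out and shift amounts below are invented for illustration; only the shape of the walk mirrors the handler.

    #include <stdint.h>
    #include <stddef.h>

    #define ENTRIES 512                /* assumed fan-out per level */
    #define PRESENT 1ull               /* stand-in for _PAGE_P */

    typedef uint64_t entry_t;          /* table pointer bits | PRESENT */

    static entry_t *table(entry_t e)
    {
        return (entry_t *)(uintptr_t)(e & ~PRESENT);
    }

    /* Returns the pte slot, or NULL where the assembly branches to
     * page_fault because a level lacks a valid mapping. */
    static entry_t *walk(entry_t *pgd, uint64_t addr, int four_level)
    {
        entry_t e = pgd[(addr >> 39) % ENTRIES];          /* pgd_offset() */
        if (!(e & PRESENT))
            return NULL;
        if (four_level) {                                 /* CONFIG_PGTABLE_4 */
            e = table(e)[(addr >> 30) % ENTRIES];         /* pud_offset() */
            if (!(e & PRESENT))
                return NULL;
        }
        e = table(e)[(addr >> 21) % ENTRIES];             /* pmd_offset() */
        if (!(e & PRESENT))
            return NULL;
        return &table(e)[(addr >> 12) % ENTRIES];         /* pte_offset() */
    }
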
    2.28 @@ -127,7 +128,7 @@ ENTRY(vhpt_miss)
    2.29  	shl r21=r16,3				// shift bit 60 into sign bit
    2.30  	shr.u r17=r16,61			// get the region number into r17
    2.31  	;;
    2.32 -	shr r22=r21,3
    2.33 +	shr.u r22=r21,3
    2.34  #ifdef CONFIG_HUGETLB_PAGE
    2.35  	extr.u r26=r25,2,6
    2.36  	;;
    2.37 @@ -139,7 +140,7 @@ ENTRY(vhpt_miss)
    2.38  #endif
    2.39  	;;
     2.40  	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
    2.41 -	shr.u r18=r22,PGDIR_SHIFT		// get bits 33-63 of the faulting address
     2.42 +	shr.u r18=r22,PGDIR_SHIFT		// get bottom portion of the pgd index
    2.43  	;;
    2.44  (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
    2.45  
    2.46 @@ -150,41 +151,54 @@ ENTRY(vhpt_miss)
    2.47  (p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
    2.48  (p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
    2.49  	;;
    2.50 -(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
    2.51 -(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
    2.52 +(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
    2.53 +(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
    2.54  	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
    2.55 -	shr.u r18=r22,PMD_SHIFT			// shift L2 index into position
    2.56 +#ifdef CONFIG_PGTABLE_4
    2.57 +	shr.u r28=r22,PUD_SHIFT			// shift pud index into position
    2.58 +#else
    2.59 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
    2.60 +#endif
    2.61  	;;
    2.62 -	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
    2.63 +	ld8 r17=[r17]				// get *pgd (may be 0)
    2.64 +	;;
    2.65 +(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
    2.66 +#ifdef CONFIG_PGTABLE_4
    2.67 +	dep r28=r28,r17,3,(PAGE_SHIFT-3)	// r28=pud_offset(pgd,addr)
    2.68  	;;
    2.69 -(p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
    2.70 -	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
    2.71 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
    2.72 +(p7)	ld8 r29=[r28]				// get *pud (may be 0)
    2.73  	;;
    2.74 -(p7)	ld8 r20=[r17]				// fetch the L2 entry (may be 0)
    2.75 -	shr.u r19=r22,PAGE_SHIFT		// shift L3 index into position
    2.76 +(p7)	cmp.eq.or.andcm p6,p7=r29,r0		// was pud_present(*pud) == NULL?
    2.77 +	dep r17=r18,r29,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
    2.78 +#else
    2.79 +	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pgd,addr)
    2.80 +#endif
    2.81  	;;
    2.82 -(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was L2 entry NULL?
    2.83 -	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
    2.84 +(p7)	ld8 r20=[r17]				// get *pmd (may be 0)
    2.85 +	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
    2.86  	;;
    2.87 +(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was pmd_present(*pmd) == NULL?
    2.88 +	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// r21=pte_offset(pmd,addr)
    2.89 +	;;
    2.90 +(p7)	ld8 r18=[r21]				// read *pte
    2.91  #ifdef CONFIG_XEN
    2.92 -(p7)	ld8 r18=[r21]				// read the L3 PTE
    2.93  	movl r19=XSI_ISR
    2.94  	;;
    2.95  	ld8 r19=[r19]
    2.96 +#else
    2.97 +	mov r19=cr.isr				// cr.isr bit 32 tells us if this is an insn miss
    2.98 +#endif
    2.99  	;;
   2.100  (p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
   2.101 +#ifdef CONFIG_XEN
   2.102  	movl r22=XSI_IHA
   2.103  	;;
   2.104  	ld8 r22=[r22]
   2.105 -	;;
   2.106  #else
   2.107 -(p7)	ld8 r18=[r21]				// read the L3 PTE
   2.108 -	mov r19=cr.isr				// cr.isr bit 0 tells us if this is an insn miss
   2.109 -	;;
   2.110 -(p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
   2.111  	mov r22=cr.iha				// get the VHPT address that caused the TLB miss
   2.112 +#endif
   2.113  	;;					// avoid RAW on p7
   2.114 -#endif
   2.115  (p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
   2.116  	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
   2.117  	;;
   2.118 @@ -198,16 +212,17 @@ ENTRY(vhpt_miss)
   2.119  	;;
   2.120  	mov r8=r24
   2.121  	;;
   2.122 +#else
   2.123 +(p10)	itc.i r18				// insert the instruction TLB entry
   2.124 +(p11)	itc.d r18				// insert the data TLB entry
   2.125 +#endif
   2.126  (p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
   2.127 -	;;
   2.128 +#ifdef CONFIG_XEN
   2.129  	movl r24=XSI_IFA
   2.130  	;;
   2.131  	st8 [r24]=r22
   2.132  	;;
   2.133  #else
   2.134 -(p10)	itc.i r18				// insert the instruction TLB entry
   2.135 -(p11)	itc.d r18				// insert the data TLB entry
   2.136 -(p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
   2.137  	mov cr.ifa=r22
   2.138  #endif
   2.139  
   2.140 @@ -242,25 +257,41 @@ ENTRY(vhpt_miss)
   2.141  	dv_serialize_data
   2.142  
   2.143  	/*
   2.144 -	 * Re-check L2 and L3 pagetable.  If they changed, we may have received a ptc.g
   2.145 +	 * Re-check pagetable entry.  If they changed, we may have received a ptc.g
   2.146  	 * between reading the pagetable and the "itc".  If so, flush the entry we
   2.147 -	 * inserted and retry.
   2.148 +	 * inserted and retry.  At this point, we have:
   2.149 +	 *
   2.150 +	 * r28 = equivalent of pud_offset(pgd, ifa)
   2.151 +	 * r17 = equivalent of pmd_offset(pud, ifa)
   2.152 +	 * r21 = equivalent of pte_offset(pmd, ifa)
   2.153 +	 *
   2.154 +	 * r29 = *pud
   2.155 +	 * r20 = *pmd
   2.156 +	 * r18 = *pte
   2.157  	 */
   2.158 -	ld8 r25=[r21]				// read L3 PTE again
   2.159 -	ld8 r26=[r17]				// read L2 entry again
   2.160 +	ld8 r25=[r21]				// read *pte again
   2.161 +	ld8 r26=[r17]				// read *pmd again
   2.162 +#ifdef CONFIG_PGTABLE_4
   2.163 +	ld8 r19=[r28]				// read *pud again
   2.164 +#endif
   2.165 +	cmp.ne p6,p7=r0,r0
   2.166  	;;
   2.167 -	cmp.ne p6,p7=r26,r20			// did L2 entry change
   2.168 +	cmp.ne.or.andcm p6,p7=r26,r20		// did *pmd change
   2.169 +#ifdef CONFIG_PGTABLE_4
   2.170 +	cmp.ne.or.andcm p6,p7=r19,r29		// did *pud change
   2.171 +#endif
   2.172  	mov r27=PAGE_SHIFT<<2
   2.173  	;;
   2.174  (p6)	ptc.l r22,r27				// purge PTE page translation
   2.175 -(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did L3 PTE change
   2.176 +(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did *pte change
   2.177  	;;
   2.178  (p6)	ptc.l r16,r27				// purge translation
   2.179  #endif
   2.180  
   2.181  	mov pr=r31,-1				// restore predicate registers
   2.182  #ifdef CONFIG_XEN
   2.183 -	XEN_HYPER_RFI;
   2.184 +	XEN_HYPER_RFI
   2.185 +	dv_serialize_data
   2.186  #else
   2.187  	rfi
   2.188  #endif
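
Note: the enlarged comment spells out which registers snapshot each level; the control flow it guards reduces to the sketch below, a plain C restatement under the same names with purge() standing in for ptc.l. If an upper level changed, both the PTE-page translation (r22) and the just-inserted translation (r16) are purged; if only the pte changed, just the latter, and the retried fault redoes the walk.

    #include <stdint.h>

    struct snap { uint64_t pud, pmd, pte; };   /* r29, r20, r18 */

    static void purge(volatile void *addr)     /* hypothetical ptc.l stand-in */
    {
        (void)addr;
    }

    static void recheck(volatile uint64_t *pud, volatile uint64_t *pmd,
                        volatile uint64_t *pte, struct snap s,
                        void *pte_page_xlat, void *orig_xlat, int four_level)
    {
        int upper = *pmd != s.pmd;                 /* did *pmd change? */
        if (four_level)
            upper |= *pud != s.pud;                /* did *pud change? */
        if (upper)
            purge(pte_page_xlat);                  /* ptc.l r22 */
        if (upper || *pte != s.pte)                /* did *pte change? */
            purge(orig_xlat);                      /* ptc.l r16 */
    }
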
   2.189 @@ -272,10 +303,10 @@ END(vhpt_miss)
   2.190  ENTRY(itlb_miss)
   2.191  	DBG_FAULT(1)
   2.192  	/*
   2.193 -	 * The ITLB handler accesses the L3 PTE via the virtually mapped linear
   2.194 +	 * The ITLB handler accesses the PTE via the virtually mapped linear
   2.195  	 * page table.  If a nested TLB miss occurs, we switch into physical
   2.196 -	 * mode, walk the page table, and then re-execute the L3 PTE read
   2.197 -	 * and go on normally after that.
   2.198 +	 * mode, walk the page table, and then re-execute the PTE read and
   2.199 +	 * go on normally after that.
   2.200  	 */
   2.201  #ifdef CONFIG_XEN
   2.202  	movl r16=XSI_IFA
   2.203 @@ -292,11 +323,11 @@ ENTRY(itlb_miss)
   2.204  	;;
   2.205  	ld8 r17=[r17]				// get virtual address of L3 PTE
   2.206  #else
   2.207 -	mov r17=cr.iha				// get virtual address of L3 PTE
   2.208 +	mov r17=cr.iha				// get virtual address of PTE
   2.209  #endif
   2.210  	movl r30=1f				// load nested fault continuation point
   2.211  	;;
   2.212 -1:	ld8 r18=[r17]				// read L3 PTE
   2.213 +1:	ld8 r18=[r17]				// read *pte
   2.214  	;;
   2.215  	mov b0=r29
   2.216  	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
   2.217 @@ -320,7 +351,7 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.218  	 */
   2.219  	dv_serialize_data
   2.220  
   2.221 -	ld8 r19=[r17]				// read L3 PTE again and see if same
   2.222 +	ld8 r19=[r17]				// read *pte again and see if same
   2.223  	mov r20=PAGE_SHIFT<<2			// setup page size for purge
   2.224  	;;
   2.225  	cmp.ne p7,p0=r18,r19
   2.226 @@ -329,7 +360,8 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.227  #endif
   2.228  	mov pr=r31,-1
   2.229  #ifdef CONFIG_XEN
   2.230 -	XEN_HYPER_RFI;
   2.231 +	XEN_HYPER_RFI
   2.232 +	dv_serialize_data
   2.233  #else
   2.234  	rfi
   2.235  #endif
   2.236 @@ -341,10 +373,10 @@ END(itlb_miss)
   2.237  ENTRY(dtlb_miss)
   2.238  	DBG_FAULT(2)
   2.239  	/*
   2.240 -	 * The DTLB handler accesses the L3 PTE via the virtually mapped linear
   2.241 +	 * The DTLB handler accesses the PTE via the virtually mapped linear
   2.242  	 * page table.  If a nested TLB miss occurs, we switch into physical
   2.243 -	 * mode, walk the page table, and then re-execute the L3 PTE read
   2.244 -	 * and go on normally after that.
   2.245 +	 * mode, walk the page table, and then re-execute the PTE read and
   2.246 +	 * go on normally after that.
   2.247  	 */
   2.248  #ifdef CONFIG_XEN
   2.249  	movl r16=XSI_IFA
   2.250 @@ -361,11 +393,11 @@ dtlb_fault:
   2.251  	;;
   2.252  	ld8 r17=[r17]				// get virtual address of L3 PTE
   2.253  #else
   2.254 -	mov r17=cr.iha				// get virtual address of L3 PTE
   2.255 +	mov r17=cr.iha				// get virtual address of PTE
   2.256  #endif
   2.257  	movl r30=1f				// load nested fault continuation point
   2.258  	;;
   2.259 -1:	ld8 r18=[r17]				// read L3 PTE
   2.260 +1:	ld8 r18=[r17]				// read *pte
   2.261  	;;
   2.262  	mov b0=r29
   2.263  	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
   2.264 @@ -390,7 +422,7 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.265  	 */
   2.266  	dv_serialize_data
   2.267  
   2.268 -	ld8 r19=[r17]				// read L3 PTE again and see if same
   2.269 +	ld8 r19=[r17]				// read *pte again and see if same
   2.270  	mov r20=PAGE_SHIFT<<2			// setup page size for purge
   2.271  	;;
   2.272  	cmp.ne p7,p0=r18,r19
   2.273 @@ -399,7 +431,8 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.274  #endif
   2.275  	mov pr=r31,-1
   2.276  #ifdef CONFIG_XEN
   2.277 -	XEN_HYPER_RFI;
   2.278 +	XEN_HYPER_RFI
   2.279 +	dv_serialize_data
   2.280  #else
   2.281  	rfi
   2.282  #endif
   2.283 @@ -416,19 +449,15 @@ ENTRY(alt_itlb_miss)
   2.284  	ld8 r21=[r31],XSI_IFA-XSI_IPSR	// get ipsr, point to ifa
   2.285  	movl r17=PAGE_KERNEL
   2.286  	;;
   2.287 -	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.288 -	;;
   2.289  	ld8 r16=[r31]		// get ifa
   2.290 -	mov r31=pr
   2.291 -	;;
   2.292  #else
   2.293  	mov r16=cr.ifa		// get address that caused the TLB miss
   2.294  	movl r17=PAGE_KERNEL
   2.295  	mov r21=cr.ipsr
   2.296 +#endif
   2.297  	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.298  	mov r31=pr
   2.299  	;;
   2.300 -#endif
   2.301  #ifdef CONFIG_DISABLE_VHPT
    2.302  	shr.u r22=r16,61			// get the region number into r22
   2.303  	;;
   2.304 @@ -486,17 +515,15 @@ ENTRY(alt_dtlb_miss)
   2.305  	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.306  	;;
   2.307  	ld8 r16=[r31]		// get ifa
   2.308 -	mov r31=pr
   2.309 -	;;
   2.310  #else
   2.311  	mov r16=cr.ifa		// get address that caused the TLB miss
   2.312  	movl r17=PAGE_KERNEL
   2.313  	mov r20=cr.isr
   2.314  	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.315  	mov r21=cr.ipsr
   2.316 +#endif
   2.317  	mov r31=pr
   2.318  	;;
   2.319 -#endif
   2.320  #ifdef CONFIG_DISABLE_VHPT
    2.321  	shr.u r22=r16,61			// get the region number into r22
   2.322  	;;
   2.323 @@ -565,12 +592,12 @@ ENTRY(nested_dtlb_miss)
   2.324  	 *		r30:	continuation address
   2.325  	 *		r31:	saved pr
   2.326  	 *
   2.327 -	 * Output:	r17:	physical address of L3 PTE of faulting address
   2.328 +	 * Output:	r17:	physical address of PTE of faulting address
   2.329  	 *		r29:	saved b0
   2.330  	 *		r30:	continuation address
   2.331  	 *		r31:	saved pr
   2.332  	 *
   2.333 -	 * Clobbered:	b0, r18, r19, r21, psr.dt (cleared)
   2.334 +	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
   2.335  	 */
   2.336  #ifdef CONFIG_XEN
   2.337  	XEN_HYPER_RSM_PSR_DT;
   2.338 @@ -579,12 +606,23 @@ ENTRY(nested_dtlb_miss)
   2.339  #endif
   2.340  	mov r19=IA64_KR(PT_BASE)		// get the page table base address
   2.341  	shl r21=r16,3				// shift bit 60 into sign bit
   2.342 +#ifdef CONFIG_XEN
   2.343 +	movl r18=XSI_ITIR
   2.344 +	;;
   2.345 +	ld8 r18=[r18]
   2.346 +#else
   2.347 +	mov r18=cr.itir
   2.348 +#endif
   2.349  	;;
   2.350  	shr.u r17=r16,61			// get the region number into r17
   2.351 +	extr.u r18=r18,2,6			// get the faulting page size
   2.352  	;;
   2.353  	cmp.eq p6,p7=5,r17			// is faulting address in region 5?
   2.354 -	shr.u r18=r16,PGDIR_SHIFT		// get bits 33-63 of faulting address
   2.355 +	add r22=-PAGE_SHIFT,r18			// adjustment for hugetlb address
   2.356 +	add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
   2.357  	;;
   2.358 +	shr.u r22=r16,r22
   2.359 +	shr.u r18=r16,r18
   2.360  (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
   2.361  
   2.362  	srlz.d
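
Note: the new itir read feeds the hugetlb-aware index math above. itir.ps (bits 7:2) is the log2 size of the faulting page, so the address is scaled down by (ps - PAGE_SHIFT) before indexing, and the pgd index shift grows by the same amount. A small C restatement, with PAGE_SHIFT and PGDIR_SHIFT values assumed purely for illustration:

    #include <stdint.h>

    #define PAGE_SHIFT  14    /* assumed 16KB base pages */
    #define PGDIR_SHIFT 36    /* assumed; depends on page size and levels */

    static unsigned itir_ps(uint64_t itir)
    {
        return (itir >> 2) & 0x3f;           /* extr.u r18=r18,2,6 */
    }

    /* r22: address scaled so the usual per-level index extraction works
     * even when the faulting page is a huge page. */
    static uint64_t scaled_addr(uint64_t addr, uint64_t itir)
    {
        return addr >> (itir_ps(itir) - PAGE_SHIFT);
    }

    /* r18: top bits used to form the pgd index. */
    static uint64_t pgd_index_bits(uint64_t addr, uint64_t itir)
    {
        return addr >> (itir_ps(itir) + PGDIR_SHIFT - PAGE_SHIFT);
    }
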
   2.363 @@ -594,21 +632,33 @@ ENTRY(nested_dtlb_miss)
   2.364  (p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
   2.365  (p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
   2.366  	;;
   2.367 -(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
   2.368 -(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
   2.369 +(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
   2.370 +(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
   2.371  	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
   2.372 -	shr.u r18=r16,PMD_SHIFT			// shift L2 index into position
   2.373 +#ifdef CONFIG_PGTABLE_4
   2.374 +	shr.u r18=r22,PUD_SHIFT			// shift pud index into position
   2.375 +#else
   2.376 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
   2.377 +#endif
   2.378  	;;
   2.379 -	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
   2.380 +	ld8 r17=[r17]				// get *pgd (may be 0)
   2.381 +	;;
   2.382 +(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
   2.383 +	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=p[u|m]d_offset(pgd,addr)
   2.384  	;;
   2.385 -(p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
   2.386 -	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
   2.387 +#ifdef CONFIG_PGTABLE_4
   2.388 +(p7)	ld8 r17=[r17]				// get *pud (may be 0)
   2.389 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
   2.390 +	;;
   2.391 +(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pud_present(*pud) == NULL?
   2.392 +	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
   2.393  	;;
   2.394 -(p7)	ld8 r17=[r17]				// fetch the L2 entry (may be 0)
   2.395 -	shr.u r19=r16,PAGE_SHIFT		// shift L3 index into position
   2.396 +#endif
   2.397 +(p7)	ld8 r17=[r17]				// get *pmd (may be 0)
   2.398 +	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
   2.399  	;;
   2.400 -(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was L2 entry NULL?
   2.401 -	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
   2.402 +(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pmd_present(*pmd) == NULL?
   2.403 +	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// r17=pte_offset(pmd,addr);
   2.404  (p6)	br.cond.spnt page_fault
   2.405  	mov b0=r30
   2.406  	br.sptk.many b0				// return to continuation point
   2.407 @@ -626,7 +676,7 @@ END(ikey_miss)
   2.408  	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
   2.409  ENTRY(page_fault)
   2.410  #ifdef CONFIG_XEN
   2.411 -	XEN_HYPER_SSM_PSR_DT;
   2.412 +	XEN_HYPER_SSM_PSR_DT
   2.413  #else
   2.414  	ssm psr.dt
   2.415  	;;
   2.416 @@ -742,11 +792,12 @@ 1:	ld8 r18=[r17]
   2.417  	;;					// avoid RAW on r18
   2.418  	mov ar.ccv=r18				// set compare value for cmpxchg
   2.419  	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
    2.420 +	tbit.z p7,p6=r18,_PAGE_P_BIT		// Check present bit
   2.421  	;;
   2.422 -	cmpxchg8.acq r26=[r17],r25,ar.ccv
   2.423 +(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
   2.424  	mov r24=PAGE_SHIFT<<2
   2.425  	;;
   2.426 -	cmp.eq p6,p7=r26,r18
   2.427 +(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
   2.428  	;;
   2.429  (p6)	itc.d r25				// install updated PTE
   2.430  	;;
   2.431 @@ -775,7 +826,8 @@ 1:	ld8 r18=[r17]
   2.432  #endif
   2.433  	mov pr=r31,-1				// restore pr
   2.434  #ifdef CONFIG_XEN
   2.435 -	XEN_HYPER_RFI;
   2.436 +	XEN_HYPER_RFI
   2.437 +	dv_serialize_data
   2.438  #else
   2.439  	rfi
   2.440  #endif
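
Note: the added tbit.z test predicates both the cmpxchg and the success compare on the present bit, so a not-present PTE falls straight through to the fault exit without being modified. A C11 simulation of that guard (bit positions assumed; cmpxchg8.acq becomes a strong compare-exchange):

    #include <stdatomic.h>
    #include <stdint.h>

    #define PAGE_P (1ull << 0)   /* present  -- bit numbers assumed */
    #define PAGE_A (1ull << 5)   /* accessed */
    #define PAGE_D (1ull << 6)   /* dirty    */

    /* Returns 1 if the PTE was present and updated atomically, 0 if the
     * page was not present or the PTE changed underneath us. */
    static int set_dirty_accessed(_Atomic uint64_t *pte)
    {
        uint64_t old = atomic_load(pte);
        if (!(old & PAGE_P))
            return 0;                      /* p7: leave PTE untouched */
        uint64_t want = old | PAGE_D | PAGE_A;
        /* cmpxchg8.acq: only succeeds if *pte still equals what we read */
        return atomic_compare_exchange_strong(pte, &old, want);
    }
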
   2.441 @@ -826,11 +878,12 @@ 1:	ld8 r18=[r17]
   2.442  	;;
   2.443  	mov ar.ccv=r18				// set compare value for cmpxchg
   2.444  	or r25=_PAGE_A,r18			// set the accessed bit
    2.445 +	tbit.z p7,p6=r18,_PAGE_P_BIT		// Check present bit
   2.446  	;;
   2.447 -	cmpxchg8.acq r26=[r17],r25,ar.ccv
   2.448 +(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
   2.449  	mov r24=PAGE_SHIFT<<2
   2.450  	;;
   2.451 -	cmp.eq p6,p7=r26,r18
   2.452 +(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
   2.453  	;;
   2.454  #ifdef CONFIG_XEN
   2.455  	mov r26=r8
   2.456 @@ -869,7 +922,8 @@ 1:	ld8 r18=[r17]
   2.457  #endif /* !CONFIG_SMP */
   2.458  	mov pr=r31,-1
   2.459  #ifdef CONFIG_XEN
   2.460 -	XEN_HYPER_RFI;
   2.461 +	XEN_HYPER_RFI
   2.462 +	dv_serialize_data
   2.463  #else
   2.464  	rfi
   2.465  #endif
   2.466 @@ -892,11 +946,13 @@ ENTRY(daccess_bit)
   2.467  	movl r30=1f				// load continuation point in case of nested fault
   2.468  	;;
   2.469  #ifdef CONFIG_XEN
   2.470 -	mov r18=r8;
   2.471 -	mov r8=r16;
   2.472 -	XEN_HYPER_THASH;;
   2.473 -	mov r17=r8;
   2.474 -	mov r8=r18;;
   2.475 +	mov r18=r8
   2.476 +	mov r8=r16
   2.477 +	XEN_HYPER_THASH
   2.478 +	;;
   2.479 +	mov r17=r8
   2.480 +	mov r8=r18
   2.481 +	;;
   2.482  #else
   2.483  	thash r17=r16				// compute virtual address of L3 PTE
   2.484  #endif
   2.485 @@ -909,11 +965,12 @@ 1:	ld8 r18=[r17]
   2.486  	;;					// avoid RAW on r18
   2.487  	mov ar.ccv=r18				// set compare value for cmpxchg
    2.488  	or r25=_PAGE_A,r18			// set the accessed bit
    2.489 +	tbit.z p7,p6=r18,_PAGE_P_BIT		// Check present bit
   2.490  	;;
   2.491 -	cmpxchg8.acq r26=[r17],r25,ar.ccv
   2.492 +(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
   2.493  	mov r24=PAGE_SHIFT<<2
   2.494  	;;
   2.495 -	cmp.eq p6,p7=r26,r18
   2.496 +(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
   2.497  	;;
   2.498  #ifdef CONFIG_XEN
   2.499  	mov r26=r8
   2.500 @@ -950,7 +1007,8 @@ 1:	ld8 r18=[r17]
   2.501  	mov b0=r29				// restore b0
   2.502  	mov pr=r31,-1
   2.503  #ifdef CONFIG_XEN
   2.504 -	XEN_HYPER_RFI;
   2.505 +	XEN_HYPER_RFI
   2.506 +	dv_serialize_data
   2.507  #else
   2.508  	rfi
   2.509  #endif
   2.510 @@ -976,143 +1034,157 @@ ENTRY(break_fault)
   2.511  	 * to prevent leaking bits from kernel to user level.
   2.512  	 */
   2.513  	DBG_FAULT(11)
   2.514 -	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
   2.515 +	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
   2.516  #ifdef CONFIG_XEN
   2.517 -	movl r31=XSI_IPSR
   2.518 -	;;
   2.519 -	ld8 r29=[r31],XSI_IIP-XSI_IPSR		// get ipsr, point to iip
   2.520 -	mov r18=__IA64_BREAK_SYSCALL
   2.521 -	mov r21=ar.fpsr
   2.522 -	;;
   2.523 -	ld8 r28=[r31],XSI_IIM-XSI_IIP		// get iip, point to iim
   2.524 -	mov r19=b6
   2.525 -	mov r25=ar.unat
   2.526 +	movl r22=XSI_IPSR
   2.527  	;;
   2.528 -	ld8 r17=[r31]				// get iim
   2.529 -	mov r27=ar.rsc
   2.530 -	mov r26=ar.pfs
   2.531 -	;;
   2.532 +	ld8 r29=[r22],XSI_IIM-XSI_IPSR		// get ipsr, point to iip
   2.533  #else
   2.534 -	mov r17=cr.iim
   2.535 -	mov r18=__IA64_BREAK_SYSCALL
   2.536 -	mov r21=ar.fpsr
   2.537 -	mov r29=cr.ipsr
   2.538 -	mov r19=b6
   2.539 -	mov r25=ar.unat
   2.540 -	mov r27=ar.rsc
   2.541 -	mov r26=ar.pfs
   2.542 -	mov r28=cr.iip
   2.543 +	mov r29=cr.ipsr				// M2 (12 cyc)
   2.544 +#endif
   2.545 +	mov r31=pr				// I0 (2 cyc)
   2.546 +
   2.547 +#ifdef CONFIG_XEN
   2.548 +	;;
   2.549 +	ld8 r17=[r22],XSI_IIP-XSI_IIM
   2.550 +#else
   2.551 +	mov r17=cr.iim				// M2 (2 cyc)
   2.552  #endif
   2.553 -	mov r31=pr				// prepare to save predicates
   2.554 -	mov r20=r1
   2.555 +	mov.m r27=ar.rsc			// M2 (12 cyc)
   2.556 +	mov r18=__IA64_BREAK_SYSCALL		// A
   2.557 +
   2.558 +	mov.m ar.rsc=0				// M2
   2.559 +	mov.m r21=ar.fpsr			// M2 (12 cyc)
   2.560 +	mov r19=b6				// I0 (2 cyc)
   2.561  	;;
   2.562 -	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
   2.563 -	cmp.eq p0,p7=r18,r17			// is this a system call? (p7 <- false, if so)
   2.564 -(p7)	br.cond.spnt non_syscall
   2.565 -	;;
   2.566 -	ld1 r17=[r16]				// load current->thread.on_ustack flag
   2.567 -	st1 [r16]=r0				// clear current->thread.on_ustack flag
   2.568 -	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
   2.569 -	;;
   2.570 -	invala
   2.571 +	mov.m r23=ar.bspstore			// M2 (12 cyc)
   2.572 +	mov.m r24=ar.rnat			// M2 (5 cyc)
   2.573 +	mov.i r26=ar.pfs			// I0 (2 cyc)
   2.574 +
   2.575 +	invala					// M0|1
   2.576 +	nop.m 0					// M
   2.577 +	mov r20=r1				// A			save r1
   2.578 +
   2.579 +	nop.m 0
   2.580 +	movl r30=sys_call_table			// X
   2.581  
   2.582 -	/* adjust return address so we skip over the break instruction: */
   2.583 +#ifdef CONFIG_XEN
   2.584 +	ld8 r28=[r22]
   2.585 +#else
   2.586 +	mov r28=cr.iip				// M2 (2 cyc)
   2.587 +#endif
   2.588 +	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
   2.589 +(p7)	br.cond.spnt non_syscall		// B  no ->
   2.590 +	//
   2.591 +	// From this point on, we are definitely on the syscall-path
   2.592 +	// and we can use (non-banked) scratch registers.
   2.593 +	//
   2.594 +///////////////////////////////////////////////////////////////////////
   2.595 +	mov r1=r16				// A    move task-pointer to "addl"-addressable reg
   2.596 +	mov r2=r16				// A    setup r2 for ia64_syscall_setup
   2.597 +	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// A	r9 = &current_thread_info()->flags
   2.598  
   2.599 -	extr.u r8=r29,41,2			// extract ei field from cr.ipsr
   2.600 -	;;
   2.601 -	cmp.eq p6,p7=2,r8			// isr.ei==2?
   2.602 -	mov r2=r1				// setup r2 for ia64_syscall_setup
   2.603 +	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
   2.604 +	adds r15=-1024,r15			// A    subtract 1024 from syscall number
   2.605 +	mov r3=NR_syscalls - 1
   2.606  	;;
   2.607 -(p6)	mov r8=0				// clear ei to 0
   2.608 -(p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
   2.609 -(p7)	adds r8=1,r8				// increment ei to next slot
   2.610 -	;;
   2.611 -	cmp.eq pKStk,pUStk=r0,r17		// are we in kernel mode already?
   2.612 -	dep r29=r8,r29,41,2			// insert new ei into cr.ipsr
   2.613 +	ld1.bias r17=[r16]			// M0|1 r17 = current->thread.on_ustack flag
   2.614 +	ld4 r9=[r9]				// M0|1 r9 = current_thread_info()->flags
   2.615 +	extr.u r8=r29,41,2			// I0   extract ei field from cr.ipsr
   2.616 +
   2.617 +	shladd r30=r15,3,r30			// A    r30 = sys_call_table + 8*(syscall-1024)
   2.618 +	addl r22=IA64_RBS_OFFSET,r1		// A    compute base of RBS
   2.619 +	cmp.leu p6,p7=r15,r3			// A    syscall number in range?
   2.620  	;;
   2.621  
   2.622 -	// switch from user to kernel RBS:
   2.623 -	MINSTATE_START_SAVE_MIN_VIRT
   2.624 -	br.call.sptk.many b7=ia64_syscall_setup
   2.625 +	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch RBS
   2.626 +(p6)	ld8 r30=[r30]				// M0|1 load address of syscall entry point
   2.627 +	tnat.nz.or p7,p0=r15			// I0	is syscall nr a NaT?
   2.628 +
   2.629 +	mov.m ar.bspstore=r22			// M2   switch to kernel RBS
   2.630 +	cmp.eq p8,p9=2,r8			// A    isr.ei==2?
   2.631 +	;;
   2.632 +
   2.633 +(p8)	mov r8=0				// A    clear ei to 0
   2.634 +(p7)	movl r30=sys_ni_syscall			// X
   2.635 +
   2.636 +(p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
   2.637 +(p9)	adds r8=1,r8				// A    increment ei to next slot
   2.638 +	nop.i 0
   2.639  	;;
   2.640 +
   2.641 +	mov.m r25=ar.unat			// M2 (5 cyc)
   2.642 +	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
   2.643 +	adds r15=1024,r15			// A    restore original syscall number
   2.644 +	//
   2.645 +	// If any of the above loads miss in L1D, we'll stall here until
   2.646 +	// the data arrives.
   2.647 +	//
   2.648 +///////////////////////////////////////////////////////////////////////
   2.649 +	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
   2.650 +	mov b6=r30				// I0   setup syscall handler branch reg early
   2.651 +	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
   2.652 +
   2.653 +	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
   2.654 +	mov r18=ar.bsp				// M2 (12 cyc)
   2.655 +(pKStk)	br.cond.spnt .break_fixup		// B	we're already in kernel-mode -- fix up RBS
   2.656 +	;;
   2.657 +.back_from_break_fixup:
   2.658 +(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute base of memory stack
   2.659 +	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
   2.660 +	br.call.sptk.many b7=ia64_syscall_setup	// B
   2.661 +1:
   2.662 +	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
   2.663 +	nop 0
   2.664  #ifdef CONFIG_XEN
   2.665  	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;
   2.666  #else
   2.667 -	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
   2.668 +	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
   2.669  #endif
   2.670 +	;;
   2.671 +
   2.672  #ifdef CONFIG_XEN
   2.673 -	movl r3=XSI_PSR_IC
   2.674 -	mov r16=1
   2.675 +	movl r16=XSI_PSR_IC
   2.676 +	mov r3=1
   2.677  	;;
   2.678 -#if 1
   2.679 -	st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC	// vpsr.ic = 1
   2.680 +	st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC	// vpsr.ic = 1
   2.681 +#else
   2.682 +	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
   2.683 +#endif
   2.684 +	movl r3=ia64_ret_from_syscall		// X
   2.685  	;;
   2.686 -(p15)	ld8 r3=[r3]
   2.687 +
   2.688 +	srlz.i					// M0   ensure interruption collection is on
   2.689 +	mov rp=r3				// I0   set the real return addr
   2.690 +(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
   2.691 +
   2.692 +#ifdef CONFIG_XEN
   2.693 +(p15)	ld8 r16=[r16]				// vpsr.i
   2.694  	;;
   2.695 -(p15)	st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR	// if (p15) vpsr.i = 1
   2.696 -	mov r16=r0
   2.697 +(p15)	st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR	// if (p15) vpsr.i = 1
   2.698 +	mov r2=r0
   2.699  	;;
   2.700 -(p15)	ld4 r16=[r3]				// if (pending_interrupts)
   2.701 +(p15)	ld4 r2=[r16]				// if (pending_interrupts)
   2.702  	;;
   2.703 -	cmp.ne	p6,p0=r16,r0
   2.704 +	cmp.ne	p6,p0=r2,r0
   2.705  	;;
   2.706  (p6)	ssm	psr.i				//   do a real ssm psr.i
   2.707 -	;;
   2.708  #else
   2.709 -//	st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC	// vpsr.ic = 1
   2.710 -	adds r3=XSI_PSR_I_ADDR-XSI_PSR_IC,r3	// SKIP vpsr.ic = 1
   2.711 -	;;
   2.712 -(p15)	ld8 r3=[r3]
   2.713 -	;;
   2.714 -(p15)	st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR	// if (p15) vpsr.i = 1
   2.715 -	mov r16=r0
   2.716 -	;;
   2.717 -(p15)	ld4 r16=[r3]				// if (pending_interrupts)
   2.718 -	;;
   2.719 -	cmp.ne	p6,p0=r16,r0
   2.720 -	;;
   2.721 -//(p6)	ssm	psr.i				//   do a real ssm psr.i
   2.722 -//(p6)	XEN_HYPER_SSM_I;
   2.723 -(p6)	break 0x7;
   2.724 -	;;
   2.725 +(p15)	ssm psr.i				// M2   restore psr.i
   2.726  #endif
   2.727 -	mov r3=NR_syscalls - 1
   2.728 -	;;
   2.729 -#else
   2.730 -	ssm psr.ic | PSR_DEFAULT_BITS
   2.731 -	;;
   2.732 -	srlz.i					// guarantee that interruption collection is on
   2.733 -	mov r3=NR_syscalls - 1
    2.734 +(p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handler (ignore return addr)
   2.735 +	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
   2.736 +	// NOT REACHED
   2.737 +///////////////////////////////////////////////////////////////////////
   2.738 +	// On entry, we optimistically assumed that we're coming from user-space.
   2.739 +	// For the rare cases where a system-call is done from within the kernel,
   2.740 +	// we fix things up at this point:
   2.741 +.break_fixup:
   2.742 +	add r1=-IA64_PT_REGS_SIZE,sp		// A    allocate space for pt_regs structure
   2.743 +	mov ar.rnat=r24				// M2	restore kernel's AR.RNAT
   2.744  	;;
   2.745 -(p15)	ssm psr.i				// restore psr.i
   2.746 -#endif
   2.747 -	// p10==true means out registers are more than 8 or r15's Nat is true
   2.748 -(p10)	br.cond.spnt.many ia64_ret_from_syscall
   2.749 -	;;
   2.750 -	movl r16=sys_call_table
   2.751 -
   2.752 -	adds r15=-1024,r15			// r15 contains the syscall number---subtract 1024
   2.753 -	movl r2=ia64_ret_from_syscall
   2.754 -	;;
   2.755 -	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
   2.756 -	cmp.leu p6,p7=r15,r3			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
   2.757 -	mov rp=r2				// set the real return addr
   2.758 -	;;
   2.759 -(p6)	ld8 r20=[r20]				// load address of syscall entry point
   2.760 -(p7)	movl r20=sys_ni_syscall
   2.761 -
   2.762 -	add r2=TI_FLAGS+IA64_TASK_SIZE,r13
   2.763 -	;;
   2.764 -	ld4 r2=[r2]				// r2 = current_thread_info()->flags
   2.765 -	;;
   2.766 -	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
   2.767 -	;;
   2.768 -	cmp.eq p8,p0=r2,r0
   2.769 -	mov b6=r20
   2.770 -	;;
   2.771 -(p8)	br.call.sptk.many b6=b6			// ignore this return addr
   2.772 -	br.cond.sptk ia64_trace_syscall
   2.773 -	// NOT REACHED
   2.774 +	mov ar.bspstore=r23			// M2	restore kernel's AR.BSPSTORE
   2.775 +	br.cond.sptk .back_from_break_fixup
   2.776  END(break_fault)
   2.777  
   2.778  	.org ia64_ivt+0x3000
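
Note: the rescheduled break_fault fast path above now resolves the handler early: subtract the 1024 syscall base, range-check against NR_syscalls, fall back to sys_ni_syscall for an out-of-range number or a NaT r15, and only then branch. A compact C model of that selection (table contents and count are placeholders):

    #include <stddef.h>

    #define NR_syscalls 270                      /* placeholder count */

    typedef long (*syscall_fn_t)(void);
    static long sys_ni_syscall(void) { return -38; /* -ENOSYS */ }
    static syscall_fn_t sys_call_table[NR_syscalls]; /* placeholder table */

    static syscall_fn_t pick_handler(long r15, int r15_is_nat)
    {
        long nr = r15 - 1024;                    /* adds r15=-1024,r15 */
        if (r15_is_nat || nr < 0 || nr > NR_syscalls - 1)
            return sys_ni_syscall;               /* (p7) movl r30=sys_ni_syscall */
        syscall_fn_t fn = sys_call_table[nr];    /* (p6) ld8 r30=[r30] */
        return fn ? fn : sys_ni_syscall;
    }

The assembly folds the negative case into a single unsigned compare (cmp.leu p6,p7=r15,r3) and catches the NaT with tnat.nz.or.
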
   2.779 @@ -1201,8 +1273,6 @@ END(interrupt)
   2.780  	 *	- r31: saved pr
   2.781  	 *	-  b0: original contents (to be saved)
   2.782  	 * On exit:
   2.783 -	 *	- executing on bank 1 registers
   2.784 -	 *	- psr.ic enabled, interrupts restored
   2.785  	 *	-  p10: TRUE if syscall is invoked with more than 8 out
   2.786  	 *		registers or r15's Nat is true
   2.787  	 *	-  r1: kernel's gp
   2.788 @@ -1210,8 +1280,11 @@ END(interrupt)
   2.789  	 *	-  r8: -EINVAL if p10 is true
   2.790  	 *	- r12: points to kernel stack
   2.791  	 *	- r13: points to current task
   2.792 +	 *	- r14: preserved (same as on entry)
   2.793 +	 *	- p13: preserved
   2.794  	 *	- p15: TRUE if interrupts need to be re-enabled
   2.795  	 *	- ar.fpsr: set to kernel settings
   2.796 +	 *	-  b6: preserved (same as on entry)
   2.797  	 */
   2.798  #ifndef CONFIG_XEN
   2.799  GLOBAL_ENTRY(ia64_syscall_setup)
   2.800 @@ -1280,10 +1353,10 @@ GLOBAL_ENTRY(ia64_syscall_setup)
   2.801  (p13)	mov in5=-1
   2.802  	;;
   2.803  	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
   2.804 -	tnat.nz p14,p0=in6
   2.805 +	tnat.nz p13,p0=in6
   2.806  	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
   2.807  	;;
   2.808 -	stf8 [r16]=f1		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
   2.809 +	mov r8=1
   2.810  (p9)	tnat.nz p10,p0=r15
   2.811  	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
   2.812  
   2.813 @@ -1294,9 +1367,9 @@ GLOBAL_ENTRY(ia64_syscall_setup)
   2.814  	mov r13=r2				// establish `current'
   2.815  	movl r1=__gp				// establish kernel global pointer
   2.816  	;;
   2.817 -(p14)	mov in6=-1
   2.818 +	st8 [r16]=r8		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
   2.819 +(p13)	mov in6=-1
   2.820  (p8)	mov in7=-1
   2.821 -	nop.i 0
   2.822  
   2.823  	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
   2.824  	movl r17=FPSR_DEFAULT
   2.825 @@ -1323,6 +1396,8 @@ END(ia64_syscall_setup)
   2.826  	 * element, followed by the arguments.
   2.827  	 */
   2.828  ENTRY(dispatch_illegal_op_fault)
   2.829 +	.prologue
   2.830 +	.body
   2.831  	SAVE_MIN_WITH_COVER
   2.832  	ssm psr.ic | PSR_DEFAULT_BITS
   2.833  	;;
   2.834 @@ -1335,6 +1410,7 @@ ENTRY(dispatch_illegal_op_fault)
   2.835  	mov out0=ar.ec
   2.836  	;;
   2.837  	SAVE_REST
   2.838 +	PT_REGS_UNWIND_INFO(0)
   2.839  	;;
   2.840  	br.call.sptk.many rp=ia64_illegal_op_fault
   2.841  .ret0:	;;
   2.842 @@ -1365,6 +1441,8 @@ END(dispatch_illegal_op_fault)
   2.843  	FAULT(17)
   2.844  
   2.845  ENTRY(non_syscall)
   2.846 +	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
   2.847 +	;;
   2.848  	SAVE_MIN_WITH_COVER
   2.849  
   2.850  	// There is no particular reason for this code to be here, other than that
   2.851 @@ -1540,7 +1618,7 @@ ENTRY(daccess_rights)
   2.852  	;;
   2.853  	ld8 r16=[r16]
   2.854  	;;
   2.855 -	XEN_HYPER_RSM_PSR_DT;
   2.856 +	XEN_HYPER_RSM_PSR_DT
   2.857  #else
   2.858  	mov r16=cr.ifa
   2.859  	rsm psr.dt
   2.860 @@ -1584,6 +1662,25 @@ END(disabled_fp_reg)
   2.861  // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
   2.862  ENTRY(nat_consumption)
   2.863  	DBG_FAULT(26)
   2.864 +
   2.865 +	mov r16=cr.ipsr
   2.866 +	mov r17=cr.isr
   2.867 +	mov r31=pr				// save PR
   2.868 +	;;
    2.869 +	and r18=0xf,r17				// r18 = cr.isr.code{3:0}
   2.870 +	tbit.z p6,p0=r17,IA64_ISR_NA_BIT
   2.871 +	;;
   2.872 +	cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
   2.873 +	dep r16=-1,r16,IA64_PSR_ED_BIT,1
    2.874 +(p6)	br.cond.spnt 1f		// branch if (cr.isr.na == 0 || cr.isr.code{3:0} != LFETCH)
   2.875 +	;;
    2.876 +	mov cr.ipsr=r16		// set cr.ipsr.ed
   2.877 +	mov pr=r31,-1
   2.878 +	;;
   2.879 +	rfi
   2.880 +
   2.881 +1:	mov pr=r31,-1
   2.882 +	;;
   2.883  	FAULT(26)
   2.884  END(nat_consumption)
   2.885  
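Note: the new nat_consumption prologue handles the lfetch case in-line: when isr.na is set and isr.code is LFETCH, it sets ipsr.ed and rfi-s so the faulting lfetch retires quietly; everything else still reaches FAULT(26). In C terms (bit positions and the LFETCH code value are assumptions for illustration):

    #include <stdint.h>

    #define ISR_NA_BIT      37    /* assumed position of isr.na */
    #define ISR_CODE_LFETCH  3    /* assumed isr.code value for lfetch */
    #define PSR_ED_BIT      43    /* assumed position of psr.ed */

    /* Returns 0 to rfi with ipsr.ed set (lfetch of a NaT), -1 to deliver
     * the usual NaT-consumption fault. */
    static int nat_consumption(uint64_t isr, uint64_t *ipsr)
    {
        unsigned code = isr & 0xf;                  /* and r18=0xf,r17 */
        if (((isr >> ISR_NA_BIT) & 1) && code == ISR_CODE_LFETCH) {
            *ipsr |= 1ull << PSR_ED_BIT;            /* dep r16=-1,r16,ED,1 */
            return 0;                               /* rfi */
        }
        return -1;                                  /* 1f: FAULT(26) */
    }
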
   2.886 @@ -1624,7 +1721,7 @@ ENTRY(speculation_vector)
   2.887  #ifdef CONFIG_XEN
   2.888  	XEN_HYPER_RFI;
   2.889  #else
   2.890 -	rfi
   2.891 +	rfi				// and go back
   2.892  #endif
   2.893  END(speculation_vector)
   2.894  
   2.895 @@ -1647,7 +1744,6 @@ END(debug_vector)
   2.896  // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
   2.897  ENTRY(unaligned_access)
   2.898  	DBG_FAULT(30)
   2.899 -	mov r16=cr.ipsr
   2.900  	mov r31=pr		// prepare to save predicates
   2.901  	;;
   2.902  	br.sptk.many dispatch_unaligned_handler