ia64/xen-unstable

changeset 9993:ea181d857712

[IA64] update xenivt.S and xenentry.S

Update xenentry.S and xenivt.S for linux 2.6.16.13.

Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
author awilliam@xenbuild.aw
date Wed May 10 17:07:06 2006 -0600 (2006-05-10)
parents 00141f6d15e0
children 08bc4e5ac625
files linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S	Wed May 10 15:58:36 2006 -0600
     1.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S	Wed May 10 17:07:06 2006 -0600
     1.3 @@ -83,11 +83,7 @@ GLOBAL_ENTRY(ia64_switch_to)
     1.4  	mov r8=1
     1.5  	;;
     1.6  	st4 [r27]=r8			// psr.ic back on
     1.7 -	;;
     1.8  #else
     1.9 -(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
    1.10 -	;;
    1.11 -(p6)	srlz.d
    1.12  	ld8 sp=[r21]			// load kernel stack pointer of new task
    1.13  	mov IA64_KR(CURRENT)=in0	// update "current" application register
    1.14  #endif
    1.15 @@ -136,6 +132,11 @@ GLOBAL_ENTRY(ia64_switch_to)
    1.16  #endif
    1.17  	;;
    1.18  	itr.d dtr[r25]=r23		// wire in new mapping...
    1.19 +#ifndef CONFIG_XEN
    1.20 +	ssm psr.ic			// reenable the psr.ic bit
    1.21 +	;;
    1.22 +	srlz.d
    1.23 +#endif
    1.24  	br.cond.sptk .done
    1.25  #ifdef CONFIG_XEN
    1.26  END(xen_switch_to)
    1.27 @@ -216,7 +217,9 @@ GLOBAL_ENTRY(ia64_trace_syscall)
    1.28  .mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
    1.29  .mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
    1.30  	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
    1.31 -.ret3:	br.cond.sptk .work_pending_syscall_end
    1.32 +.ret3:
    1.33 +(pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
    1.34 +	br.cond.sptk .work_pending_syscall_end
    1.35  
    1.36  strace_error:
    1.37  	ld8 r3=[r2]				// load pt_regs.r8
    1.38 @@ -246,7 +249,7 @@ END(ia64_trace_syscall)
    1.39   *	      r8-r11: restored (syscall return value(s))
    1.40   *		 r12: restored (user-level stack pointer)
    1.41   *		 r13: restored (user-level thread pointer)
    1.42 - *		 r14: cleared
    1.43 + *		 r14: set to __kernel_syscall_via_epc
    1.44   *		 r15: restored (syscall #)
    1.45   *	     r16-r17: cleared
    1.46   *		 r18: user-level b6
    1.47 @@ -267,7 +270,7 @@ END(ia64_trace_syscall)
    1.48   *		  pr: restored (user-level pr)
    1.49   *		  b0: restored (user-level rp)
    1.50   *	          b6: restored
    1.51 - *		  b7: cleared
    1.52 + *		  b7: set to __kernel_syscall_via_epc
    1.53   *	     ar.unat: restored (user-level ar.unat)
    1.54   *	      ar.pfs: restored (user-level ar.pfs)
    1.55   *	      ar.rsc: restored (user-level ar.rsc)
    1.56 @@ -331,20 +334,20 @@ ENTRY(ia64_leave_syscall)
    1.57  	;;
    1.58  (p6)	ld4 r31=[r18]				// load current_thread_info()->flags
    1.59  	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
    1.60 -	mov b7=r0		// clear b7
    1.61 +	nop.i 0
    1.62  	;;
    1.63 -	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
    1.64 +	mov r16=ar.bsp				// M2  get existing backing store pointer
    1.65  	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
    1.66  (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
    1.67  	;;
    1.68 -	mov r16=ar.bsp				// M2  get existing backing store pointer
    1.69 +	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
    1.70  (p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
    1.71  (p6)	br.cond.spnt .work_pending_syscall
    1.72  	;;
    1.73  	// start restoring the state saved on the kernel stack (struct pt_regs):
    1.74  	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
    1.75  	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
    1.76 -	mov f6=f0		// clear f6
    1.77 +(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
    1.78  	;;
    1.79  	invala			// M0|1 invalidate ALAT
    1.80  #ifdef CONFIG_XEN
    1.81 @@ -358,57 +361,68 @@ ENTRY(ia64_leave_syscall)
    1.82  	st4	[r29]=r0	// note: clears both vpsr.i and vpsr.ic!
    1.83  	;;
    1.84  #else
    1.85 -	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
    1.86 +	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
    1.87  #endif
    1.88 -	mov f9=f0		// clear f9
    1.89 +	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
    1.90  
    1.91 -	ld8 r29=[r2],16		// load cr.ipsr
    1.92 -	ld8 r28=[r3],16			// load cr.iip
    1.93 -	mov f8=f0		// clear f8
    1.94 +	ld8 r29=[r2],16		// M0|1 load cr.ipsr
    1.95 +	ld8 r28=[r3],16		// M0|1 load cr.iip
    1.96 +	mov r22=r0		// A    clear r22
    1.97  	;;
    1.98  	ld8 r30=[r2],16		// M0|1 load cr.ifs
    1.99 -	mov.m ar.ssd=r0		// M2 clear ar.ssd
   1.100 -	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
   1.101 -	;;
   1.102  	ld8 r25=[r3],16		// M0|1 load ar.unat
   1.103 -	mov.m ar.csd=r0		// M2 clear ar.csd
   1.104 -	mov r22=r0		// clear r22
   1.105 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
   1.106  	;;
   1.107  	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
   1.108 -(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
   1.109 -	mov f10=f0		// clear f10
   1.110 -	;;
   1.111 -	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
   1.112 -	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
   1.113 -	mov f11=f0		// clear f11
   1.114 -	;;
   1.115 -	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
   1.116 -	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
   1.117 -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
   1.118 -	;;
   1.119 -	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
   1.120 -	ld8.fill r1=[r3],16	// load r1
   1.121 -(pUStk) mov r17=1
   1.122 +(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
   1.123 +	nop 0
   1.124  	;;
   1.125 -	srlz.d			// M0  ensure interruption collection is off
   1.126 -	ld8.fill r13=[r3],16
   1.127 -	mov f7=f0		// clear f7
   1.128 -	;;
   1.129 -	ld8.fill r12=[r2]	// restore r12 (sp)
   1.130 -	ld8.fill r15=[r3]	// restore r15
   1.131 -	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
   1.132 +	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
   1.133 +	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
   1.134 +	mov f6=f0			// F    clear f6
   1.135  	;;
   1.136 -(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
   1.137 -(pUStk) st1 [r14]=r17
   1.138 -	mov b6=r18		// I0  restore b6
   1.139 +	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
   1.140 +	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
   1.141 +	mov f7=f0				// F    clear f7
   1.142  	;;
   1.143 -	mov r14=r0		// clear r14
   1.144 -	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
   1.145 -(pKStk) br.cond.dpnt.many skip_rbs_switch
   1.146 +	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
   1.147 +	ld8.fill r1=[r3],16			// M0|1 load r1
   1.148 +(pUStk) mov r17=1				// A
   1.149 +	;;
   1.150 +(pUStk) st1 [r14]=r17				// M2|3
   1.151 +	ld8.fill r13=[r3],16			// M0|1
   1.152 +	mov f8=f0				// F    clear f8
   1.153 +	;;
   1.154 +	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
   1.155 +	ld8.fill r15=[r3]			// M0|1 restore r15
   1.156 +	mov b6=r18				// I0   restore b6
   1.157  
   1.158 -	mov.m ar.ccv=r0		// clear ar.ccv
   1.159 -(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
   1.160 -	br.cond.sptk.many rbs_switch
   1.161 +	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
   1.162 +	mov f9=f0					// F    clear f9
   1.163 +(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
   1.164 +
   1.165 +	srlz.d				// M0   ensure interruption collection is off (for cover)
   1.166 +	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
   1.167 +#ifdef CONFIG_XEN
   1.168 +	XEN_HYPER_COVER;
   1.169 +#else
   1.170 +	cover				// B    add current frame into dirty partition & set cr.ifs
   1.171 +#endif
   1.172 +	;;
   1.173 +(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
   1.174 +	mov r19=ar.bsp			// M2   get new backing store pointer
   1.175 +	mov f10=f0			// F    clear f10
   1.176 +
   1.177 +	nop.m 0
   1.178 +	movl r14=__kernel_syscall_via_epc // X
   1.179 +	;;
   1.180 +	mov.m ar.csd=r0			// M2   clear ar.csd
   1.181 +	mov.m ar.ccv=r0			// M2   clear ar.ccv
   1.182 +	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
   1.183 +
   1.184 +	mov.m ar.ssd=r0			// M2   clear ar.ssd
   1.185 +	mov f11=f0			// F    clear f11
   1.186 +	br.cond.sptk.many rbs_switch	// B
   1.187  #ifdef CONFIG_XEN
   1.188  END(xen_leave_syscall)
   1.189  #else
   1.190 @@ -546,7 +560,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
   1.191  	ldf.fill f7=[r2],PT(F11)-PT(F7)
   1.192  	ldf.fill f8=[r3],32
   1.193  	;;
   1.194 -	srlz.i			// ensure interruption collection is off
   1.195 +	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
   1.196  	mov ar.ccv=r15
   1.197  	;;
   1.198  	ldf.fill f11=[r2]
   1.199 @@ -556,29 +570,29 @@ GLOBAL_ENTRY(ia64_leave_kernel)
   1.200  	movl r2=XSI_BANK1_R16
   1.201  	movl r3=XSI_BANK1_R16+8
   1.202  	;;
   1.203 -	st8.spill [r2]=r16,16
   1.204 -	st8.spill [r3]=r17,16
   1.205 -	;;
   1.206 -	st8.spill [r2]=r18,16
   1.207 -	st8.spill [r3]=r19,16
   1.208 -	;;
   1.209 -	st8.spill [r2]=r20,16
   1.210 -	st8.spill [r3]=r21,16
   1.211 +.mem.offset 0,0; st8.spill [r2]=r16,16
   1.212 +.mem.offset 8,0; st8.spill [r3]=r17,16
   1.213  	;;
   1.214 -	st8.spill [r2]=r22,16
   1.215 -	st8.spill [r3]=r23,16
   1.216 -	;;
   1.217 -	st8.spill [r2]=r24,16
   1.218 -	st8.spill [r3]=r25,16
   1.219 +.mem.offset 0,0; st8.spill [r2]=r18,16
   1.220 +.mem.offset 8,0; st8.spill [r3]=r19,16
   1.221  	;;
   1.222 -	st8.spill [r2]=r26,16
   1.223 -	st8.spill [r3]=r27,16
   1.224 +.mem.offset 0,0; st8.spill [r2]=r20,16
   1.225 +.mem.offset 8,0; st8.spill [r3]=r21,16
   1.226  	;;
   1.227 -	st8.spill [r2]=r28,16
   1.228 -	st8.spill [r3]=r29,16
   1.229 +.mem.offset 0,0; st8.spill [r2]=r22,16
   1.230 +.mem.offset 8,0; st8.spill [r3]=r23,16
   1.231  	;;
   1.232 -	st8.spill [r2]=r30,16
   1.233 -	st8.spill [r3]=r31,16
   1.234 +.mem.offset 0,0; st8.spill [r2]=r24,16
   1.235 +.mem.offset 8,0; st8.spill [r3]=r25,16
   1.236 +	;;
   1.237 +.mem.offset 0,0; st8.spill [r2]=r26,16
   1.238 +.mem.offset 8,0; st8.spill [r3]=r27,16
   1.239 +	;;
   1.240 +.mem.offset 0,0; st8.spill [r2]=r28,16
   1.241 +.mem.offset 8,0; st8.spill [r3]=r29,16
   1.242 +	;;
   1.243 +.mem.offset 0,0; st8.spill [r2]=r30,16
   1.244 +.mem.offset 8,0; st8.spill [r3]=r31,16
   1.245  	;;
   1.246  	movl r2=XSI_BANKNUM;;
   1.247  	st4 [r2]=r0;
   1.248 @@ -641,7 +655,6 @@ GLOBAL_ENTRY(ia64_leave_kernel)
   1.249  	 */
   1.250  (pNonSys) br.cond.dpnt dont_preserve_current_frame
   1.251  
   1.252 -rbs_switch:
   1.253  #ifdef CONFIG_XEN
   1.254  	XEN_HYPER_COVER;
   1.255  #else
   1.256 @@ -649,6 +662,7 @@ rbs_switch:
   1.257  #endif
   1.258  	;;
   1.259  	mov r19=ar.bsp			// get new backing store pointer
   1.260 +rbs_switch:
   1.261  	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
   1.262  	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
   1.263  	;;
   1.264 @@ -723,14 +737,14 @@ rse_clear_invalid:
   1.265  	mov loc5=0
   1.266  	mov loc6=0
   1.267  	mov loc7=0
   1.268 -(pRecurse) br.call.sptk.few b0=rse_clear_invalid
   1.269 +(pRecurse) br.call.dptk.few b0=rse_clear_invalid
   1.270  	;;
   1.271  	mov loc8=0
   1.272  	mov loc9=0
   1.273  	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
   1.274  	mov loc10=0
   1.275  	mov loc11=0
   1.276 -(pReturn) br.ret.sptk.many b0
   1.277 +(pReturn) br.ret.dptk.many b0
   1.278  #endif /* !CONFIG_ITANIUM */
   1.279  #	undef pRecurse
   1.280  #	undef pReturn
     2.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S	Wed May 10 15:58:36 2006 -0600
     2.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S	Wed May 10 17:07:06 2006 -0600
     2.3 @@ -87,16 +87,17 @@ ENTRY(vhpt_miss)
     2.4  	 * (the "original") TLB miss, which may either be caused by an instruction
     2.5  	 * fetch or a data access (or non-access).
     2.6  	 *
     2.7 -	 * What we do here is normal TLB miss handing for the _original_ miss, followed
     2.8 -	 * by inserting the TLB entry for the virtual page table page that the VHPT
     2.9 -	 * walker was attempting to access.  The latter gets inserted as long
    2.10 -	 * as both L1 and L2 have valid mappings for the faulting address.
    2.11 -	 * The TLB entry for the original miss gets inserted only if
    2.12 -	 * the L3 entry indicates that the page is present.
     2.13 +	 * What we do here is normal TLB miss handling for the _original_ miss,
    2.14 +	 * followed by inserting the TLB entry for the virtual page table page
    2.15 +	 * that the VHPT walker was attempting to access.  The latter gets
     2.16 +	 * inserted as long as the page table entries above the pte level have valid
    2.17 +	 * mappings for the faulting address.  The TLB entry for the original
    2.18 +	 * miss gets inserted only if the pte entry indicates that the page is
    2.19 +	 * present.
    2.20  	 *
    2.21  	 * do_page_fault gets invoked in the following cases:
    2.22  	 *	- the faulting virtual address uses unimplemented address bits
    2.23 -	 *	- the faulting virtual address has no L1, L2, or L3 mapping
    2.24 +	 *	- the faulting virtual address has no valid page table mapping
    2.25  	 */
    2.26  #ifdef CONFIG_XEN
    2.27  	movl r16=XSI_IFA
    2.28 @@ -127,7 +128,7 @@ ENTRY(vhpt_miss)
    2.29  	shl r21=r16,3				// shift bit 60 into sign bit
    2.30  	shr.u r17=r16,61			// get the region number into r17
    2.31  	;;
    2.32 -	shr r22=r21,3
    2.33 +	shr.u r22=r21,3
    2.34  #ifdef CONFIG_HUGETLB_PAGE
    2.35  	extr.u r26=r25,2,6
    2.36  	;;
    2.37 @@ -139,7 +140,7 @@ ENTRY(vhpt_miss)
    2.38  #endif
    2.39  	;;
    2.40  	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
    2.41 -	shr.u r18=r22,PGDIR_SHIFT		// get bits 33-63 of the faulting address
     2.42 +	shr.u r18=r22,PGDIR_SHIFT		// get bottom portion of pgd index bits
    2.43  	;;
    2.44  (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
    2.45  
    2.46 @@ -150,41 +151,54 @@ ENTRY(vhpt_miss)
    2.47  (p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
    2.48  (p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
    2.49  	;;
    2.50 -(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
    2.51 -(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
    2.52 +(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
    2.53 +(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
    2.54  	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
    2.55 -	shr.u r18=r22,PMD_SHIFT			// shift L2 index into position
    2.56 -	;;
    2.57 -	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
    2.58 +#ifdef CONFIG_PGTABLE_4
    2.59 +	shr.u r28=r22,PUD_SHIFT			// shift pud index into position
    2.60 +#else
    2.61 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
    2.62 +#endif
    2.63  	;;
    2.64 -(p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
    2.65 -	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
    2.66 +	ld8 r17=[r17]				// get *pgd (may be 0)
    2.67  	;;
    2.68 -(p7)	ld8 r20=[r17]				// fetch the L2 entry (may be 0)
    2.69 -	shr.u r19=r22,PAGE_SHIFT		// shift L3 index into position
    2.70 +(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
    2.71 +#ifdef CONFIG_PGTABLE_4
    2.72 +	dep r28=r28,r17,3,(PAGE_SHIFT-3)	// r28=pud_offset(pgd,addr)
    2.73  	;;
    2.74 -(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was L2 entry NULL?
    2.75 -	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
    2.76 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
    2.77 +(p7)	ld8 r29=[r28]				// get *pud (may be 0)
    2.78  	;;
    2.79 +(p7)	cmp.eq.or.andcm p6,p7=r29,r0		// was pud_present(*pud) == NULL?
    2.80 +	dep r17=r18,r29,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
    2.81 +#else
    2.82 +	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pgd,addr)
    2.83 +#endif
    2.84 +	;;
    2.85 +(p7)	ld8 r20=[r17]				// get *pmd (may be 0)
    2.86 +	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
    2.87 +	;;
    2.88 +(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was pmd_present(*pmd) == NULL?
    2.89 +	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// r21=pte_offset(pmd,addr)
    2.90 +	;;
    2.91 +(p7)	ld8 r18=[r21]				// read *pte
    2.92  #ifdef CONFIG_XEN
    2.93 -(p7)	ld8 r18=[r21]				// read the L3 PTE
    2.94  	movl r19=XSI_ISR
    2.95  	;;
    2.96  	ld8 r19=[r19]
    2.97 +#else
    2.98 +	mov r19=cr.isr				// cr.isr bit 32 tells us if this is an insn miss
    2.99 +#endif
   2.100  	;;
   2.101  (p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
   2.102 +#ifdef CONFIG_XEN
   2.103  	movl r22=XSI_IHA
   2.104  	;;
   2.105  	ld8 r22=[r22]
   2.106 -	;;
   2.107  #else
   2.108 -(p7)	ld8 r18=[r21]				// read the L3 PTE
   2.109 -	mov r19=cr.isr				// cr.isr bit 0 tells us if this is an insn miss
   2.110 -	;;
   2.111 -(p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
   2.112  	mov r22=cr.iha				// get the VHPT address that caused the TLB miss
   2.113 +#endif
   2.114  	;;					// avoid RAW on p7
   2.115 -#endif
   2.116  (p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
   2.117  	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
   2.118  	;;
   2.119 @@ -198,16 +212,17 @@ ENTRY(vhpt_miss)
   2.120  	;;
   2.121  	mov r8=r24
   2.122  	;;
   2.123 +#else
   2.124 +(p10)	itc.i r18				// insert the instruction TLB entry
   2.125 +(p11)	itc.d r18				// insert the data TLB entry
   2.126 +#endif
   2.127  (p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
   2.128 -	;;
   2.129 +#ifdef CONFIG_XEN
   2.130  	movl r24=XSI_IFA
   2.131  	;;
   2.132  	st8 [r24]=r22
   2.133  	;;
   2.134  #else
   2.135 -(p10)	itc.i r18				// insert the instruction TLB entry
   2.136 -(p11)	itc.d r18				// insert the data TLB entry
   2.137 -(p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
   2.138  	mov cr.ifa=r22
   2.139  #endif
   2.140  
   2.141 @@ -242,25 +257,41 @@ ENTRY(vhpt_miss)
   2.142  	dv_serialize_data
   2.143  
   2.144  	/*
   2.145 -	 * Re-check L2 and L3 pagetable.  If they changed, we may have received a ptc.g
     2.146 +	 * Re-check pagetable entry.  If it changed, we may have received a ptc.g
   2.147  	 * between reading the pagetable and the "itc".  If so, flush the entry we
   2.148 -	 * inserted and retry.
   2.149 +	 * inserted and retry.  At this point, we have:
   2.150 +	 *
   2.151 +	 * r28 = equivalent of pud_offset(pgd, ifa)
   2.152 +	 * r17 = equivalent of pmd_offset(pud, ifa)
   2.153 +	 * r21 = equivalent of pte_offset(pmd, ifa)
   2.154 +	 *
   2.155 +	 * r29 = *pud
   2.156 +	 * r20 = *pmd
   2.157 +	 * r18 = *pte
   2.158  	 */
   2.159 -	ld8 r25=[r21]				// read L3 PTE again
   2.160 -	ld8 r26=[r17]				// read L2 entry again
   2.161 +	ld8 r25=[r21]				// read *pte again
   2.162 +	ld8 r26=[r17]				// read *pmd again
   2.163 +#ifdef CONFIG_PGTABLE_4
   2.164 +	ld8 r19=[r28]				// read *pud again
   2.165 +#endif
   2.166 +	cmp.ne p6,p7=r0,r0
   2.167  	;;
   2.168 -	cmp.ne p6,p7=r26,r20			// did L2 entry change
   2.169 +	cmp.ne.or.andcm p6,p7=r26,r20		// did *pmd change
   2.170 +#ifdef CONFIG_PGTABLE_4
   2.171 +	cmp.ne.or.andcm p6,p7=r19,r29		// did *pud change
   2.172 +#endif
   2.173  	mov r27=PAGE_SHIFT<<2
   2.174  	;;
   2.175  (p6)	ptc.l r22,r27				// purge PTE page translation
   2.176 -(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did L3 PTE change
   2.177 +(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did *pte change
   2.178  	;;
   2.179  (p6)	ptc.l r16,r27				// purge translation
   2.180  #endif
   2.181  
   2.182  	mov pr=r31,-1				// restore predicate registers
   2.183  #ifdef CONFIG_XEN
   2.184 -	XEN_HYPER_RFI;
   2.185 +	XEN_HYPER_RFI
   2.186 +	dv_serialize_data
   2.187  #else
   2.188  	rfi
   2.189  #endif
   2.190 @@ -272,10 +303,10 @@ END(vhpt_miss)
   2.191  ENTRY(itlb_miss)
   2.192  	DBG_FAULT(1)
   2.193  	/*
   2.194 -	 * The ITLB handler accesses the L3 PTE via the virtually mapped linear
   2.195 +	 * The ITLB handler accesses the PTE via the virtually mapped linear
   2.196  	 * page table.  If a nested TLB miss occurs, we switch into physical
   2.197 -	 * mode, walk the page table, and then re-execute the L3 PTE read
   2.198 -	 * and go on normally after that.
   2.199 +	 * mode, walk the page table, and then re-execute the PTE read and
   2.200 +	 * go on normally after that.
   2.201  	 */
   2.202  #ifdef CONFIG_XEN
   2.203  	movl r16=XSI_IFA
   2.204 @@ -292,11 +323,11 @@ ENTRY(itlb_miss)
   2.205  	;;
   2.206  	ld8 r17=[r17]				// get virtual address of L3 PTE
   2.207  #else
   2.208 -	mov r17=cr.iha				// get virtual address of L3 PTE
   2.209 +	mov r17=cr.iha				// get virtual address of PTE
   2.210  #endif
   2.211  	movl r30=1f				// load nested fault continuation point
   2.212  	;;
   2.213 -1:	ld8 r18=[r17]				// read L3 PTE
   2.214 +1:	ld8 r18=[r17]				// read *pte
   2.215  	;;
   2.216  	mov b0=r29
   2.217  	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
   2.218 @@ -320,7 +351,7 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.219  	 */
   2.220  	dv_serialize_data
   2.221  
   2.222 -	ld8 r19=[r17]				// read L3 PTE again and see if same
   2.223 +	ld8 r19=[r17]				// read *pte again and see if same
   2.224  	mov r20=PAGE_SHIFT<<2			// setup page size for purge
   2.225  	;;
   2.226  	cmp.ne p7,p0=r18,r19
   2.227 @@ -329,7 +360,8 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.228  #endif
   2.229  	mov pr=r31,-1
   2.230  #ifdef CONFIG_XEN
   2.231 -	XEN_HYPER_RFI;
   2.232 +	XEN_HYPER_RFI
   2.233 +	dv_serialize_data
   2.234  #else
   2.235  	rfi
   2.236  #endif
   2.237 @@ -341,10 +373,10 @@ END(itlb_miss)
   2.238  ENTRY(dtlb_miss)
   2.239  	DBG_FAULT(2)
   2.240  	/*
   2.241 -	 * The DTLB handler accesses the L3 PTE via the virtually mapped linear
   2.242 +	 * The DTLB handler accesses the PTE via the virtually mapped linear
   2.243  	 * page table.  If a nested TLB miss occurs, we switch into physical
   2.244 -	 * mode, walk the page table, and then re-execute the L3 PTE read
   2.245 -	 * and go on normally after that.
   2.246 +	 * mode, walk the page table, and then re-execute the PTE read and
   2.247 +	 * go on normally after that.
   2.248  	 */
   2.249  #ifdef CONFIG_XEN
   2.250  	movl r16=XSI_IFA
   2.251 @@ -361,11 +393,11 @@ dtlb_fault:
   2.252  	;;
   2.253  	ld8 r17=[r17]				// get virtual address of L3 PTE
   2.254  #else
   2.255 -	mov r17=cr.iha				// get virtual address of L3 PTE
   2.256 +	mov r17=cr.iha				// get virtual address of PTE
   2.257  #endif
   2.258  	movl r30=1f				// load nested fault continuation point
   2.259  	;;
   2.260 -1:	ld8 r18=[r17]				// read L3 PTE
   2.261 +1:	ld8 r18=[r17]				// read *pte
   2.262  	;;
   2.263  	mov b0=r29
   2.264  	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
   2.265 @@ -390,7 +422,7 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.266  	 */
   2.267  	dv_serialize_data
   2.268  
   2.269 -	ld8 r19=[r17]				// read L3 PTE again and see if same
   2.270 +	ld8 r19=[r17]				// read *pte again and see if same
   2.271  	mov r20=PAGE_SHIFT<<2			// setup page size for purge
   2.272  	;;
   2.273  	cmp.ne p7,p0=r18,r19
   2.274 @@ -399,7 +431,8 @@ 1:	ld8 r18=[r17]				// read L3 PTE
   2.275  #endif
   2.276  	mov pr=r31,-1
   2.277  #ifdef CONFIG_XEN
   2.278 -	XEN_HYPER_RFI;
   2.279 +	XEN_HYPER_RFI
   2.280 +	dv_serialize_data
   2.281  #else
   2.282  	rfi
   2.283  #endif
   2.284 @@ -416,19 +449,15 @@ ENTRY(alt_itlb_miss)
   2.285  	ld8 r21=[r31],XSI_IFA-XSI_IPSR	// get ipsr, point to ifa
   2.286  	movl r17=PAGE_KERNEL
   2.287  	;;
   2.288 -	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.289 -	;;
   2.290  	ld8 r16=[r31]		// get ifa
   2.291 -	mov r31=pr
   2.292 -	;;
   2.293  #else
   2.294  	mov r16=cr.ifa		// get address that caused the TLB miss
   2.295  	movl r17=PAGE_KERNEL
   2.296  	mov r21=cr.ipsr
   2.297 +#endif
   2.298  	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.299  	mov r31=pr
   2.300  	;;
   2.301 -#endif
   2.302  #ifdef CONFIG_DISABLE_VHPT
   2.303  	shr.u r22=r16,61			// get the region number into r21
   2.304  	;;
   2.305 @@ -486,17 +515,15 @@ ENTRY(alt_dtlb_miss)
   2.306  	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.307  	;;
   2.308  	ld8 r16=[r31]		// get ifa
   2.309 -	mov r31=pr
   2.310 -	;;
   2.311  #else
   2.312  	mov r16=cr.ifa		// get address that caused the TLB miss
   2.313  	movl r17=PAGE_KERNEL
   2.314  	mov r20=cr.isr
   2.315  	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   2.316  	mov r21=cr.ipsr
   2.317 +#endif
   2.318  	mov r31=pr
   2.319  	;;
   2.320 -#endif
   2.321  #ifdef CONFIG_DISABLE_VHPT
   2.322  	shr.u r22=r16,61			// get the region number into r21
   2.323  	;;
   2.324 @@ -565,12 +592,12 @@ ENTRY(nested_dtlb_miss)
   2.325  	 *		r30:	continuation address
   2.326  	 *		r31:	saved pr
   2.327  	 *
   2.328 -	 * Output:	r17:	physical address of L3 PTE of faulting address
   2.329 +	 * Output:	r17:	physical address of PTE of faulting address
   2.330  	 *		r29:	saved b0
   2.331  	 *		r30:	continuation address
   2.332  	 *		r31:	saved pr
   2.333  	 *
   2.334 -	 * Clobbered:	b0, r18, r19, r21, psr.dt (cleared)
   2.335 +	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
   2.336  	 */
   2.337  #ifdef CONFIG_XEN
   2.338  	XEN_HYPER_RSM_PSR_DT;
   2.339 @@ -579,12 +606,23 @@ ENTRY(nested_dtlb_miss)
   2.340  #endif
   2.341  	mov r19=IA64_KR(PT_BASE)		// get the page table base address
   2.342  	shl r21=r16,3				// shift bit 60 into sign bit
   2.343 +#ifdef CONFIG_XEN
   2.344 +	movl r18=XSI_ITIR
   2.345 +	;;
   2.346 +	ld8 r18=[r18]
   2.347 +#else
   2.348 +	mov r18=cr.itir
   2.349 +#endif
   2.350  	;;
   2.351  	shr.u r17=r16,61			// get the region number into r17
   2.352 +	extr.u r18=r18,2,6			// get the faulting page size
   2.353  	;;
   2.354  	cmp.eq p6,p7=5,r17			// is faulting address in region 5?
   2.355 -	shr.u r18=r16,PGDIR_SHIFT		// get bits 33-63 of faulting address
   2.356 +	add r22=-PAGE_SHIFT,r18			// adjustment for hugetlb address
   2.357 +	add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
   2.358  	;;
   2.359 +	shr.u r22=r16,r22
   2.360 +	shr.u r18=r16,r18
   2.361  (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
   2.362  
   2.363  	srlz.d
   2.364 @@ -594,21 +632,33 @@ ENTRY(nested_dtlb_miss)
   2.365  (p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
   2.366  (p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
   2.367  	;;
   2.368 -(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
   2.369 -(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
   2.370 +(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
   2.371 +(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
   2.372  	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
   2.373 -	shr.u r18=r16,PMD_SHIFT			// shift L2 index into position
   2.374 -	;;
   2.375 -	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
   2.376 +#ifdef CONFIG_PGTABLE_4
   2.377 +	shr.u r18=r22,PUD_SHIFT			// shift pud index into position
   2.378 +#else
   2.379 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
   2.380 +#endif
   2.381  	;;
   2.382 -(p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
   2.383 -	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
   2.384 +	ld8 r17=[r17]				// get *pgd (may be 0)
   2.385  	;;
   2.386 -(p7)	ld8 r17=[r17]				// fetch the L2 entry (may be 0)
   2.387 -	shr.u r19=r16,PAGE_SHIFT		// shift L3 index into position
   2.388 +(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
   2.389 +	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=p[u|m]d_offset(pgd,addr)
   2.390  	;;
   2.391 -(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was L2 entry NULL?
   2.392 -	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
   2.393 +#ifdef CONFIG_PGTABLE_4
   2.394 +(p7)	ld8 r17=[r17]				// get *pud (may be 0)
   2.395 +	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
   2.396 +	;;
   2.397 +(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pud_present(*pud) == NULL?
   2.398 +	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
   2.399 +	;;
   2.400 +#endif
   2.401 +(p7)	ld8 r17=[r17]				// get *pmd (may be 0)
   2.402 +	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
   2.403 +	;;
   2.404 +(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pmd_present(*pmd) == NULL?
   2.405 +	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// r17=pte_offset(pmd,addr);
   2.406  (p6)	br.cond.spnt page_fault
   2.407  	mov b0=r30
   2.408  	br.sptk.many b0				// return to continuation point
   2.409 @@ -626,7 +676,7 @@ END(ikey_miss)
   2.410  	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
   2.411  ENTRY(page_fault)
   2.412  #ifdef CONFIG_XEN
   2.413 -	XEN_HYPER_SSM_PSR_DT;
   2.414 +	XEN_HYPER_SSM_PSR_DT
   2.415  #else
   2.416  	ssm psr.dt
   2.417  	;;
   2.418 @@ -742,11 +792,12 @@ 1:	ld8 r18=[r17]
   2.419  	;;					// avoid RAW on r18
   2.420  	mov ar.ccv=r18				// set compare value for cmpxchg
   2.421  	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
   2.422 +	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
   2.423  	;;
   2.424 -	cmpxchg8.acq r26=[r17],r25,ar.ccv
   2.425 +(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
   2.426  	mov r24=PAGE_SHIFT<<2
   2.427  	;;
   2.428 -	cmp.eq p6,p7=r26,r18
   2.429 +(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
   2.430  	;;
   2.431  (p6)	itc.d r25				// install updated PTE
   2.432  	;;
   2.433 @@ -775,7 +826,8 @@ 1:	ld8 r18=[r17]
   2.434  #endif
   2.435  	mov pr=r31,-1				// restore pr
   2.436  #ifdef CONFIG_XEN
   2.437 -	XEN_HYPER_RFI;
   2.438 +	XEN_HYPER_RFI
   2.439 +	dv_serialize_data
   2.440  #else
   2.441  	rfi
   2.442  #endif
   2.443 @@ -826,11 +878,12 @@ 1:	ld8 r18=[r17]
   2.444  	;;
   2.445  	mov ar.ccv=r18				// set compare value for cmpxchg
   2.446  	or r25=_PAGE_A,r18			// set the accessed bit
   2.447 +	tbit.z p7,p6 = r18,_PAGE_P_BIT	 	// Check present bit
   2.448  	;;
   2.449 -	cmpxchg8.acq r26=[r17],r25,ar.ccv
   2.450 +(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
   2.451  	mov r24=PAGE_SHIFT<<2
   2.452  	;;
   2.453 -	cmp.eq p6,p7=r26,r18
   2.454 +(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
   2.455  	;;
   2.456  #ifdef CONFIG_XEN
   2.457  	mov r26=r8
   2.458 @@ -869,7 +922,8 @@ 1:	ld8 r18=[r17]
   2.459  #endif /* !CONFIG_SMP */
   2.460  	mov pr=r31,-1
   2.461  #ifdef CONFIG_XEN
   2.462 -	XEN_HYPER_RFI;
   2.463 +	XEN_HYPER_RFI
   2.464 +	dv_serialize_data
   2.465  #else
   2.466  	rfi
   2.467  #endif
   2.468 @@ -892,11 +946,13 @@ ENTRY(daccess_bit)
   2.469  	movl r30=1f				// load continuation point in case of nested fault
   2.470  	;;
   2.471  #ifdef CONFIG_XEN
   2.472 -	mov r18=r8;
   2.473 -	mov r8=r16;
   2.474 -	XEN_HYPER_THASH;;
   2.475 -	mov r17=r8;
   2.476 -	mov r8=r18;;
   2.477 +	mov r18=r8
   2.478 +	mov r8=r16
   2.479 +	XEN_HYPER_THASH
   2.480 +	;;
   2.481 +	mov r17=r8
   2.482 +	mov r8=r18
   2.483 +	;;
   2.484  #else
   2.485  	thash r17=r16				// compute virtual address of L3 PTE
   2.486  #endif
   2.487 @@ -909,11 +965,12 @@ 1:	ld8 r18=[r17]
   2.488  	;;					// avoid RAW on r18
   2.489  	mov ar.ccv=r18				// set compare value for cmpxchg
   2.490  	or r25=_PAGE_A,r18			// set the dirty bit
   2.491 +	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
   2.492  	;;
   2.493 -	cmpxchg8.acq r26=[r17],r25,ar.ccv
   2.494 +(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
   2.495  	mov r24=PAGE_SHIFT<<2
   2.496  	;;
   2.497 -	cmp.eq p6,p7=r26,r18
   2.498 +(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
   2.499  	;;
   2.500  #ifdef CONFIG_XEN
   2.501  	mov r26=r8
   2.502 @@ -950,7 +1007,8 @@ 1:	ld8 r18=[r17]
   2.503  	mov b0=r29				// restore b0
   2.504  	mov pr=r31,-1
   2.505  #ifdef CONFIG_XEN
   2.506 -	XEN_HYPER_RFI;
   2.507 +	XEN_HYPER_RFI
   2.508 +	dv_serialize_data
   2.509  #else
   2.510  	rfi
   2.511  #endif
   2.512 @@ -976,143 +1034,157 @@ ENTRY(break_fault)
   2.513  	 * to prevent leaking bits from kernel to user level.
   2.514  	 */
   2.515  	DBG_FAULT(11)
   2.516 -	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
   2.517 +	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
   2.518  #ifdef CONFIG_XEN
   2.519 -	movl r31=XSI_IPSR
   2.520 -	;;
   2.521 -	ld8 r29=[r31],XSI_IIP-XSI_IPSR		// get ipsr, point to iip
   2.522 -	mov r18=__IA64_BREAK_SYSCALL
   2.523 -	mov r21=ar.fpsr
   2.524 +	movl r22=XSI_IPSR
   2.525  	;;
   2.526 -	ld8 r28=[r31],XSI_IIM-XSI_IIP		// get iip, point to iim
   2.527 -	mov r19=b6
   2.528 -	mov r25=ar.unat
   2.529 -	;;
   2.530 -	ld8 r17=[r31]				// get iim
   2.531 -	mov r27=ar.rsc
   2.532 -	mov r26=ar.pfs
   2.533 -	;;
   2.534 +	ld8 r29=[r22],XSI_IIM-XSI_IPSR		// get ipsr, point to iip
   2.535  #else
   2.536 -	mov r17=cr.iim
   2.537 -	mov r18=__IA64_BREAK_SYSCALL
   2.538 -	mov r21=ar.fpsr
   2.539 -	mov r29=cr.ipsr
   2.540 -	mov r19=b6
   2.541 -	mov r25=ar.unat
   2.542 -	mov r27=ar.rsc
   2.543 -	mov r26=ar.pfs
   2.544 -	mov r28=cr.iip
   2.545 +	mov r29=cr.ipsr				// M2 (12 cyc)
   2.546  #endif
   2.547 -	mov r31=pr				// prepare to save predicates
   2.548 -	mov r20=r1
   2.549 -	;;
   2.550 -	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
   2.551 -	cmp.eq p0,p7=r18,r17			// is this a system call? (p7 <- false, if so)
   2.552 -(p7)	br.cond.spnt non_syscall
   2.553 +	mov r31=pr				// I0 (2 cyc)
   2.554 +
   2.555 +#ifdef CONFIG_XEN
   2.556  	;;
   2.557 -	ld1 r17=[r16]				// load current->thread.on_ustack flag
   2.558 -	st1 [r16]=r0				// clear current->thread.on_ustack flag
   2.559 -	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
   2.560 -	;;
   2.561 -	invala
   2.562 -
   2.563 -	/* adjust return address so we skip over the break instruction: */
   2.564 +	ld8 r17=[r22],XSI_IIP-XSI_IIM
   2.565 +#else
   2.566 +	mov r17=cr.iim				// M2 (2 cyc)
   2.567 +#endif
   2.568 +	mov.m r27=ar.rsc			// M2 (12 cyc)
   2.569 +	mov r18=__IA64_BREAK_SYSCALL		// A
   2.570  
   2.571 -	extr.u r8=r29,41,2			// extract ei field from cr.ipsr
   2.572 -	;;
   2.573 -	cmp.eq p6,p7=2,r8			// isr.ei==2?
   2.574 -	mov r2=r1				// setup r2 for ia64_syscall_setup
   2.575 +	mov.m ar.rsc=0				// M2
   2.576 +	mov.m r21=ar.fpsr			// M2 (12 cyc)
   2.577 +	mov r19=b6				// I0 (2 cyc)
   2.578  	;;
   2.579 -(p6)	mov r8=0				// clear ei to 0
   2.580 -(p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
   2.581 -(p7)	adds r8=1,r8				// increment ei to next slot
   2.582 +	mov.m r23=ar.bspstore			// M2 (12 cyc)
   2.583 +	mov.m r24=ar.rnat			// M2 (5 cyc)
   2.584 +	mov.i r26=ar.pfs			// I0 (2 cyc)
   2.585 +
   2.586 +	invala					// M0|1
   2.587 +	nop.m 0					// M
   2.588 +	mov r20=r1				// A			save r1
   2.589 +
   2.590 +	nop.m 0
   2.591 +	movl r30=sys_call_table			// X
   2.592 +
   2.593 +#ifdef CONFIG_XEN
   2.594 +	ld8 r28=[r22]
   2.595 +#else
   2.596 +	mov r28=cr.iip				// M2 (2 cyc)
   2.597 +#endif
   2.598 +	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
   2.599 +(p7)	br.cond.spnt non_syscall		// B  no ->
   2.600 +	//
   2.601 +	// From this point on, we are definitely on the syscall-path
   2.602 +	// and we can use (non-banked) scratch registers.
   2.603 +	//
   2.604 +///////////////////////////////////////////////////////////////////////
   2.605 +	mov r1=r16				// A    move task-pointer to "addl"-addressable reg
   2.606 +	mov r2=r16				// A    setup r2 for ia64_syscall_setup
   2.607 +	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// A	r9 = &current_thread_info()->flags
   2.608 +
   2.609 +	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
   2.610 +	adds r15=-1024,r15			// A    subtract 1024 from syscall number
   2.611 +	mov r3=NR_syscalls - 1
   2.612  	;;
   2.613 -	cmp.eq pKStk,pUStk=r0,r17		// are we in kernel mode already?
   2.614 -	dep r29=r8,r29,41,2			// insert new ei into cr.ipsr
   2.615 +	ld1.bias r17=[r16]			// M0|1 r17 = current->thread.on_ustack flag
   2.616 +	ld4 r9=[r9]				// M0|1 r9 = current_thread_info()->flags
   2.617 +	extr.u r8=r29,41,2			// I0   extract ei field from cr.ipsr
   2.618 +
   2.619 +	shladd r30=r15,3,r30			// A    r30 = sys_call_table + 8*(syscall-1024)
   2.620 +	addl r22=IA64_RBS_OFFSET,r1		// A    compute base of RBS
   2.621 +	cmp.leu p6,p7=r15,r3			// A    syscall number in range?
   2.622  	;;
   2.623  
   2.624 -	// switch from user to kernel RBS:
   2.625 -	MINSTATE_START_SAVE_MIN_VIRT
   2.626 -	br.call.sptk.many b7=ia64_syscall_setup
   2.627 +	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch RBS
   2.628 +(p6)	ld8 r30=[r30]				// M0|1 load address of syscall entry point
   2.629 +	tnat.nz.or p7,p0=r15			// I0	is syscall nr a NaT?
   2.630 +
   2.631 +	mov.m ar.bspstore=r22			// M2   switch to kernel RBS
   2.632 +	cmp.eq p8,p9=2,r8			// A    isr.ei==2?
   2.633  	;;
   2.634 +
   2.635 +(p8)	mov r8=0				// A    clear ei to 0
   2.636 +(p7)	movl r30=sys_ni_syscall			// X
   2.637 +
   2.638 +(p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
   2.639 +(p9)	adds r8=1,r8				// A    increment ei to next slot
   2.640 +	nop.i 0
   2.641 +	;;
   2.642 +
   2.643 +	mov.m r25=ar.unat			// M2 (5 cyc)
   2.644 +	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
   2.645 +	adds r15=1024,r15			// A    restore original syscall number
   2.646 +	//
   2.647 +	// If any of the above loads miss in L1D, we'll stall here until
   2.648 +	// the data arrives.
   2.649 +	//
   2.650 +///////////////////////////////////////////////////////////////////////
   2.651 +	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
   2.652 +	mov b6=r30				// I0   setup syscall handler branch reg early
   2.653 +	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
   2.654 +
   2.655 +	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
   2.656 +	mov r18=ar.bsp				// M2 (12 cyc)
   2.657 +(pKStk)	br.cond.spnt .break_fixup		// B	we're already in kernel-mode -- fix up RBS
   2.658 +	;;
   2.659 +.back_from_break_fixup:
   2.660 +(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute base of memory stack
   2.661 +	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
   2.662 +	br.call.sptk.many b7=ia64_syscall_setup	// B
   2.663 +1:
   2.664 +	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
   2.665 +	nop 0
   2.666  #ifdef CONFIG_XEN
   2.667  	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;
   2.668  #else
   2.669 -	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
   2.670 +	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
   2.671  #endif
   2.672 -#ifdef CONFIG_XEN
   2.673 -	movl r3=XSI_PSR_IC
   2.674 -	mov r16=1
   2.675  	;;
   2.676 -#if 1
   2.677 -	st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC	// vpsr.ic = 1
   2.678 -	;;
   2.679 -(p15)	ld8 r3=[r3]
   2.680 +
   2.681 +#ifdef CONFIG_XEN
   2.682 +	movl r16=XSI_PSR_IC
   2.683 +	mov r3=1
   2.684  	;;
   2.685 -(p15)	st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR	// if (p15) vpsr.i = 1
   2.686 -	mov r16=r0
   2.687 +	st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC	// vpsr.ic = 1
   2.688 +#else
   2.689 +	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
   2.690 +#endif
   2.691 +	movl r3=ia64_ret_from_syscall		// X
   2.692  	;;
   2.693 -(p15)	ld4 r16=[r3]				// if (pending_interrupts)
   2.694 +
   2.695 +	srlz.i					// M0   ensure interruption collection is on
   2.696 +	mov rp=r3				// I0   set the real return addr
   2.697 +(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
   2.698 +
   2.699 +#ifdef CONFIG_XEN
   2.700 +(p15)	ld8 r16=[r16]				// vpsr.i
   2.701  	;;
   2.702 -	cmp.ne	p6,p0=r16,r0
   2.703 +(p15)	st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR	// if (p15) vpsr.i = 1
   2.704 +	mov r2=r0
   2.705 +	;;
   2.706 +(p15)	ld4 r2=[r16]				// if (pending_interrupts)
   2.707 +	;;
   2.708 +	cmp.ne	p6,p0=r2,r0
   2.709  	;;
   2.710  (p6)	ssm	psr.i				//   do a real ssm psr.i
   2.711 -	;;
   2.712  #else
   2.713 -//	st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC	// vpsr.ic = 1
   2.714 -	adds r3=XSI_PSR_I_ADDR-XSI_PSR_IC,r3	// SKIP vpsr.ic = 1
   2.715 -	;;
   2.716 -(p15)	ld8 r3=[r3]
   2.717 -	;;
   2.718 -(p15)	st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR	// if (p15) vpsr.i = 1
   2.719 -	mov r16=r0
   2.720 -	;;
   2.721 -(p15)	ld4 r16=[r3]				// if (pending_interrupts)
   2.722 -	;;
   2.723 -	cmp.ne	p6,p0=r16,r0
   2.724 -	;;
   2.725 -//(p6)	ssm	psr.i				//   do a real ssm psr.i
   2.726 -//(p6)	XEN_HYPER_SSM_I;
   2.727 -(p6)	break 0x7;
   2.728 -	;;
   2.729 +(p15)	ssm psr.i				// M2   restore psr.i
   2.730  #endif
   2.731 -	mov r3=NR_syscalls - 1
   2.732 -	;;
   2.733 -#else
   2.734 -	ssm psr.ic | PSR_DEFAULT_BITS
   2.735 -	;;
   2.736 -	srlz.i					// guarantee that interruption collection is on
   2.737 -	mov r3=NR_syscalls - 1
   2.738 +(p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handker (ignore return addr)
   2.739 +	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
   2.740 +	// NOT REACHED
   2.741 +///////////////////////////////////////////////////////////////////////
   2.742 +	// On entry, we optimistically assumed that we're coming from user-space.
   2.743 +	// For the rare cases where a system-call is done from within the kernel,
   2.744 +	// we fix things up at this point:
   2.745 +.break_fixup:
   2.746 +	add r1=-IA64_PT_REGS_SIZE,sp		// A    allocate space for pt_regs structure
   2.747 +	mov ar.rnat=r24				// M2	restore kernel's AR.RNAT
   2.748  	;;
   2.749 -(p15)	ssm psr.i				// restore psr.i
   2.750 -#endif
   2.751 -	// p10==true means out registers are more than 8 or r15's Nat is true
   2.752 -(p10)	br.cond.spnt.many ia64_ret_from_syscall
   2.753 -	;;
   2.754 -	movl r16=sys_call_table
   2.755 -
   2.756 -	adds r15=-1024,r15			// r15 contains the syscall number---subtract 1024
   2.757 -	movl r2=ia64_ret_from_syscall
   2.758 -	;;
   2.759 -	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
   2.760 -	cmp.leu p6,p7=r15,r3			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
   2.761 -	mov rp=r2				// set the real return addr
   2.762 -	;;
   2.763 -(p6)	ld8 r20=[r20]				// load address of syscall entry point
   2.764 -(p7)	movl r20=sys_ni_syscall
   2.765 -
   2.766 -	add r2=TI_FLAGS+IA64_TASK_SIZE,r13
   2.767 -	;;
   2.768 -	ld4 r2=[r2]				// r2 = current_thread_info()->flags
   2.769 -	;;
   2.770 -	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
   2.771 -	;;
   2.772 -	cmp.eq p8,p0=r2,r0
   2.773 -	mov b6=r20
   2.774 -	;;
   2.775 -(p8)	br.call.sptk.many b6=b6			// ignore this return addr
   2.776 -	br.cond.sptk ia64_trace_syscall
   2.777 -	// NOT REACHED
   2.778 +	mov ar.bspstore=r23			// M2	restore kernel's AR.BSPSTORE
   2.779 +	br.cond.sptk .back_from_break_fixup
   2.780  END(break_fault)
   2.781  
   2.782  	.org ia64_ivt+0x3000
   2.783 @@ -1201,8 +1273,6 @@ END(interrupt)
   2.784  	 *	- r31: saved pr
   2.785  	 *	-  b0: original contents (to be saved)
   2.786  	 * On exit:
   2.787 -	 *	- executing on bank 1 registers
   2.788 -	 *	- psr.ic enabled, interrupts restored
   2.789  	 *	-  p10: TRUE if syscall is invoked with more than 8 out
   2.790  	 *		registers or r15's Nat is true
   2.791  	 *	-  r1: kernel's gp
   2.792 @@ -1210,8 +1280,11 @@ END(interrupt)
   2.793  	 *	-  r8: -EINVAL if p10 is true
   2.794  	 *	- r12: points to kernel stack
   2.795  	 *	- r13: points to current task
   2.796 +	 *	- r14: preserved (same as on entry)
   2.797 +	 *	- p13: preserved
   2.798  	 *	- p15: TRUE if interrupts need to be re-enabled
   2.799  	 *	- ar.fpsr: set to kernel settings
   2.800 +	 *	-  b6: preserved (same as on entry)
   2.801  	 */
   2.802  #ifndef CONFIG_XEN
   2.803  GLOBAL_ENTRY(ia64_syscall_setup)
   2.804 @@ -1280,10 +1353,10 @@ GLOBAL_ENTRY(ia64_syscall_setup)
   2.805  (p13)	mov in5=-1
   2.806  	;;
   2.807  	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
   2.808 -	tnat.nz p14,p0=in6
   2.809 +	tnat.nz p13,p0=in6
   2.810  	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
   2.811  	;;
   2.812 -	stf8 [r16]=f1		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
   2.813 +	mov r8=1
   2.814  (p9)	tnat.nz p10,p0=r15
   2.815  	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
   2.816  
   2.817 @@ -1294,9 +1367,9 @@ GLOBAL_ENTRY(ia64_syscall_setup)
   2.818  	mov r13=r2				// establish `current'
   2.819  	movl r1=__gp				// establish kernel global pointer
   2.820  	;;
   2.821 -(p14)	mov in6=-1
   2.822 +	st8 [r16]=r8		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
   2.823 +(p13)	mov in6=-1
   2.824  (p8)	mov in7=-1
   2.825 -	nop.i 0
   2.826  
   2.827  	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
   2.828  	movl r17=FPSR_DEFAULT
   2.829 @@ -1323,6 +1396,8 @@ END(ia64_syscall_setup)
   2.830  	 * element, followed by the arguments.
   2.831  	 */
   2.832  ENTRY(dispatch_illegal_op_fault)
   2.833 +	.prologue
   2.834 +	.body
   2.835  	SAVE_MIN_WITH_COVER
   2.836  	ssm psr.ic | PSR_DEFAULT_BITS
   2.837  	;;
   2.838 @@ -1335,6 +1410,7 @@ ENTRY(dispatch_illegal_op_fault)
   2.839  	mov out0=ar.ec
   2.840  	;;
   2.841  	SAVE_REST
   2.842 +	PT_REGS_UNWIND_INFO(0)
   2.843  	;;
   2.844  	br.call.sptk.many rp=ia64_illegal_op_fault
   2.845  .ret0:	;;
   2.846 @@ -1365,6 +1441,8 @@ END(dispatch_illegal_op_fault)
   2.847  	FAULT(17)
   2.848  
   2.849  ENTRY(non_syscall)
   2.850 +	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
   2.851 +	;;
   2.852  	SAVE_MIN_WITH_COVER
   2.853  
   2.854  	// There is no particular reason for this code to be here, other than that
   2.855 @@ -1540,7 +1618,7 @@ ENTRY(daccess_rights)
   2.856  	;;
   2.857  	ld8 r16=[r16]
   2.858  	;;
   2.859 -	XEN_HYPER_RSM_PSR_DT;
   2.860 +	XEN_HYPER_RSM_PSR_DT
   2.861  #else
   2.862  	mov r16=cr.ifa
   2.863  	rsm psr.dt
   2.864 @@ -1584,6 +1662,25 @@ END(disabled_fp_reg)
   2.865  // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
   2.866  ENTRY(nat_consumption)
   2.867  	DBG_FAULT(26)
   2.868 +
   2.869 +	mov r16=cr.ipsr
   2.870 +	mov r17=cr.isr
   2.871 +	mov r31=pr				// save PR
   2.872 +	;;
   2.873 +	and r18=0xf,r17				// r18 = cr.ipsr.code{3:0}
   2.874 +	tbit.z p6,p0=r17,IA64_ISR_NA_BIT
   2.875 +	;;
   2.876 +	cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
   2.877 +	dep r16=-1,r16,IA64_PSR_ED_BIT,1
   2.878 +(p6)	br.cond.spnt 1f		// branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
   2.879 +	;;
   2.880 +	mov cr.ipsr=r16		// set cr.ipsr.na
   2.881 +	mov pr=r31,-1
   2.882 +	;;
   2.883 +	rfi
   2.884 +
   2.885 +1:	mov pr=r31,-1
   2.886 +	;;
   2.887  	FAULT(26)
   2.888  END(nat_consumption)
   2.889  
   2.890 @@ -1624,7 +1721,7 @@ ENTRY(speculation_vector)
   2.891  #ifdef CONFIG_XEN
   2.892  	XEN_HYPER_RFI;
   2.893  #else
   2.894 -	rfi
   2.895 +	rfi				// and go back
   2.896  #endif
   2.897  END(speculation_vector)
   2.898  
   2.899 @@ -1647,7 +1744,6 @@ END(debug_vector)
   2.900  // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
   2.901  ENTRY(unaligned_access)
   2.902  	DBG_FAULT(30)
   2.903 -	mov r16=cr.ipsr
   2.904  	mov r31=pr		// prepare to save predicates
   2.905  	;;
   2.906  	br.sptk.many dispatch_unaligned_handler