ia64/xen-unstable

changeset 16024:d6c09be8c5f5

[IA64] Micro-optimizations and comments in vmx_ivt.S

Add comments and micro-optimizations.
In PHY_D mode, alt TLB misses can now call vmx_hpw_miss (they are
dispatched through vmx_dispatch_tlb_miss instead of being treated as faults).

Signed-off-by: Tristan Gingold <tgingold@free.fr>
author Alex Williamson <alex.williamson@hp.com>
date Tue Oct 02 10:07:35 2007 -0600 (2007-10-02)
parents 0040e5afdb00
children 3165e43ce734
files xen/arch/ia64/vmx/vmx_ivt.S
line diff
     1.1 --- a/xen/arch/ia64/vmx/vmx_ivt.S	Tue Oct 02 10:04:56 2007 -0600
     1.2 +++ b/xen/arch/ia64/vmx/vmx_ivt.S	Tue Oct 02 10:07:35 2007 -0600
     1.3 @@ -59,6 +59,7 @@
     1.4  #include <asm/unistd.h>
     1.5  #include <asm/vhpt.h>
     1.6  #include <asm/virt_event.h>
     1.7 +#include <asm/vmx_phy_mode.h>
     1.8  #include <xen/errno.h>
     1.9  
    1.10  #if 1
    1.11 @@ -103,7 +104,7 @@
    1.12  
    1.13  #define VMX_FAULT(n)    \
    1.14  vmx_fault_##n:;          \
    1.15 -    mov r19=n;;          \
    1.16 +    mov r19=n;           \
    1.17      br.sptk.many dispatch_to_fault_handler;         \
    1.18      ;;                  \
    1.19  
    1.20 @@ -115,7 +116,7 @@ vmx_fault_##n:;          \
    1.21      ;;      \
    1.22      tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
    1.23  (p7)br.sptk.many vmx_dispatch_reflection;        \
    1.24 -    br.sptk.many dispatch_to_fault_handler;      \
    1.25 +    br.sptk.many dispatch_to_fault_handler
    1.26  
    1.27  
    1.28  GLOBAL_ENTRY(vmx_panic)
    1.29 @@ -144,12 +145,11 @@ END(vmx_vhpt_miss)
    1.30  // 0x0400 Entry 1 (size 64 bundles) ITLB (21)
    1.31  ENTRY(vmx_itlb_miss)
    1.32      VMX_DBG_FAULT(1)
    1.33 +    mov r29=cr.ipsr
    1.34      mov r31 = pr
    1.35 -    mov r29=cr.ipsr;
    1.36      ;;
    1.37 -    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
    1.38 -(p6) br.sptk vmx_alt_itlb_miss_1
    1.39 -//(p6) br.sptk vmx_fault_1
    1.40 +    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
    1.41 +(p6) br.sptk vmx_alt_itlb_miss_vmm
    1.42      mov r16 = cr.ifa
    1.43      ;;
    1.44      thash r17 = r16
    1.45 @@ -159,47 +159,52 @@ ENTRY(vmx_itlb_miss)
    1.46      adds r28 = VLE_TITAG_OFFSET,r17
    1.47      adds r19 = VLE_CCHAIN_OFFSET, r17
    1.48      ;;
    1.49 -    ld8 r17 = [r19]
    1.50 +    ld8 r17 = [r19]	// Read chain
    1.51      ;;
    1.52  vmx_itlb_loop:
    1.53 -    cmp.eq p6,p0 = r0, r17
    1.54 +    cmp.eq p6,p0 = r0, r17 // End of chain ?
    1.55  (p6)br vmx_itlb_out
    1.56      ;;
    1.57      adds r16 = VLE_TITAG_OFFSET, r17
    1.58      adds r19 = VLE_CCHAIN_OFFSET, r17
    1.59      ;;
    1.60 -    ld8 r24 = [r16]
    1.61 -    ld8 r23 = [r19]
    1.62 +    ld8 r24 = [r16] // Read tag
    1.63 +    ld8 r23 = [r19] // Read chain
    1.64      ;;
    1.65      lfetch [r23]
    1.66 -    cmp.eq  p6,p7 = r20, r24
    1.67 +    cmp.eq  p6,p7 = r20, r24 // does tag match ?
    1.68      ;;
    1.69 -(p7)mov r17 = r23;
    1.70 -(p7)br.sptk vmx_itlb_loop
    1.71 +(p7)mov r17 = r23; // No: entry = chain
    1.72 +(p7)br.sptk vmx_itlb_loop // again
    1.73      ;;
    1.74 -    ld8 r25 = [r17]
    1.75 -    ld8 r27 = [r18]
    1.76 -    ld8 r29 = [r28]
    1.77 -    dep r22 = -1,r24,63,1    //set ti=1
    1.78 +    // Swap the first entry with the entry found in the collision chain
    1.79 +    // to speed up next hardware search (and keep LRU).
    1.80 +    // In comments 1 stands for the first entry and 2 for the found entry.
    1.81 +    ld8 r25 = [r17] // Read value of 2
    1.82 +    ld8 r27 = [r18] // Read value of 1
    1.83 +    ld8 r29 = [r28] // Read tag of 1
    1.84 +    dep r22 = -1,r24,63,1    // set ti=1 of 2 (to disable it during the swap)
    1.85      ;;
    1.86 -    st8 [r16] = r29, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET
    1.87 -    st8 [r28] = r22, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET
    1.88 -    extr.u r19 = r27, 56, 4
    1.89 +    st8 [r16] = r29, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET // Write tag of 2
    1.90 +    st8 [r28] = r22, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET // Write tag of 1
    1.91 +    extr.u r19 = r27, 56, 4 // Extract collision chain length
    1.92      mf
    1.93      ;;
    1.94 -    ld8 r29 = [r16]
    1.95 -    ld8 r22 = [r28]
    1.96 -    dep r27 = r0, r27, 56, 4
    1.97 -    dep r25 = r19, r25, 56, 4
    1.98 +    ld8 r29 = [r16] // read itir of 2
    1.99 +    ld8 r22 = [r28] // read itir of 1
   1.100 +    dep r27 = r0, r27, 56, 4 // Clear collision chain length for 2
   1.101 +    dep r25 = r19, r25, 56, 4 // Write collision chain length for 1
   1.102      ;;
   1.103 -    st8 [r16] = r22
   1.104 -    st8 [r28] = r29, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET
   1.105 -    st8 [r18] = r25
   1.106 -    st8 [r17] = r27
   1.107 +    st8 [r16] = r22 // Write itir of 2
   1.108 +    st8 [r28] = r29, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET // write itir of 1
   1.109 +    st8 [r18] = r25 // Write value of 1
   1.110 +    st8 [r17] = r27 // Write value of 2
   1.111      ;;
   1.112 -    st8.rel [r28] = r24
   1.113 +    st8.rel [r28] = r24 // Write tag of 1 (with ti=0)
   1.114 +    // Insert the translation entry
   1.115      itc.i r25
   1.116      dv_serialize_data
   1.117 +    // Resume
   1.118      mov r17=cr.isr
   1.119      mov r23=r31
   1.120      mov r22=b0
   1.121 @@ -226,11 +231,11 @@ END(vmx_itlb_miss)
   1.122  // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
   1.123  ENTRY(vmx_dtlb_miss)
   1.124      VMX_DBG_FAULT(2)
   1.125 +    mov r29=cr.ipsr	
   1.126      mov r31 = pr
   1.127 -    mov r29=cr.ipsr;
   1.128      ;;
   1.129 -    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
   1.130 -(p6)br.sptk vmx_alt_dtlb_miss_1
   1.131 +    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
   1.132 +(p6)br.sptk vmx_alt_dtlb_miss_vmm
   1.133      mov r16 = cr.ifa
   1.134      ;;
   1.135      thash r17 = r16
   1.136 @@ -307,14 +312,14 @@ END(vmx_dtlb_miss)
   1.137  // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
   1.138  ENTRY(vmx_alt_itlb_miss)
   1.139      VMX_DBG_FAULT(3)
   1.140 +    mov r29=cr.ipsr
   1.141      mov r31 = pr
   1.142 -    mov r29=cr.ipsr
   1.143 +    adds r22=IA64_VCPU_MMU_MODE_OFFSET, r21
   1.144      ;;
   1.145 -    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
   1.146 -(p7)br.spnt vmx_fault_3
   1.147 -vmx_alt_itlb_miss_1:
   1.148 +    tbit.nz p7,p0=r29,IA64_PSR_VM_BIT
   1.149 +(p7)br.spnt vmx_alt_itlb_miss_dom
   1.150 +vmx_alt_itlb_miss_vmm:
   1.151      mov r16=cr.ifa    // get address that caused the TLB miss
   1.152 -    ;;
   1.153      movl r17=PAGE_KERNEL
   1.154      mov r24=cr.ipsr
   1.155      movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
   1.156 @@ -331,6 +336,15 @@ vmx_alt_itlb_miss_1:
   1.157      itc.i r19		// insert the TLB entry
   1.158      mov pr=r31,-1
   1.159      rfi
   1.160 +    ;;
   1.161 +vmx_alt_itlb_miss_dom:
   1.162 +    ld1 r23=[r22]  // Load mmu_mode
   1.163 +    ;;
   1.164 +    cmp.eq p6,p7=VMX_MMU_PHY_D,r23
   1.165 +(p7)br.sptk vmx_fault_3
   1.166 +    ;;
   1.167 +    mov r19=3
   1.168 +    br.sptk vmx_dispatch_tlb_miss
   1.169      VMX_FAULT(3);
   1.170  END(vmx_alt_itlb_miss)
   1.171  
   1.172 @@ -340,12 +354,13 @@ END(vmx_alt_itlb_miss)
   1.173  // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
   1.174  ENTRY(vmx_alt_dtlb_miss)
   1.175      VMX_DBG_FAULT(4)
   1.176 +    mov r29=cr.ipsr
   1.177      mov r31=pr
   1.178 -    mov r29=cr.ipsr;
   1.179 +    adds r22=IA64_VCPU_MMU_MODE_OFFSET, r21
   1.180      ;;
   1.181 -    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
   1.182 -(p7)br.spnt vmx_fault_4
   1.183 -vmx_alt_dtlb_miss_1:
   1.184 +    tbit.nz p7,p0=r29,IA64_PSR_VM_BIT
   1.185 +(p7)br.spnt vmx_alt_dtlb_miss_dom
   1.186 +vmx_alt_dtlb_miss_vmm:
   1.187      mov r16=cr.ifa		// get address that caused the TLB miss
   1.188      ;;
   1.189  #ifdef CONFIG_VIRTUAL_FRAME_TABLE
   1.190 @@ -377,6 +392,15 @@ vmx_alt_dtlb_miss_1:
   1.191  (p7)itc.d r19		// insert the TLB entry
   1.192      mov pr=r31,-1
   1.193      rfi
   1.194 +    ;;
   1.195 +vmx_alt_dtlb_miss_dom:
   1.196 +    ld1 r23=[r22]  // Load mmu_mode
   1.197 +    ;;
   1.198 +    cmp.eq p6,p7=VMX_MMU_PHY_D,r23
   1.199 +(p7)br.sptk vmx_fault_4
   1.200 +    ;;
   1.201 +    mov r19=4
   1.202 +    br.sptk vmx_dispatch_tlb_miss
   1.203      VMX_FAULT(4);
   1.204  END(vmx_alt_dtlb_miss)
   1.205