ia64/xen-unstable

changeset 17607:ef290f39ae6b

[IA64] Build new infrastructure for fast fault handling path.

1. Use a jump table to dispatch virtualization faults (see the sketch below).
2. For virtualization faults, the handler runs with psr.i=0, psr.ic=0 and
psr.bn=0, so less context has to be switched.
3. Use the register stack instead of the memory stack to switch context.
4. Use C code to handle faults where possible, to reduce maintenance effort;
remove the assembly handlers for rsm, ssm, mov to psr and mov to rr.
5. Add fast-path C handlers for rsm, ssm, mov to psr and rfi.
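
A minimal C sketch of the dispatch idea in points 1 and 2 (the handler type,
table size and helper names here are illustrative only; the real table is the
assembly virtualization_fault_table added to optvfault.S below, with one
16-byte bundle per entry, indexed by the cause code in r24):

    #include <stdio.h>

    typedef void (*fast_handler_t)(unsigned long cause);

    /* Stand-ins for the real handlers: the assembly table branches to stubs
     * such as vmx_asm_rsm, which in turn call C helpers like
     * vmx_vcpu_rsm_fast with psr.i=0, psr.ic=0, psr.bn=0. */
    static void fast_rsm(unsigned long cause)
    {
        printf("cause %lu: fast path\n", cause);
    }

    static void slow_path(unsigned long cause)
    {
        printf("cause %lu: BACK_TO_SLOW_PATH\n", cause);
    }

    #define NUM_CAUSES 41       /* entries 0..40, as in the assembly table */

    static const fast_handler_t dispatch[NUM_CAUSES] = {
        [25] = fast_rsm,        /* entry 25 <-> vmx_asm_rsm */
        /* entries 3, 6, 10, 18, 24, 31 and 37 get fast handlers too */
    };

    static void virtualization_fault(unsigned long cause)
    {
        if (cause < NUM_CAUSES && dispatch[cause])
            dispatch[cause](cause);     /* fast path */
        else
            slow_path(cause);           /* default: generic C dispatcher */
    }

    int main(void)
    {
        virtualization_fault(25);       /* rsm: fast path */
        virtualization_fault(26);       /* no fast handler: slow path */
        return 0;
    }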

Signed-off-by: Anthony Xu <anthony.xu@intel.com>
author Isaku Yamahata <yamahata@valinux.co.jp>
date Thu May 15 14:53:48 2008 +0900 (2008-05-15)
parents b03e24f9c1d8
children c96507e0c83d
files xen/arch/ia64/asm-offsets.c xen/arch/ia64/vmx/optvfault.S xen/arch/ia64/vmx/vmx_ivt.S xen/arch/ia64/vmx/vmx_phy_mode.c xen/arch/ia64/vmx/vmx_vcpu.c xen/include/asm-ia64/vmx_phy_mode.h xen/include/asm-ia64/vmx_vcpu.h
line diff
     1.1 --- a/xen/arch/ia64/asm-offsets.c	Thu May 15 14:18:38 2008 +0900
     1.2 +++ b/xen/arch/ia64/asm-offsets.c	Thu May 15 14:53:48 2008 +0900
     1.3 @@ -204,6 +204,11 @@ void foo(void)
     1.4  
     1.5  	DEFINE(IA64_VPD_BASE_OFFSET, offsetof (struct vcpu, arch.privregs));
     1.6  	DEFINE(IA64_VPD_VIFS_OFFSET, offsetof (mapped_regs_t, ifs));
     1.7 +	DEFINE(IA64_VPD_VHPI_OFFSET, offsetof (mapped_regs_t, vhpi));
     1.8 +	DEFINE(IA64_VPD_VB1REG_OFFSET, offsetof (mapped_regs_t, bank1_regs[0]));
     1.9 +	DEFINE(IA64_VPD_VB0REG_OFFSET, offsetof (mapped_regs_t, bank0_regs[0]));
    1.10 +	DEFINE(IA64_VPD_VB1NAT_OFFSET, offsetof (mapped_regs_t, vnat));
    1.11 +	DEFINE(IA64_VPD_VB0NAT_OFFSET, offsetof (mapped_regs_t, vbnat));
    1.12   	DEFINE(IA64_VLSAPIC_INSVC_BASE_OFFSET, offsetof (struct vcpu, arch.insvc[0]));
    1.13  	DEFINE(IA64_VPD_VPTA_OFFSET, offsetof (struct mapped_regs, pta));
    1.14  	DEFINE(XXX_THASH_SIZE, sizeof (thash_data_t));
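
The new IA64_VPD_VB*REG / VB*NAT offsets above are consumed by the bank-switch
helpers (vmx_asm_bsw0/vmx_asm_bsw1) added to optvfault.S below. For reference,
the asm-offsets mechanism works roughly as in this sketch; the struct layout
and the _DEMO names are illustrative, not the real mapped_regs_t or the exact
Xen macro:

    #include <stddef.h>

    struct mapped_regs_demo {            /* stand-in for mapped_regs_t */
        unsigned long vhpi;
        unsigned long bank1_regs[16];
        unsigned long bank0_regs[16];
        unsigned long vnat;
        unsigned long vbnat;
    };

    /* Usual asm-offsets trick: emit each constant as an immediate into the
     * generated assembly, from which the build extracts asm-offsets.h for
     * use by .S files such as optvfault.S. */
    #define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))

    void foo(void)
    {
        DEFINE(IA64_VPD_VHPI_OFFSET_DEMO,
               offsetof(struct mapped_regs_demo, vhpi));
        DEFINE(IA64_VPD_VB1REG_OFFSET_DEMO,
               offsetof(struct mapped_regs_demo, bank1_regs[0]));
    }
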
     2.1 --- a/xen/arch/ia64/vmx/optvfault.S	Thu May 15 14:18:38 2008 +0900
     2.2 +++ b/xen/arch/ia64/vmx/optvfault.S	Thu May 15 14:53:48 2008 +0900
     2.3 @@ -3,10 +3,10 @@
     2.4   * optimize virtualization fault handler
     2.5   *
     2.6   * Copyright (C) 2006 Intel Co
     2.7 - *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
     2.8 + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
     2.9   */
    2.10  
    2.11 -#include <linux/config.h>	
    2.12 +#include <linux/config.h>
    2.13  #include <asm/config.h>
    2.14  #include <asm/pgtable.h>
    2.15  #include <asm/asmmacro.h>
    2.16 @@ -20,26 +20,230 @@
    2.17  #include <asm/virt_event.h>
    2.18  #include <asm-ia64/vmx_mm_def.h>
    2.19  #include <asm-ia64/vmx_phy_mode.h>
    2.20 +#include "entry.h"
    2.21  
    2.22 -#define ACCE_MOV_FROM_AR
    2.23 -#define ACCE_MOV_FROM_RR
    2.24 -#define ACCE_MOV_TO_RR
    2.25 -#define ACCE_RSM
    2.26 -#define ACCE_SSM
    2.27 -#define ACCE_MOV_TO_PSR
    2.28 -#define ACCE_THASH
    2.29 +// r21 : current
    2.30 +// r23 : b0
    2.31 +// r31 : pr
    2.32 +
    2.33 +#define VMX_VIRT_SAVE                                                       \
    2.34 +    mov r27=ar.rsc;     /* M */                                             \
    2.35 +    ;;                                                                      \
    2.36 +    cover;              /* B;; (or nothing) */                              \
    2.37 +    ;;                                                                      \
    2.38 +    /* switch from user to kernel RBS: */                                   \
    2.39 +    invala;             /* M */                                             \
    2.40 +    ;;                                                                      \
    2.41 +    mov ar.rsc=0;       /* set enforced lazy mode  */                       \
    2.42 +    ;;                                                                      \
    2.43 +    mov.m r26=ar.rnat;                                                      \
    2.44 +    movl r28=IA64_RBS_OFFSET;        /* compute base of RBS */              \
    2.45 +    ;;                                                                      \
    2.46 +    mov r22=ar.bspstore;             /* save ar.bspstore */                 \
    2.47 +    add r28=r28,r21;                                                        \
    2.48 +    ;;                                                                      \
    2.49 +    mov ar.bspstore=r28;    /* switch to kernel RBS */                      \
    2.50 +    ;;                                                                      \
    2.51 +    mov r18=ar.bsp;                                                         \
    2.52 +    mov ar.rsc=0x3;         /* set eager mode */                            \
    2.53 +    ;;                                                                      \
    2.54 +    alloc r32=ar.pfs,24,0,3,0    /* save pfs */                             \
    2.55 +    ;;                                                                      \
    2.56 +    sub r18=r18,r28;    /* r18=RSE.ndirty*8 */                              \
    2.57 +    ;;                                                                      \
    2.58 +    shl r33=r18,16;     /* save loadrs */                                   \
    2.59 +    mov r35=b6;         /* save b6 */                                       \
    2.60 +    mov r36=b7;         /* save b7 */                                       \
    2.61 +    mov r37=ar.csd;     /* save ar.csd */                                   \
    2.62 +    mov r38=ar.ssd;     /* save ar.ssd */                                   \
    2.63 +    mov r39=r8;         /* save r8 */                               \
    2.64 +    mov r40=r9;         /* save r9 */                               \
    2.65 +    mov r41=r10;        /* save r10 */                              \
    2.66 +    mov r42=r11;        /* save r11 */                              \
    2.67 +    mov r43=r27;        /* save ar.rsc */                           \
    2.68 +    mov r44=r26;        /* save ar.rnat */                          \
    2.69 +    mov r45=r22;        /* save ar.bspstore */                      \
    2.70 +    mov r46=r31;        /* save pr */                               \
    2.71 +    mov r47=r23;        /* save b0 */                               \
    2.72 +    mov r48=r1;         /* save r1 */                               \
    2.73 +    mov r49=r12;        /* save r12 */                              \
    2.74 +    mov r50=r13;        /* save r13 */                              \
    2.75 +    mov r51=r15;        /* save r15 */                              \
    2.76 +    mov r52=r14;        /* save r14 */                              \
    2.77 +    mov r53=r2;         /* save r2 */                               \
    2.78 +    mov r54=r3;         /* save r3 */                               \
    2.79 +    mov r34=ar.ccv;     /* save ar.ccv */                           \
    2.80 +    ;;                                                              \
    2.81 +    movl r1=__gp;                                                   \
    2.82 +    movl r29=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16;                  \
    2.83 +    ;;                                                              \
    2.84 +    add r12=r29,r21;   /* compute base of memory stack */           \
    2.85 +    mov r13=r21;                                                    \
    2.86 +    ;;                                                              \
    2.87 +{ .mii;       /* call vps sync read */                              \
    2.88 +    add r25=IA64_VPD_BASE_OFFSET, r21;                              \
    2.89 +    nop 0x0;                                                        \
    2.90 +    mov r24=ip;                                                     \
    2.91 +    ;;                                                              \
    2.92 +};                                                                  \
    2.93 +{ .mmb;                                                             \
    2.94 +    add r24 = 0x20, r24;                                            \
    2.95 +    ld8 r25=[r25];          /* read vpd base */                     \
    2.96 +    br.cond.sptk vmx_vps_sync_read;   /*  call the service */       \
    2.97 +    ;;                                                              \
    2.98 +};
    2.99 +
   2.100 +
   2.101 +ENTRY(ia64_leave_hypervisor_virt)
   2.102 +    invala              /* M */
   2.103 +    ;;
   2.104 +    mov r21=r13         /* get current */
   2.105 +    mov b6=r35          /* restore b6 */
   2.106 +    mov b7=r36          /* restore b7 */
   2.107 +    mov ar.csd=r37      /* restore ar.csd */
   2.108 +    mov ar.ssd=r38      /* restore ar.ssd */
   2.109 +    mov r8=r39          /* restore r8 */
   2.110 +    mov r9=r40          /* restore r9 */
   2.111 +    mov r10=r41         /* restore r10 */
   2.112 +    mov r11=r42         /* restore r11 */
   2.113 +    mov ar.pfs=r32      /* restore ar.pfs */
   2.114 +    mov r27=r43         /* restore ar.rsc */
   2.115 +    mov r26=r44         /* restore ar.rnat */
   2.116 +    mov r25=r45         /* restore ar.bspstore */
   2.117 +    mov r23=r46         /* restore predicates */
   2.118 +    mov r22=r47         /* restore b0 */
   2.119 +    mov r1=r48          /* restore r1 */
   2.120 +    mov r12=r49         /* restore r12 */
   2.121 +    mov r13=r50         /* restore r13 */
   2.122 +    mov r15=r51         /* restore r15 */
   2.123 +    mov r14=r52         /* restore r14 */
   2.124 +    mov r2=r53          /* restore r2 */
   2.125 +    mov r3=r54          /* restore r3 */
   2.126 +    mov ar.ccv=r34      /* restore ar.ccv */
   2.127 +    mov ar.rsc=r33      /* load ar.rsc to be used for "loadrs" */
   2.128 +    ;;
   2.129 +    alloc r16=ar.pfs,0,0,0,0    /* drop current register frame */
   2.130 +    ;;
   2.131 +    loadrs
   2.132 +    ;;
   2.133 +    mov ar.bspstore=r25
   2.134 +    ;;
   2.135 +    mov ar.rnat=r26
   2.136 +    ;;
   2.137 +    mov ar.rsc=r27
   2.138 +    adds r18=IA64_VPD_BASE_OFFSET,r21
   2.139 +    ;;
   2.140 +    ld8 r25=[r18]       // load vpd
   2.141 +    mov r17=r0
   2.142 +    ;;
   2.143 +//vsa_sync_write_start
   2.144 +    ;;
   2.145 +    movl r24=ia64_leave_hypervisor_virt_1   // calculate return address
   2.146 +    br.cond.sptk vmx_vps_sync_write         // call the service
   2.147 +    ;;
   2.148 +ia64_leave_hypervisor_virt_1:
   2.149 +    mov r24=r22
   2.150 +    mov r31=r23
   2.151 +    br.cond.sptk vmx_resume_to_guest
   2.152 +END(ia64_leave_hypervisor_virt)
   2.153 +
   2.154 +
   2.155  
   2.156  // Inputs are: r21 (= current), r24 (= cause), r25 (= insn), r31 (=saved pr)
   2.157  
   2.158 +#define BACK_TO_SLOW_PATH                   \
   2.159 +{;                                          \
   2.160 +    nop.m 0x0;                              \
   2.161 +    mov b0=r23;                             \
   2.162 +    br.many vmx_virtualization_fault_back;  \
   2.163 +};
   2.164 +
   2.165 +GLOBAL_ENTRY(virtualization_fault_table)
   2.166 +    BACK_TO_SLOW_PATH
   2.167 +    BACK_TO_SLOW_PATH
   2.168 +    BACK_TO_SLOW_PATH
   2.169 +{   /* Entry 3 */
   2.170 +    cmp.eq p2,p0=r0,r0
   2.171 +    mov b0=r23
   2.172 +    br.many vmx_asm_mov_from_ar
   2.173 +}
   2.174 +    BACK_TO_SLOW_PATH
   2.175 +    BACK_TO_SLOW_PATH
   2.176 +{   /* Entry 6 */
   2.177 +    cmp.eq p2,p0=r0,r0
   2.178 +    mov b0=r23
   2.179 +    br.many vmx_asm_mov_to_psr
   2.180 +}
   2.181 +    BACK_TO_SLOW_PATH
   2.182 +    BACK_TO_SLOW_PATH
   2.183 +    BACK_TO_SLOW_PATH
   2.184 +{   /* Entry 10 */
   2.185 +    cmp.eq p2,p0=r0,r0
   2.186 +    mov b0=r23
   2.187 +    br.many vmx_asm_mov_to_rr
   2.188 +}
   2.189 +    BACK_TO_SLOW_PATH
   2.190 +    BACK_TO_SLOW_PATH
   2.191 +    BACK_TO_SLOW_PATH
   2.192 +    BACK_TO_SLOW_PATH
   2.193 +    BACK_TO_SLOW_PATH
   2.194 +    BACK_TO_SLOW_PATH
   2.195 +    BACK_TO_SLOW_PATH
   2.196 +{   /* Entry 18 */
   2.197 +    cmp.eq p2,p0=r0,r0
   2.198 +    mov b0=r23
   2.199 +    br.many vmx_asm_mov_from_rr
   2.200 +}
   2.201 +    BACK_TO_SLOW_PATH
   2.202 +    BACK_TO_SLOW_PATH
   2.203 +    BACK_TO_SLOW_PATH
   2.204 +    BACK_TO_SLOW_PATH
   2.205 +    BACK_TO_SLOW_PATH
   2.206 +{   /* Entry 24 */
   2.207 +    cmp.eq p2,p0=r0,r0
   2.208 +    mov b0=r23
   2.209 +    br.many vmx_asm_ssm
   2.210 +}
   2.211 +{   /* Entry 25 */
   2.212 +    cmp.eq p2,p0=r0,r0
   2.213 +    mov b0=r23
   2.214 +    br.many vmx_asm_rsm
   2.215 +}
   2.216 +    BACK_TO_SLOW_PATH
   2.217 +    BACK_TO_SLOW_PATH
   2.218 +    BACK_TO_SLOW_PATH
   2.219 +    BACK_TO_SLOW_PATH
   2.220 +    BACK_TO_SLOW_PATH
   2.221 +{   /* Entry 31 */
   2.222 +    cmp.eq p2,p0=r0,r0
   2.223 +    mov b0=r23
   2.224 +    br.many vmx_asm_thash
   2.225 +}
   2.226 +    BACK_TO_SLOW_PATH
   2.227 +    BACK_TO_SLOW_PATH
   2.228 +    BACK_TO_SLOW_PATH
   2.229 +    BACK_TO_SLOW_PATH
   2.230 +    BACK_TO_SLOW_PATH
   2.231 +{   /* Entry 37 */
   2.232 +    cmp.ne p2,p0=r0,r0
   2.233 +    mov b0=r23
   2.234 +    br.many vmx_asm_rfi
   2.235 +}
   2.236 +    BACK_TO_SLOW_PATH
   2.237 +    BACK_TO_SLOW_PATH
   2.238 +    BACK_TO_SLOW_PATH
   2.239 +END(virtualization_fault_table)
   2.240 +
   2.241 +
   2.242  ENTRY(vmx_dummy_function)
   2.243      br.sptk.many vmx_dummy_function
   2.244  END(vmx_dummy_function)
   2.245  
   2.246  /*
   2.247 - *	Inputs:
   2.248 - *		r24 : return address
   2.249 - *  	r25 : vpd
   2.250 - *		r29 : scratch
   2.251 + *  Inputs:
   2.252 + *  r24 : return address
   2.253 + *  r25 : vpd
   2.254 + *  r29 : scratch
   2.255   *
   2.256   */
   2.257  GLOBAL_ENTRY(vmx_vps_sync_read)
   2.258 @@ -50,11 +254,10 @@ GLOBAL_ENTRY(vmx_vps_sync_read)
   2.259  END(vmx_vps_sync_read)
   2.260  
   2.261  /*
   2.262 - *	Inputs:
   2.263 - *		r24 : return address
   2.264 - *  	r25 : vpd
   2.265 - *		r29 : scratch
   2.266 - *
   2.267 + *  Inputs:
   2.268 + *  r24 : return address
   2.269 + *  r25 : vpd
   2.270 + *  r29 : scratch
   2.271   */
   2.272  GLOBAL_ENTRY(vmx_vps_sync_write)
   2.273      movl r29 = vmx_dummy_function
   2.274 @@ -64,11 +267,10 @@ GLOBAL_ENTRY(vmx_vps_sync_write)
   2.275  END(vmx_vps_sync_write)
   2.276  
   2.277  /*
   2.278 - *	Inputs:
   2.279 - *		r23 : pr
   2.280 - *		r24 : guest b0
   2.281 - *  	r25 : vpd
   2.282 - *
   2.283 + *  Inputs:
   2.284 + *  r23 : pr
   2.285 + *  r24 : guest b0
   2.286 + *  r25 : vpd
   2.287   */
   2.288  GLOBAL_ENTRY(vmx_vps_resume_normal)
   2.289      movl r29 = vmx_dummy_function
   2.290 @@ -79,11 +281,11 @@ GLOBAL_ENTRY(vmx_vps_resume_normal)
   2.291  END(vmx_vps_resume_normal)
   2.292  
   2.293  /*
   2.294 - *	Inputs:
   2.295 - *		r23 : pr
   2.296 - *		r24 : guest b0
   2.297 - *  	r25 : vpd
   2.298 - *		r17 : isr
   2.299 + *  Inputs:
   2.300 + *  r23 : pr
   2.301 + *  r24 : guest b0
   2.302 + *  r25 : vpd
   2.303 + *  r17 : isr
   2.304   */
   2.305  GLOBAL_ENTRY(vmx_vps_resume_handler)
   2.306      movl r29 = vmx_dummy_function
   2.307 @@ -97,12 +299,203 @@ GLOBAL_ENTRY(vmx_vps_resume_handler)
   2.308      br.sptk.many b0
   2.309  END(vmx_vps_resume_handler)
   2.310  
   2.311 +//r13 ->vcpu
   2.312 +//call with psr.bn = 0
   2.313 +GLOBAL_ENTRY(vmx_asm_bsw0)
   2.314 +    mov r15=ar.unat
   2.315 +    ;;
   2.316 +    adds r14=IA64_VPD_BASE_OFFSET,r13
   2.317 +    ;;
   2.318 +    ld8 r14=[r14]
   2.319 +    bsw.1
   2.320 +    ;;
   2.321 +    adds r2=IA64_VPD_VB1REG_OFFSET, r14
   2.322 +    adds r3=IA64_VPD_VB1REG_OFFSET+8, r14
   2.323 +    ;;
   2.324 +    .mem.offset 0,0; st8.spill [r2]=r16,16
   2.325 +    .mem.offset 8,0; st8.spill [r3]=r17,16
   2.326 +    ;;
   2.327 +    .mem.offset 0,0; st8.spill [r2]=r18,16
   2.328 +    .mem.offset 8,0; st8.spill [r3]=r19,16
   2.329 +    ;;
   2.330 +    .mem.offset 0,0; st8.spill [r2]=r20,16
   2.331 +    .mem.offset 8,0; st8.spill [r3]=r21,16
   2.332 +    ;;
   2.333 +    .mem.offset 0,0; st8.spill [r2]=r22,16
   2.334 +    .mem.offset 8,0; st8.spill [r3]=r23,16
   2.335 +    ;;
   2.336 +    .mem.offset 0,0; st8.spill [r2]=r24,16
   2.337 +    .mem.offset 8,0; st8.spill [r3]=r25,16
   2.338 +    ;;
   2.339 +    .mem.offset 0,0; st8.spill [r2]=r26,16
   2.340 +    .mem.offset 8,0; st8.spill [r3]=r27,16
   2.341 +    ;;
   2.342 +    .mem.offset 0,0; st8.spill [r2]=r28,16
   2.343 +    .mem.offset 8,0; st8.spill [r3]=r29,16
   2.344 +    ;;
   2.345 +    .mem.offset 0,0; st8.spill [r2]=r30,16
   2.346 +    .mem.offset 8,0; st8.spill [r3]=r31,16
   2.347 +    ;;
   2.348 +    mov r9=ar.unat
   2.349 +    adds r8=IA64_VPD_VB1NAT_OFFSET, r14
   2.350 +    ;;
   2.351 +    st8 [r8]=r9
   2.352 +    adds r8=IA64_VPD_VB0NAT_OFFSET, r14
   2.353 +    ;;
   2.354 +    ld8 r9=[r8]
   2.355 +    adds r2= IA64_VPD_VB0REG_OFFSET, r14
   2.356 +    adds r3= IA64_VPD_VB0REG_OFFSET+8, r14
   2.357 +    ;;
   2.358 +    mov ar.unat=r9
   2.359 +    ;;
   2.360 +    ld8.fill r16=[r2],16
   2.361 +    ld8.fill r17=[r3],16
   2.362 +    ;;
   2.363 +    ld8.fill r18=[r2],16
   2.364 +    ld8.fill r19=[r3],16
   2.365 +    ;;
   2.366 +    ld8.fill r20=[r2],16
   2.367 +    ld8.fill r21=[r3],16
   2.368 +    ;;
   2.369 +    ld8.fill r22=[r2],16
   2.370 +    ld8.fill r23=[r3],16
   2.371 +    ;;
   2.372 +    ld8.fill r24=[r2],16
   2.373 +    ld8.fill r25=[r3],16
   2.374 +    ;;
   2.375 +    ld8.fill r26=[r2],16
   2.376 +    ld8.fill r27=[r3],16
   2.377 +    ;;
   2.378 +    ld8.fill r28=[r2],16
   2.379 +    ld8.fill r29=[r3],16
   2.380 +    ;;
   2.381 +    ld8.fill r30=[r2],16
   2.382 +    ld8.fill r31=[r3],16
   2.383 +    ;;
   2.384 +    mov ar.unat=r15
   2.385 +    ;;
   2.386 +    bsw.0
   2.387 +    ;;
   2.388 +    br.ret.sptk.many b0
   2.389 +END(vmx_asm_bsw0)
   2.390 +
   2.391 +//r13 ->vcpu
   2.392 +//call with psr.bn = 0
   2.393 +GLOBAL_ENTRY(vmx_asm_bsw1)
   2.394 +    mov r15=ar.unat
   2.395 +    ;;
   2.396 +    adds r14=IA64_VPD_BASE_OFFSET,r13
   2.397 +    ;;
   2.398 +    ld8 r14=[r14]
   2.399 +    bsw.1
   2.400 +    ;;
   2.401 +    adds r2=IA64_VPD_VB0REG_OFFSET, r14
   2.402 +    adds r3=IA64_VPD_VB0REG_OFFSET+8, r14
   2.403 +    ;;
   2.404 +    .mem.offset 0,0; st8.spill [r2]=r16,16
   2.405 +    .mem.offset 8,0; st8.spill [r3]=r17,16
   2.406 +    ;;
   2.407 +    .mem.offset 0,0; st8.spill [r2]=r18,16
   2.408 +    .mem.offset 8,0; st8.spill [r3]=r19,16
   2.409 +    ;;
   2.410 +    .mem.offset 0,0; st8.spill [r2]=r20,16
   2.411 +    .mem.offset 8,0; st8.spill [r3]=r21,16
   2.412 +    ;;
   2.413 +    .mem.offset 0,0; st8.spill [r2]=r22,16
   2.414 +    .mem.offset 8,0; st8.spill [r3]=r23,16
   2.415 +    ;;
   2.416 +    .mem.offset 0,0; st8.spill [r2]=r24,16
   2.417 +    .mem.offset 8,0; st8.spill [r3]=r25,16
   2.418 +    ;;
   2.419 +    .mem.offset 0,0; st8.spill [r2]=r26,16
   2.420 +    .mem.offset 8,0; st8.spill [r3]=r27,16
   2.421 +    ;;
   2.422 +    .mem.offset 0,0; st8.spill [r2]=r28,16
   2.423 +    .mem.offset 8,0; st8.spill [r3]=r29,16
   2.424 +    ;;
   2.425 +    .mem.offset 0,0; st8.spill [r2]=r30,16
   2.426 +    .mem.offset 8,0; st8.spill [r3]=r31,16
   2.427 +    ;;
   2.428 +    mov r9=ar.unat
   2.429 +    adds r8=IA64_VPD_VB0NAT_OFFSET, r14
   2.430 +    ;;
   2.431 +    st8 [r8]=r9
   2.432 +    adds r8=IA64_VPD_VB1NAT_OFFSET, r14
   2.433 +    ;;
   2.434 +    ld8 r9=[r8]
   2.435 +    adds r2=IA64_VPD_VB1REG_OFFSET, r14
   2.436 +    adds r3=IA64_VPD_VB1REG_OFFSET+8, r14
   2.437 +    ;;
   2.438 +    mov ar.unat=r9
   2.439 +    ;;
   2.440 +    ld8.fill r16=[r2],16
   2.441 +    ld8.fill r17=[r3],16
   2.442 +    ;;
   2.443 +    ld8.fill r18=[r2],16
   2.444 +    ld8.fill r19=[r3],16
   2.445 +    ;;
   2.446 +    ld8.fill r20=[r2],16
   2.447 +    ld8.fill r21=[r3],16
   2.448 +    ;;
   2.449 +    ld8.fill r22=[r2],16
   2.450 +    ld8.fill r23=[r3],16
   2.451 +    ;;
   2.452 +    ld8.fill r24=[r2],16
   2.453 +    ld8.fill r25=[r3],16
   2.454 +    ;;
   2.455 +    ld8.fill r26=[r2],16
   2.456 +    ld8.fill r27=[r3],16
   2.457 +    ;;
   2.458 +    ld8.fill r28=[r2],16
   2.459 +    ld8.fill r29=[r3],16
   2.460 +    ;;
   2.461 +    ld8.fill r30=[r2],16
   2.462 +    ld8.fill r31=[r3],16
   2.463 +    ;;
   2.464 +    mov ar.unat=r15
   2.465 +    ;;
   2.466 +    bsw.0
   2.467 +    ;;
   2.468 +    br.ret.sptk.many b0
   2.469 +END(vmx_asm_bsw1)
   2.470 +
   2.471 +
   2.472 +// rfi
   2.473 +ENTRY(vmx_asm_rfi)
   2.474 +    adds r18=IA64_VPD_BASE_OFFSET,r21
   2.475 +    ;;
   2.476 +    ld8 r18=[r18]
   2.477 +    ;;
   2.478 +    adds r26=IA64_VPD_VIFS_OFFSET,r18
   2.479 +    ;;
   2.480 +    ld8 r26=[r26]
   2.481 +    ;;
   2.482 +    tbit.z p6,p0=r26,63
   2.483 +    (p6) br.cond.dptk.few vmx_asm_rfi_1
   2.484 +    ;;
   2.485 +    // if vifs.v=1, discard the current register frame
   2.486 +    alloc r27=ar.pfs,0,0,0,0
   2.487 +    ;;
   2.488 +vmx_asm_rfi_1:
   2.489 +    adds r26=IA64_VPD_VHPI_OFFSET,r18
   2.490 +    ;;
   2.491 +    ld8 r26=[r26]
   2.492 +    ;;
   2.493 +    cmp.ne p6,p0=r26,r0
   2.494 +    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
   2.495 +    ;;
   2.496 +    VMX_VIRT_SAVE
   2.497 +    ;;
   2.498 +    mov out0=r21
   2.499 +    movl r14=ia64_leave_hypervisor_virt
   2.500 +    ;;
   2.501 +    mov rp=r14
   2.502 +    br.call.sptk.many b6=vmx_vcpu_rfi_fast
   2.503 +END(vmx_asm_rfi)
   2.504 +
   2.505  
   2.506  //mov r1=ar3 (only itc is virtualized)
   2.507 -GLOBAL_ENTRY(vmx_asm_mov_from_ar)
   2.508 -#ifndef ACCE_MOV_FROM_AR
   2.509 -    br.many vmx_virtualization_fault_back
   2.510 -#endif
   2.511 +ENTRY(vmx_asm_mov_from_ar)
   2.512      add r18=VCPU_VTM_OFFSET_OFS,r21
   2.513      add r16=VCPU_VTM_LAST_ITC_OFS,r21
   2.514      extr.u r17=r25,6,7
   2.515 @@ -127,10 +520,7 @@ END(vmx_asm_mov_from_ar)
   2.516  
   2.517  
   2.518  // mov r1=rr[r3]
   2.519 -GLOBAL_ENTRY(vmx_asm_mov_from_rr)
   2.520 -#ifndef ACCE_MOV_FROM_RR
   2.521 -    br.many vmx_virtualization_fault_back
   2.522 -#endif
   2.523 +ENTRY(vmx_asm_mov_from_rr)
   2.524      extr.u r16=r25,20,7
   2.525      extr.u r17=r25,6,7
   2.526      movl r20=asm_mov_from_reg
   2.527 @@ -142,8 +532,8 @@ GLOBAL_ENTRY(vmx_asm_mov_from_rr)
   2.528      add r27=VCPU_VRR0_OFS,r21
   2.529      mov b0=r16
   2.530      br.many b0
   2.531 -    ;;   
   2.532 -vmx_asm_mov_from_rr_back_1:  
   2.533 +    ;;
   2.534 +vmx_asm_mov_from_rr_back_1:
   2.535      adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20
   2.536      adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
   2.537      shr.u r26=r19,61
   2.538 @@ -158,475 +548,204 @@ END(vmx_asm_mov_from_rr)
   2.539  
   2.540  
   2.541  // mov rr[r3]=r2
   2.542 -GLOBAL_ENTRY(vmx_asm_mov_to_rr)
   2.543 -#ifndef ACCE_MOV_TO_RR
   2.544 -    br.many vmx_virtualization_fault_back
   2.545 -#endif
   2.546 -    add r22=IA64_VCPU_RID_BITS_OFFSET,r21
   2.547 -    extr.u r16=r25,20,7		// r3
   2.548 -    extr.u r17=r25,13,7		// r2
   2.549 -    ;;
   2.550 +ENTRY(vmx_asm_mov_to_rr)
   2.551 +    extr.u r16=r25,20,7         // r3
   2.552 +    extr.u r17=r25,13,7         // r2
   2.553      movl r20=asm_mov_from_reg
   2.554      ;;
   2.555      adds r30=vmx_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
   2.556 -    shladd r16=r16,4,r20	// get r3
   2.557 -    mov r18=b0			// save b0
   2.558 +    shladd r16=r16,4,r20        // get r3
   2.559      ;;
   2.560 -    add r27=VCPU_VRR0_OFS,r21
   2.561      mov b0=r16
   2.562      br.many b0
   2.563 -    ;;   
   2.564 +    ;;
   2.565  vmx_asm_mov_to_rr_back_1:
   2.566      adds r30=vmx_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
   2.567 -    shr.u r23=r19,61		// get RR #
   2.568 -    shladd r17=r17,4,r20	// get r2
   2.569 +    shr.u r16=r19,61            // get RR #
   2.570      ;;
   2.571      //if rr7, go back
   2.572 -    cmp.eq p6,p0=7,r23
   2.573 -    mov b0=r18			// restore b0
   2.574 +    cmp.eq p6,p0=7,r16
   2.575 +    mov b0=r23                  // restore b0
   2.576      (p6) br.cond.dpnt.many vmx_virtualization_fault_back
   2.577      ;;
   2.578 -    mov r28=r19			// save r3
   2.579 +    mov r16=r19
   2.580 +    shladd r17=r17,4,r20        // get r2
   2.581 +    ;;
   2.582      mov b0=r17
   2.583      br.many b0
   2.584 -vmx_asm_mov_to_rr_back_2: 
   2.585 -    adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20
   2.586 -    shladd r27=r23,3,r27	// address of VRR
   2.587 -    ;;
   2.588 -    ld1 r22=[r22]		// Load rid_bits from domain
   2.589 -    mov b0=r18			// restore b0
   2.590 -    adds r16=IA64_VCPU_STARTING_RID_OFFSET,r21
   2.591 -    ;;
   2.592 -    ld4 r16=[r16]		// load starting_rid
   2.593 -    extr.u r17=r19,8,24		// Extract RID
   2.594 -    ;;
   2.595 -    shr r17=r17,r22		// Shift out used bits
   2.596 -    shl r16=r16,8
   2.597 -    ;;
   2.598 -    add r20=r19,r16
   2.599 -    cmp.ne p6,p0=0,r17	// If reserved RID bits are set, use C fall back.
   2.600 -    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
   2.601 -    ;; //mangling rid 1 and 3
   2.602 -    extr.u r16=r20,8,8
   2.603 -    extr.u r17=r20,24,8
   2.604 -    mov r24=r18		// saved b0 for resume
   2.605 -    ;;
   2.606 -    extr.u r18=r20,2,6 // page size
   2.607 -    dep r20=r16,r20,24,8
   2.608 -    mov b0=r30
   2.609 +vmx_asm_mov_to_rr_back_2:
   2.610 +    mov r17=r19                 // get value
   2.611      ;;
   2.612 -    dep r20=r17,r20,8,8
   2.613 -    ;; //set ve 1
   2.614 -    dep r20=-1,r20,0,1
   2.615 -    // If ps > PAGE_SHIFT, use PAGE_SHIFT
   2.616 -    cmp.lt p6,p0=PAGE_SHIFT,r18
   2.617 -    ;;
   2.618 -    (p6) mov r18=PAGE_SHIFT
   2.619 -    ;;
   2.620 -    (p6) dep r20=r18,r20,2,6
   2.621 -    ;;	
   2.622 -    st8 [r27]=r19	// Write to vrr.
   2.623 -    // Write to save_rr if rr=0 or rr=4.
   2.624 -    cmp.eq p6,p0=0,r23
   2.625 +    // if invalid value , go back
   2.626 +    adds r26=IA64_VCPU_RID_BITS_OFFSET,r21
   2.627 +    mov r27=r0
   2.628      ;;
   2.629 -    cmp.eq.or p6,p0=4,r23
   2.630 -    ;;
   2.631 -    adds r16=IA64_VCPU_MMU_MODE_OFFSET,r21
   2.632 -    (p6) adds r17=IA64_VCPU_META_SAVED_RR0_OFFSET,r21
   2.633 +    ld1 r27=[r26]
   2.634      ;;
   2.635 -    ld1 r16=[r16]
   2.636 -    cmp.eq p7,p0=r0,r0
   2.637 -    (p6) shladd r17=r23,1,r17
   2.638 +    shr r19=r19,r27
   2.639      ;;
   2.640 -    (p6) st8 [r17]=r20
   2.641 -    (p6) cmp.eq p7,p0=VMX_MMU_VIRTUAL,r16 // Set physical rr if in virt mode
   2.642 +    cmp.ne p6,p0=r19,r0
   2.643 +    mov b0=r23                  // restore b0
   2.644 +    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
   2.645      ;;
   2.646 -    (p7) mov rr[r28]=r20
   2.647 -    br.many b0
   2.648 +    VMX_VIRT_SAVE
   2.649 +    ;;
   2.650 +    mov out0=r21
   2.651 +    mov out1=r16
   2.652 +    mov out2=r17
   2.653 +    movl r14=ia64_leave_hypervisor_virt
   2.654 +    ;;
   2.655 +    mov rp=r14
   2.656 +    br.call.sptk.many b6=vmx_vcpu_set_rr_fast
   2.657  END(vmx_asm_mov_to_rr)
   2.658  
   2.659  
   2.660 -//rsm 
   2.661 -GLOBAL_ENTRY(vmx_asm_rsm)
   2.662 -#ifndef ACCE_RSM
   2.663 -    br.many vmx_virtualization_fault_back
   2.664 -#endif
   2.665 -    mov r23=r31
   2.666 -    add r16=IA64_VPD_BASE_OFFSET,r21
   2.667 +//rsm 25
   2.668 +ENTRY(vmx_asm_rsm)
   2.669      extr.u r26=r25,6,21 // Imm21
   2.670      extr.u r27=r25,31,2 // I2d
   2.671      ;;
   2.672 -    ld8 r16=[r16]
   2.673      extr.u r28=r25,36,1 // I
   2.674      dep r26=r27,r26,21,2
   2.675      ;;
   2.676 -    add r17=VPD_VPSR_START_OFFSET,r16
   2.677      //r18 is imm24
   2.678 -    dep r18=r28,r26,23,1
   2.679 -    ;;
   2.680 -    //sync read
   2.681 -    mov r25=r16
   2.682 -    movl r24=vmx_asm_rsm_sync_read_return
   2.683 -    mov r20=b0
   2.684 -    br.sptk.many vmx_vps_sync_read
   2.685 -    ;;
   2.686 -vmx_asm_rsm_sync_read_return:
   2.687 -    ld8 r26=[r17]
   2.688 -    // xenoprof
   2.689 -    // Don't change mPSR.pp.
   2.690 -    // It is manipulated by xenoprof.
   2.691 -    movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_PP
   2.692 -
   2.693 -    sub r27=-1,r18 // ~imm24
   2.694 -    ;;
   2.695 -    or r28=r27,r28 // Keep IC,I,DT,SI
   2.696 -    and r19=r26,r27 // Update vpsr
   2.697 -    ;;
   2.698 -    st8 [r17]=r19
   2.699 -    mov r24=cr.ipsr
   2.700 -    ;;
   2.701 -    and r24=r24,r28 // Update ipsr
   2.702 -    adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
   2.703 -    ;;
   2.704 -    ld8 r27=[r27]
   2.705 -    ;;
   2.706 -    tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
   2.707 -    ;;
   2.708 -    (p8) dep r24=-1,r24,IA64_PSR_DFH_BIT,1  // Keep dfh
   2.709 +    dep r16=r28,r26,23,1
   2.710      ;;
   2.711 -    mov cr.ipsr=r24
   2.712 -    //sync write
   2.713 -    mov r25=r16
   2.714 -    movl r24=vmx_asm_rsm_sync_write_return
   2.715 -    br.sptk.many vmx_vps_sync_write
   2.716 -    ;;
   2.717 -vmx_asm_rsm_sync_write_return:
   2.718 -    add r29=IA64_VCPU_MMU_MODE_OFFSET,r21
   2.719 -    ;;
   2.720 -    ld1 r27=[r29]
   2.721 -    ;;
   2.722 -    cmp.ne p6,p0=VMX_MMU_VIRTUAL,r27
   2.723 -    ;;
   2.724 -    tbit.z.or p6,p0=r18,IA64_PSR_DT_BIT
   2.725 -    (p6) br.dptk vmx_asm_rsm_out
   2.726 -    // DT not cleared or already in phy mode
   2.727 +    VMX_VIRT_SAVE
   2.728      ;;
   2.729 -    // Switch to meta physical mode D.
   2.730 -    add r26=IA64_VCPU_META_RID_D_OFFSET,r21
   2.731 -    mov r27=VMX_MMU_PHY_D
   2.732 -    ;;
   2.733 -    ld8 r26=[r26]
   2.734 -    st1 [r29]=r27 
   2.735 -    dep.z r28=4,61,3
   2.736 +    mov out0=r21
   2.737 +    mov out1=r16
   2.738 +    movl r14=ia64_leave_hypervisor_virt
   2.739      ;;
   2.740 -    mov rr[r0]=r26
   2.741 -    ;;
   2.742 -    mov rr[r28]=r26
   2.743 -    ;;
   2.744 -    srlz.d
   2.745 -vmx_asm_rsm_out:	
   2.746 -    mov r31=r23
   2.747 -    mov r24=r20
   2.748 -    br.many vmx_resume_to_guest
   2.749 +    mov rp=r14
   2.750 +    br.call.sptk.many b6=vmx_vcpu_rsm_fast
   2.751  END(vmx_asm_rsm)
   2.752  
   2.753  
   2.754 -//ssm 
   2.755 -GLOBAL_ENTRY(vmx_asm_ssm)
   2.756 -#ifndef ACCE_SSM
   2.757 -    br.many vmx_virtualization_fault_back
   2.758 -#endif
   2.759 -    mov r23=r31
   2.760 -    add r16=IA64_VPD_BASE_OFFSET,r21
   2.761 +//ssm 24
   2.762 +ENTRY(vmx_asm_ssm)
   2.763 +    adds r18=IA64_VPD_BASE_OFFSET,r21
   2.764 +    ;;
   2.765 +    ld8 r18=[r18]
   2.766 +    ;;
   2.767 +    adds r26=IA64_VPD_VHPI_OFFSET,r18
   2.768 +    ;;
   2.769 +    ld8 r26=[r26]
   2.770 +    ;;
   2.771 +    cmp.ne p6,p0=r26,r0
   2.772 +    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
   2.773 +    ;;
   2.774      extr.u r26=r25,6,21
   2.775      extr.u r27=r25,31,2
   2.776      ;;
   2.777 -    ld8 r16=[r16]
   2.778      extr.u r28=r25,36,1
   2.779      dep r26=r27,r26,21,2
   2.780      ;;  //r18 is imm24
   2.781 -    dep r18=r28,r26,23,1
   2.782 -    ;;  
   2.783 -    //sync read
   2.784 -    mov r25=r16
   2.785 -    movl r24=vmx_asm_ssm_sync_read_return
   2.786 -    mov r20=b0
   2.787 -    br.sptk.many vmx_vps_sync_read
   2.788 -    ;;
   2.789 -vmx_asm_ssm_sync_read_return:
   2.790 -    add r27=VPD_VPSR_START_OFFSET,r16
   2.791 -    ;;
   2.792 -    ld8 r17=[r27]		//r17 old vpsr
   2.793 -    dep r28=0,r18,IA64_PSR_PP_BIT,1 // For xenoprof
   2.794 -                                    // Don't change mPSR.pp
   2.795 -                                    // It is maintained by xenoprof.
   2.796 -    ;;
   2.797 -    or r19=r17,r18		//r19 new vpsr
   2.798 -    ;;
   2.799 -    st8 [r27]=r19 // update vpsr
   2.800 -    mov r24=cr.ipsr
   2.801 -    ;;
   2.802 -    or r24=r24,r28
   2.803 -    ;;
   2.804 -    mov cr.ipsr=r24
   2.805 -    //sync_write
   2.806 -    mov r25=r16
   2.807 -    movl r24=vmx_asm_ssm_sync_write_return
   2.808 -    br.sptk.many vmx_vps_sync_write
   2.809 -    ;;
   2.810 -vmx_asm_ssm_sync_write_return:	
   2.811 -    add r29=IA64_VCPU_MMU_MODE_OFFSET,r21
   2.812 -    movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
   2.813 -    ;;
   2.814 -    ld1 r30=[r29] // mmu_mode
   2.815 -    ;;
   2.816 -    and r27=r28,r19
   2.817 -    cmp.eq p6,p0=VMX_MMU_VIRTUAL,r30
   2.818 +    dep r16=r28,r26,23,1
   2.819      ;;
   2.820 -    cmp.ne.or p6,p0=r28,r27 // (vpsr & (it+dt+rt)) /= (it+dt+rt) ie stay in phy
   2.821 -    (p6) br.dptk vmx_asm_ssm_1
   2.822 -    ;;
   2.823 -    add r26=IA64_VCPU_META_SAVED_RR0_OFFSET,r21
   2.824 -    add r27=IA64_VCPU_META_SAVED_RR0_OFFSET+8,r21
   2.825 -    mov r30=VMX_MMU_VIRTUAL
   2.826 -    ;;
   2.827 -    ld8 r26=[r26]
   2.828 -    ld8 r27=[r27]
   2.829 -    st1 [r29]=r30
   2.830 -    dep.z r28=4,61,3
   2.831 -    ;;
   2.832 -    mov rr[r0]=r26
   2.833 -    ;;
   2.834 -    mov rr[r28]=r27
   2.835 -    ;;
   2.836 -    srlz.d
   2.837 -    ;;
   2.838 -vmx_asm_ssm_1:
   2.839 -    tbit.nz p6,p0=r17,IA64_PSR_I_BIT
   2.840 +    VMX_VIRT_SAVE
   2.841      ;;
   2.842 -    tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
   2.843 -    (p6) br.dptk vmx_asm_ssm_out
   2.844 -    ;;
   2.845 -    add r29=VPD_VTPR_START_OFFSET,r16
   2.846 -    add r30=VPD_VHPI_START_OFFSET,r16
   2.847 -    ;;
   2.848 -    ld8 r29=[r29]
   2.849 -    ld8 r30=[r30]
   2.850 +    mov out0=r21
   2.851 +    mov out1=r16
   2.852 +    movl r14=ia64_leave_hypervisor_virt
   2.853      ;;
   2.854 -    extr.u r17=r29,4,4
   2.855 -    extr.u r18=r29,16,1
   2.856 -    ;;
   2.857 -    dep r17=r18,r17,4,1
   2.858 -    mov r31=r23
   2.859 -    mov b0=r20
   2.860 -    ;;
   2.861 -    cmp.gt p6,p0=r30,r17
   2.862 -    (p6) br.dpnt.few vmx_asm_dispatch_vexirq
   2.863 -vmx_asm_ssm_out:	
   2.864 -    mov r31=r23
   2.865 -    mov r24=r20
   2.866 -    br.many vmx_resume_to_guest
   2.867 +    mov rp=r14
   2.868 +    br.call.sptk.many b6=vmx_vcpu_ssm_fast
   2.869  END(vmx_asm_ssm)
   2.870  
   2.871  
   2.872 -//mov psr.l=r2 
   2.873 -GLOBAL_ENTRY(vmx_asm_mov_to_psr)
   2.874 -#ifndef ACCE_MOV_TO_PSR
   2.875 -    br.many vmx_virtualization_fault_back
   2.876 -#endif
   2.877 -    mov r23=r31
   2.878 -    add r16=IA64_VPD_BASE_OFFSET,r21
   2.879 +//mov psr.l=r2
   2.880 +ENTRY(vmx_asm_mov_to_psr)
   2.881      extr.u r26=r25,13,7 //r2
   2.882 +    movl r27=asm_mov_from_reg
   2.883      ;;
   2.884 -    ld8 r16=[r16]
   2.885 -    movl r24=asm_mov_from_reg
   2.886 -    ;;
   2.887 -    adds r30=vmx_asm_mov_to_psr_back-asm_mov_from_reg,r24
   2.888 -    shladd r26=r26,4,r24
   2.889 -    mov r20=b0
   2.890 +    adds r30=vmx_asm_mov_to_psr_back-asm_mov_from_reg,r27
   2.891 +    shladd r26=r26,4,r27
   2.892      ;;
   2.893      mov b0=r26
   2.894      br.many b0
   2.895 -    ;;   
   2.896 -vmx_asm_mov_to_psr_back:
   2.897 -    //sync read
   2.898 -    mov r25=r16
   2.899 -    movl r24=vmx_asm_mov_to_psr_sync_read_return
   2.900 -    br.sptk.many vmx_vps_sync_read
   2.901 -    ;;
   2.902 -vmx_asm_mov_to_psr_sync_read_return:
   2.903 -    add r27=VPD_VPSR_START_OFFSET,r16
   2.904 -    ;;
   2.905 -    ld8 r17=[r27] // r17 old vpsr
   2.906 -    dep r19=0,r19,32,32 // Clear bits 32-63
   2.907 -    ;;   
   2.908 -    dep r18=0,r17,0,32
   2.909 -    ;; 
   2.910 -    or r18=r18,r19 //r18 new vpsr
   2.911      ;;
   2.912 -    st8 [r27]=r18 // set vpsr
   2.913 -    //sync write
   2.914 -    mov r25=r16
   2.915 -    movl r24=vmx_asm_mov_to_psr_sync_write_return
   2.916 -    br.sptk.many vmx_vps_sync_write
   2.917 -    ;;
   2.918 -vmx_asm_mov_to_psr_sync_write_return:
   2.919 -    add r22=IA64_VCPU_MMU_MODE_OFFSET,r21
   2.920 -    movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
   2.921 -    ;;
   2.922 -    and r27=r28,r18
   2.923 -    and r29=r28,r17
   2.924 +vmx_asm_mov_to_psr_back:
   2.925 +    adds r18=IA64_VPD_BASE_OFFSET,r21
   2.926 +    tbit.nz p6,p0 = r19, IA64_PSR_I_BIT
   2.927      ;;
   2.928 -    cmp.eq p5,p0=r29,r27 // (old_vpsr & (dt+rt+it)) == (new_vpsr & (dt+rt+it))
   2.929 -    cmp.eq p6,p7=r28,r27 // (new_vpsr & (dt+rt+it)) == (dt+rt+it)
   2.930 -    (p5) br.many vmx_asm_mov_to_psr_1 // no change
   2.931 +    ld8 r18=[r18]
   2.932      ;;
   2.933 -    //virtual to physical D
   2.934 -    (p7) add r26=IA64_VCPU_META_RID_D_OFFSET,r21
   2.935 -    (p7) add r27=IA64_VCPU_META_RID_D_OFFSET,r21
   2.936 -    (p7) mov r30=VMX_MMU_PHY_D
   2.937 -    ;;
   2.938 -    //physical to virtual
   2.939 -    (p6) add r26=IA64_VCPU_META_SAVED_RR0_OFFSET,r21
   2.940 -    (p6) add r27=IA64_VCPU_META_SAVED_RR0_OFFSET+8,r21
   2.941 -    (p6) mov r30=VMX_MMU_VIRTUAL
   2.942 +    adds r26=IA64_VPD_VHPI_OFFSET,r18
   2.943      ;;
   2.944      ld8 r26=[r26]
   2.945 -    ld8 r27=[r27]
   2.946 -    st1 [r22]=r30
   2.947 -    dep.z r28=4,61,3
   2.948 -    ;;
   2.949 -    mov rr[r0]=r26
   2.950 -    ;;
   2.951 -    mov rr[r28]=r27
   2.952 -    ;;
   2.953 -    srlz.d
   2.954 -    ;;
   2.955 -vmx_asm_mov_to_psr_1:
   2.956 -    mov r24=cr.ipsr
   2.957 -    movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
   2.958 -    ;;
   2.959 -    tbit.nz p7,p0=r24,IA64_PSR_PP_BIT           // For xenoprof
   2.960 -    or r27=r19,r28
   2.961 -    dep r24=0,r24,0,32
   2.962 -    ;;
   2.963 -    add r24=r27,r24
   2.964 -    ;;
   2.965 -    adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
   2.966 -    (p7) dep r24=-1,r24,IA64_PSR_PP_BIT,1       // For xenoprof
   2.967 -                                                // Dom't change mPSR.pp
   2.968 -                                                // It is maintaned by xenoprof
   2.969 -    ;;
   2.970 -    ld8 r27=[r27]
   2.971      ;;
   2.972 -    tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
   2.973 -    ;;
   2.974 -    (p8) dep r24=-1,r24,IA64_PSR_DFH_BIT,1
   2.975 -    ;;
   2.976 -    mov cr.ipsr=r24
   2.977 -    tbit.nz p6,p0=r17,IA64_PSR_I_BIT
   2.978 -    ;;
   2.979 -    tbit.z.or p6,p0=r18,IA64_PSR_I_BIT
   2.980 -    (p6) br.dpnt.few vmx_asm_mov_to_psr_out
   2.981 -    ;;
   2.982 -    add r29=VPD_VTPR_START_OFFSET,r16
   2.983 -    add r30=VPD_VHPI_START_OFFSET,r16
   2.984 +    // if enabling interrupts while vhpi is pending, fall back to the slow path
   2.985 +    cmp.ne.and p6,p0=r26,r0
   2.986 +    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
   2.987      ;;
   2.988 -    ld8 r29=[r29]
   2.989 -    ld8 r30=[r30]
   2.990 -    ;;
   2.991 -    extr.u r17=r29,4,4
   2.992 -    extr.u r18=r29,16,1
   2.993 +    mov r16=r19
   2.994      ;;
   2.995 -    dep r17=r18,r17,4,1
   2.996 -    mov r31=r23
   2.997 -    mov b0=r20
   2.998 +    VMX_VIRT_SAVE
   2.999      ;;
  2.1000 -    cmp.gt p6,p0=r30,r17
  2.1001 -    (p6) br.dpnt.few vmx_asm_dispatch_vexirq
  2.1002 -vmx_asm_mov_to_psr_out:
  2.1003 -    mov r31=r23
  2.1004 -    mov r24=r20
  2.1005 -    br.many vmx_resume_to_guest
  2.1006 +    mov out0=r21
  2.1007 +    mov out1=r16
  2.1008 +    movl r14=ia64_leave_hypervisor_virt
  2.1009 +    ;;
  2.1010 +    mov rp=r14
  2.1011 +    br.call.sptk.many b6=vmx_vcpu_mov_to_psr_fast
  2.1012  END(vmx_asm_mov_to_psr)
  2.1013  
  2.1014  
  2.1015 -ENTRY(vmx_asm_dispatch_vexirq)
  2.1016 -//increment iip
  2.1017 -    mov r16=cr.ipsr
  2.1018 -    ;;
  2.1019 -    extr.u r17=r16,IA64_PSR_RI_BIT,2
  2.1020 -    tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
  2.1021 -    ;;	
  2.1022 -    (p6) mov r18=cr.iip
  2.1023 -    (p6) mov r17=r0
  2.1024 -    (p7) add r17=1,r17
  2.1025 -    ;;    
  2.1026 -    (p6) add r18=0x10,r18
  2.1027 -    dep r16=r17,r16,IA64_PSR_RI_BIT,2
  2.1028 -    ;;		
  2.1029 -    (p6) mov cr.iip=r18
  2.1030 -    mov cr.ipsr=r16
  2.1031 -    br.many vmx_dispatch_vexirq
  2.1032 -END(vmx_asm_dispatch_vexirq)
  2.1033 -
  2.1034  // thash r1=r3
  2.1035  // TODO: add support when pta.vf = 1
  2.1036 -GLOBAL_ENTRY(vmx_asm_thash)
  2.1037 -#ifndef ACCE_THASH
  2.1038 -    br.many vmx_virtualization_fault_back
  2.1039 -#endif
  2.1040 -    extr.u r17=r25,20,7		// get r3 from opcode in r25 
  2.1041 -    extr.u r18=r25,6,7		// get r1 from opcode in r25
  2.1042 +ENTRY(vmx_asm_thash)
  2.1043 +    extr.u r17=r25,20,7                 // get r3 from opcode in r25
  2.1044 +    extr.u r18=r25,6,7                  // get r1 from opcode in r25
  2.1045      movl r20=asm_mov_from_reg
  2.1046      ;;
  2.1047      adds r30=vmx_asm_thash_back1-asm_mov_from_reg,r20
  2.1048 -    shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
  2.1049 -    adds r16=IA64_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
  2.1050 -    mov r24=b0			// save b0
  2.1051 +    shladd r17=r17,4,r20                // get addr of MOVE_FROM_REG(r17)
  2.1052 +    adds r16=IA64_VPD_BASE_OFFSET,r21   // get vcpu.arch.priveregs
  2.1053 +    mov r24=b0                          // save b0
  2.1054      ;;
  2.1055 -    ld8 r16=[r16]		// get VPD addr
  2.1056 +    ld8 r16=[r16]                       // get VPD addr
  2.1057      mov b0=r17
  2.1058 -    br.many b0			// r19 return value
  2.1059 -    ;;                                                     
  2.1060 +    br.many b0                          // r19 return value
  2.1061 +    ;;
  2.1062  vmx_asm_thash_back1:
  2.1063 -    shr.u r23=r19,61		// get RR number
  2.1064 -    adds r28=VCPU_VRR0_OFS,r21	// get vcpu->arch.arch_vmx.vrr[0]'s addr
  2.1065 -    adds r16=IA64_VPD_VPTA_OFFSET,r16	// get virtual pta 
  2.1066 +    shr.u r23=r19,61                    // get RR number
  2.1067 +    adds r28=VCPU_VRR0_OFS,r21  // get vcpu->arch.arch_vmx.vrr[0]'s addr
  2.1068 +    adds r16=IA64_VPD_VPTA_OFFSET,r16   // get virtual pta
  2.1069      ;;
  2.1070 -    shladd r27=r23,3,r28	// get vcpu->arch.arch_vmx.vrr[r23]'s addr
  2.1071 -    ld8 r17=[r16]		// get virtual PTA
  2.1072 +    shladd r27=r23,3,r28        // get vcpu->arch.arch_vmx.vrr[r23]'s addr
  2.1073 +    ld8 r17=[r16]               // get virtual PTA
  2.1074      mov r26=1
  2.1075      ;;
  2.1076 -    extr.u r29=r17,2,6		// get pta.size
  2.1077 -    ld8 r28=[r27]		// get vcpu->arch.arch_vmx.vrr[r23]'s value
  2.1078 +    extr.u r29=r17,2,6          // get pta.size
  2.1079 +    ld8 r28=[r27]               // get vcpu->arch.arch_vmx.vrr[r23]'s value
  2.1080      ;;
  2.1081      // Fall-back to C if VF (long format) is set
  2.1082      tbit.nz p6,p0=r17,8
  2.1083      mov b0=r24
  2.1084      ;;
  2.1085 -(p6) mov r24=EVENT_THASH
  2.1086 -(p6) br.cond.dpnt.many vmx_virtualization_fault_back
  2.1087 -    extr.u r28=r28,2,6		// get rr.ps
  2.1088 -    shl r22=r26,r29		// 1UL << pta.size
  2.1089 +    (p6) mov r24=EVENT_THASH
  2.1090 +    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
  2.1091 +    extr.u r28=r28,2,6      // get rr.ps
  2.1092 +    shl r22=r26,r29         // 1UL << pta.size
  2.1093      ;;
  2.1094 -    shr.u r23=r19,r28		// vaddr >> rr.ps
  2.1095 -    adds r26=3,r29		// pta.size + 3 
  2.1096 -    shl r27=r17,3		// pta << 3 
  2.1097 +    shr.u r23=r19,r28       // vaddr >> rr.ps
  2.1098 +    adds r26=3,r29          // pta.size + 3
  2.1099 +    shl r27=r17,3           // pta << 3
  2.1100      ;;
  2.1101 -    shl r23=r23,3		// (vaddr >> rr.ps) << 3
  2.1102 -    shr.u r27=r27,r26		// (pta << 3) >> (pta.size+3)
  2.1103 +    shl r23=r23,3           // (vaddr >> rr.ps) << 3
  2.1104 +    shr.u r27=r27,r26       // (pta << 3) >> (pta.size+3)
  2.1105      movl r16=VRN_MASK
  2.1106      ;;
  2.1107 -    adds r22=-1,r22		// (1UL << pta.size) - 1
  2.1108 -    shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
  2.1109 -    and r19=r19,r16		// vaddr & VRN_MASK
  2.1110 +    adds r22=-1,r22         // (1UL << pta.size) - 1
  2.1111 +    shl r27=r27,r29         // ((pta<<3)>>(pta.size+3))<<pta.size
  2.1112 +    and r19=r19,r16         // vaddr & VRN_MASK
  2.1113      ;;
  2.1114 -    and r22=r22,r23		// vhpt_offset 
  2.1115 -    or r19=r19,r27		// (vadr&VRN_MASK) |(((pta<<3)>>(pta.size + 3))<<pta.size) 
  2.1116 +    and r22=r22,r23         // vhpt_offset
  2.1117 +    or r19=r19,r27          // (vadr&VRN_MASK) |(((pta<<3)>>(pta.size + 3))<<pta.size)
  2.1118      adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
  2.1119      ;;
  2.1120 -    or r19=r19,r22		// calc pval
  2.1121 +    or r19=r19,r22          // calc pval
  2.1122      shladd r17=r18,4,r26
  2.1123      adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20
  2.1124      ;;
  2.1125 @@ -634,99 +753,101 @@ vmx_asm_thash_back1:
  2.1126      br.many b0
  2.1127  END(vmx_asm_thash)
  2.1128  
  2.1129 -#define MOV_TO_REG0	\
  2.1130 -{;			\
  2.1131 -    nop.b 0x0;		\
  2.1132 -    nop.b 0x0;		\
  2.1133 -    nop.b 0x0;		\
  2.1134 -    ;;			\
  2.1135 -};
  2.1136  
  2.1137  
  2.1138 -#define MOV_TO_REG(n)	\
  2.1139 -{;			\
  2.1140 -    mov r##n##=r19;	\
  2.1141 -    mov b0=r30;		\
  2.1142 -    br.sptk.many b0;	\
  2.1143 -    ;;			\
  2.1144 -};
  2.1145 -
  2.1146 -
  2.1147 -#define MOV_FROM_REG(n)	\
  2.1148 -{;			\
  2.1149 -    mov r19=r##n##;	\
  2.1150 -    mov b0=r30;		\
  2.1151 -    br.sptk.many b0;	\
  2.1152 -    ;;			\
  2.1153 +#define MOV_TO_REG0     \
  2.1154 +{;                      \
  2.1155 +    nop.b 0x0;          \
  2.1156 +    nop.b 0x0;          \
  2.1157 +    nop.b 0x0;          \
  2.1158 +    ;;                  \
  2.1159  };
  2.1160  
  2.1161  
  2.1162 -#define MOV_TO_BANK0_REG(n)			\
  2.1163 -ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
  2.1164 -{;						\
  2.1165 -    mov r26=r2;					\
  2.1166 -    mov r2=r19;					\
  2.1167 -    bsw.1;					\
  2.1168 -    ;;						\
  2.1169 -};						\
  2.1170 -{;						\
  2.1171 -    mov r##n##=r2;				\
  2.1172 -    nop.b 0x0;					\
  2.1173 -    bsw.0;					\
  2.1174 -    ;;						\
  2.1175 -};						\
  2.1176 -{;						\
  2.1177 -    mov r2=r26;					\
  2.1178 -    mov b0=r30;					\
  2.1179 -    br.sptk.many b0;				\
  2.1180 -    ;;						\
  2.1181 -};						\
  2.1182 +#define MOV_TO_REG(n)   \
  2.1183 +{;                      \
  2.1184 +    mov r##n##=r19;     \
  2.1185 +    mov b0=r30;         \
  2.1186 +    br.sptk.many b0;    \
  2.1187 +    ;;                  \
  2.1188 +};
  2.1189 +
  2.1190 +
  2.1191 +#define MOV_FROM_REG(n) \
  2.1192 +{;                      \
  2.1193 +    mov r19=r##n##;     \
  2.1194 +    mov b0=r30;         \
  2.1195 +    br.sptk.many b0;    \
  2.1196 +    ;;                  \
  2.1197 +};
  2.1198 +
  2.1199 +
  2.1200 +#define MOV_TO_BANK0_REG(n)                 \
  2.1201 +ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \
  2.1202 +{;                                          \
  2.1203 +    mov r26=r2;                             \
  2.1204 +    mov r2=r19;                             \
  2.1205 +    bsw.1;                                  \
  2.1206 +    ;;                                      \
  2.1207 +};                                          \
  2.1208 +{;                                          \
  2.1209 +    mov r##n##=r2;                          \
  2.1210 +    nop.b 0x0;                              \
  2.1211 +    bsw.0;                                  \
  2.1212 +    ;;                                      \
  2.1213 +};                                          \
  2.1214 +{;                                          \
  2.1215 +    mov r2=r26;                             \
  2.1216 +    mov b0=r30;                             \
  2.1217 +    br.sptk.many b0;                        \
  2.1218 +    ;;                                      \
  2.1219 +};                                          \
  2.1220  END(asm_mov_to_bank0_reg##n##)
  2.1221  
  2.1222  
  2.1223 -#define MOV_FROM_BANK0_REG(n)			\
  2.1224 -ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
  2.1225 -{;						\
  2.1226 -    mov r26=r2;					\
  2.1227 -    nop.b 0x0;					\
  2.1228 -    bsw.1;					\
  2.1229 -    ;;						\
  2.1230 -};						\
  2.1231 -{;						\
  2.1232 -    mov r2=r##n##;				\
  2.1233 -    nop.b 0x0;					\
  2.1234 -    bsw.0;					\
  2.1235 -    ;;						\
  2.1236 -};						\
  2.1237 -{;						\
  2.1238 -    mov r19=r2;					\
  2.1239 -    mov r2=r26;					\
  2.1240 -    mov b0=r30;					\
  2.1241 -};						\
  2.1242 -{;						\
  2.1243 -    nop.b 0x0;					\
  2.1244 -    nop.b 0x0;					\
  2.1245 -    br.sptk.many b0;				\
  2.1246 -    ;;						\
  2.1247 -};						\
  2.1248 +#define MOV_FROM_BANK0_REG(n)                   \
  2.1249 +ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);   \
  2.1250 +{;                                              \
  2.1251 +    mov r26=r2;                                 \
  2.1252 +    nop.b 0x0;                                  \
  2.1253 +    bsw.1;                                      \
  2.1254 +    ;;                                          \
  2.1255 +};                                              \
  2.1256 +{;                                              \
  2.1257 +    mov r2=r##n##;                              \
  2.1258 +    nop.b 0x0;                                  \
  2.1259 +    bsw.0;                                      \
  2.1260 +    ;;                                          \
  2.1261 +};                                              \
  2.1262 +{;                                              \
  2.1263 +    mov r19=r2;                                 \
  2.1264 +    mov r2=r26;                                 \
  2.1265 +    mov b0=r30;                                 \
  2.1266 +};                                              \
  2.1267 +{;                                              \
  2.1268 +    nop.b 0x0;                                  \
  2.1269 +    nop.b 0x0;                                  \
  2.1270 +    br.sptk.many b0;                            \
  2.1271 +    ;;                                          \
  2.1272 +};                                              \
  2.1273  END(asm_mov_from_bank0_reg##n##)
  2.1274  
  2.1275  
  2.1276 -#define JMP_TO_MOV_TO_BANK0_REG(n)		\
  2.1277 -{;						\
  2.1278 -    nop.b 0x0;					\
  2.1279 -    nop.b 0x0;					\
  2.1280 -    br.sptk.many asm_mov_to_bank0_reg##n##;	\
  2.1281 -    ;;						\
  2.1282 -}    
  2.1283 +#define JMP_TO_MOV_TO_BANK0_REG(n)              \
  2.1284 +{;                                              \
  2.1285 +    nop.b 0x0;                                  \
  2.1286 +    nop.b 0x0;                                  \
  2.1287 +    br.sptk.many asm_mov_to_bank0_reg##n##;     \
  2.1288 +    ;;                                          \
  2.1289 +}
  2.1290  
  2.1291  
  2.1292 -#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
  2.1293 -{;						\
  2.1294 -    nop.b 0x0;					\
  2.1295 -    nop.b 0x0;					\
  2.1296 -    br.sptk.many asm_mov_from_bank0_reg##n##;	\
  2.1297 -    ;;						\
  2.1298 +#define JMP_TO_MOV_FROM_BANK0_REG(n)            \
  2.1299 +{;                                              \
  2.1300 +    nop.b 0x0;                                  \
  2.1301 +    nop.b 0x0;                                  \
  2.1302 +    br.sptk.many asm_mov_from_bank0_reg##n##;   \
  2.1303 +    ;;                                          \
  2.1304  }
  2.1305  
  2.1306  
  2.1307 @@ -749,7 +870,7 @@ MOV_FROM_BANK0_REG(31)
  2.1308  
  2.1309  
  2.1310  // mov from reg table
  2.1311 -// r19:	value, r30: return address
  2.1312 +// r19: value, r30: return address
  2.1313  // r26 may be destroyed
  2.1314  ENTRY(asm_mov_from_reg)
  2.1315      MOV_FROM_REG(0)
  2.1316 @@ -884,29 +1005,30 @@ END(asm_mov_from_reg)
  2.1317  
  2.1318  
  2.1319  /* must be in bank 0
  2.1320 - * parameter:
  2.1321 - * r31: pr
  2.1322 - * r24: b0
  2.1323 + *  parameter:
  2.1324 + *  r31: pr
  2.1325 + *  r24: b0
  2.1326 + *  p2: whether increase IP
  2.1327 + *  p3: whether check vpsr.ic
  2.1328   */
  2.1329  ENTRY(vmx_resume_to_guest)
  2.1330 -    mov r16=cr.ipsr
  2.1331 -    ;;
  2.1332 +    // ip ++
  2.1333 +    (p2) mov r16=cr.ipsr
  2.1334 +    (p2) dep.z r30=1,IA64_PSR_RI_BIT,1
  2.1335      adds r19=IA64_VPD_BASE_OFFSET,r21
  2.1336 -    extr.u r17=r16,IA64_PSR_RI_BIT,2
  2.1337      ;;
  2.1338      ld8 r25=[r19]
  2.1339 -    add r17=1,r17
  2.1340 +    (p2) add r16=r30,r16
  2.1341      ;;
  2.1342 +    (p2) mov cr.ipsr=r16
  2.1343      adds r19= VPD_VPSR_START_OFFSET,r25
  2.1344 -    dep r16=r17,r16,IA64_PSR_RI_BIT,2
  2.1345      ;;
  2.1346 -    mov cr.ipsr=r16
  2.1347      ld8 r19=[r19]
  2.1348      ;;
  2.1349      mov r23=r31
  2.1350      mov r17=r0
  2.1351      //vps_resume_normal/handler
  2.1352 -    tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p1=vpsr.ic
  2.1353 +    tbit.z p6,p7 = r19,IA64_PSR_IC_BIT  // p7=vpsr.ic
  2.1354      (p6) br.cond.sptk.many vmx_vps_resume_handler
  2.1355      (p7) br.cond.sptk.few vmx_vps_resume_normal
  2.1356  END(vmx_resume_to_guest)
  2.1357 @@ -931,7 +1053,7 @@ MOV_TO_BANK0_REG(31)
  2.1358  
  2.1359  
  2.1360  // mov to reg table
  2.1361 -// r19:	value, r30: return address
  2.1362 +// r19: value, r30: return address
  2.1363  ENTRY(asm_mov_to_reg)
  2.1364      MOV_TO_REG0
  2.1365      MOV_TO_REG(1)
     3.1 --- a/xen/arch/ia64/vmx/vmx_ivt.S	Thu May 15 14:18:38 2008 +0900
     3.2 +++ b/xen/arch/ia64/vmx/vmx_ivt.S	Thu May 15 14:53:48 2008 +0900
     3.3 @@ -967,21 +967,13 @@ END(vmx_single_step_trap)
     3.4  ENTRY(vmx_virtualization_fault)
     3.5  //    VMX_DBG_FAULT(37)
     3.6      mov r31=pr
     3.7 +    movl r30 = virtualization_fault_table
     3.8 +    mov r23=b0
     3.9      ;;
    3.10 -    cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
    3.11 -    cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
    3.12 -    cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
    3.13 -    cmp.eq p9,p0=EVENT_RSM,r24
    3.14 -    cmp.eq p10,p0=EVENT_SSM,r24
    3.15 -    cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
    3.16 -    cmp.eq p12,p0=EVENT_THASH,r24 
    3.17 -    (p6) br.dptk.many vmx_asm_mov_from_ar
    3.18 -    (p7) br.dptk.many vmx_asm_mov_from_rr
    3.19 -    (p8) br.dptk.many vmx_asm_mov_to_rr
    3.20 -    (p9) br.dptk.many vmx_asm_rsm
    3.21 -    (p10) br.dptk.many vmx_asm_ssm
    3.22 -    (p11) br.dptk.many vmx_asm_mov_to_psr
    3.23 -    (p12) br.dptk.many vmx_asm_thash
    3.24 +    shladd r30=r24,4,r30
    3.25 +    ;;
    3.26 +    mov b0=r30
    3.27 +    br.sptk.many b0
    3.28      ;;
    3.29  vmx_virtualization_fault_back:
    3.30      mov r19=37
    3.31 @@ -990,23 +982,6 @@ vmx_virtualization_fault_back:
    3.32      ;;
    3.33      st8 [r16] = r24
    3.34      st8 [r17] = r25
    3.35 -    ;;
    3.36 -    cmp.ne p6,p0=EVENT_RFI, r24
    3.37 -    (p6) br.sptk vmx_dispatch_virtualization_fault
    3.38 -    ;;
    3.39 -    adds r18=IA64_VPD_BASE_OFFSET,r21
    3.40 -    ;;
    3.41 -    ld8 r18=[r18]
    3.42 -    ;;
    3.43 -    adds r18=IA64_VPD_VIFS_OFFSET,r18
    3.44 -    ;;
    3.45 -    ld8 r18=[r18]
    3.46 -    ;;
    3.47 -    tbit.z p6,p0=r18,63
    3.48 -    (p6) br.sptk vmx_dispatch_virtualization_fault
    3.49 -    ;;
    3.50 -    //if vifs.v=1 desert current register frame
    3.51 -    alloc r18=ar.pfs,0,0,0,0
    3.52      br.sptk vmx_dispatch_virtualization_fault
    3.53  END(vmx_virtualization_fault)
    3.54  
     4.1 --- a/xen/arch/ia64/vmx/vmx_phy_mode.c	Thu May 15 14:18:38 2008 +0900
     4.2 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c	Thu May 15 14:53:48 2008 +0900
     4.3 @@ -228,6 +228,33 @@ static int mm_switch_action(IA64_PSR ops
     4.4      return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
     4.5  }
     4.6  
     4.7 +/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
     4.8 + * so that no tlb miss is allowed.
     4.9 + */
    4.10 +void
    4.11 +switch_mm_mode_fast(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
    4.12 +{
    4.13 +    int act;
    4.14 +    act = mm_switch_action(old_psr, new_psr);
    4.15 +    switch (act) {
    4.16 +    case SW_2P_DT:
    4.17 +        vcpu->arch.arch_vmx.mmu_mode = VMX_MMU_PHY_DT;
    4.18 +        switch_to_physical_rid(vcpu);
    4.19 +        break;
    4.20 +    case SW_2P_D:
    4.21 +        vcpu->arch.arch_vmx.mmu_mode = VMX_MMU_PHY_D;
    4.22 +        switch_to_physical_rid(vcpu);
    4.23 +        break;
    4.24 +    case SW_2V:
    4.25 +        vcpu->arch.arch_vmx.mmu_mode = VMX_MMU_VIRTUAL;
    4.26 +        switch_to_virtual_rid(vcpu);
    4.27 +        break;
    4.28 +    default:
    4.29 +        break;
    4.30 +    }
    4.31 +    return;
    4.32 +}
    4.33 +
    4.34  void
    4.35  switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
    4.36  {
     5.1 --- a/xen/arch/ia64/vmx/vmx_vcpu.c	Thu May 15 14:18:38 2008 +0900
     5.2 +++ b/xen/arch/ia64/vmx/vmx_vcpu.c	Thu May 15 14:53:48 2008 +0900
     5.3 @@ -168,6 +168,34 @@ IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
     5.4      return (IA64_NO_FAULT);
     5.5  }
     5.6  
     5.7 +/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
     5.8 + * so that no tlb miss is allowed.
     5.9 + */
    5.10 +void vmx_vcpu_set_rr_fast(VCPU *vcpu, u64 reg, u64 val)
    5.11 +{
    5.12 +    u64 rrval;
    5.13 +
    5.14 +    VMX(vcpu, vrr[reg >> VRN_SHIFT]) = val;
     5.15 +    switch ((u64)(reg >> VRN_SHIFT)) {
    5.16 +    case VRN4:
    5.17 +        rrval = vrrtomrr(vcpu, val);
    5.18 +        vcpu->arch.metaphysical_saved_rr4 = rrval;
    5.19 +        if (is_virtual_mode(vcpu) && likely(vcpu == current))
    5.20 +            ia64_set_rr(reg, rrval);
    5.21 +        break;
    5.22 +    case VRN0:
    5.23 +        rrval = vrrtomrr(vcpu, val);
    5.24 +        vcpu->arch.metaphysical_saved_rr0 = rrval;
    5.25 +        if (is_virtual_mode(vcpu) && likely(vcpu == current))
    5.26 +            ia64_set_rr(reg, rrval);
    5.27 +        break;
    5.28 +    default:
    5.29 +        if (likely(vcpu == current))
    5.30 +            ia64_set_rr(reg, vrrtomrr(vcpu, val));
    5.31 +        break;
    5.32 +    }
    5.33 +}
    5.34 +
    5.35  IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, u64 reg, u64 val)
    5.36  {
    5.37      u64 rrval;
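In vmx_vcpu_set_rr_fast() above, rr0 and rr4 are special: their machine values are always cached
in metaphysical_saved_rr0/rr4, but the hardware region register is written only when the vcpu is
current and running with virtual translation, since in metaphysical mode rr0/rr4 hold the
metaphysical RID and the cached value is expected to be reloaded by switch_to_virtual_rid().  A
reduced sketch of that guard for region 0 (illustrative only, not a function in the patch):

    static void set_rr0_cached(VCPU *vcpu, u64 reg, u64 val)
    {
        u64 rrval = vrrtomrr(vcpu, val);        /* virtual RR value -> machine RR value */

        vcpu->arch.metaphysical_saved_rr0 = rrval;
        if (is_virtual_mode(vcpu) && vcpu == current)
            ia64_set_rr(reg, rrval);            /* only touch hardware for the running vcpu */
    }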
    5.38 @@ -246,8 +274,138 @@ u64 vmx_vcpu_get_itir_on_fault(VCPU *vcp
    5.39      return (rr1.rrval);
    5.40  }
    5.41  
    5.42 +/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
    5.43 + * so that no tlb miss is allowed.
    5.44 + */
    5.45 +void vmx_vcpu_mov_to_psr_fast(VCPU *vcpu, u64 value)
    5.46 +{
    5.47 +    /* TODO: Only allowed for current vcpu */
    5.48 +    u64 old_vpsr, new_vpsr, mipsr, mask;
    5.49 +    old_vpsr = VCPU(vcpu, vpsr);
    5.50  
    5.51 +    new_vpsr = (old_vpsr & 0xffffffff00000000) | (value & 0xffffffff);
    5.52 +    VCPU(vcpu, vpsr) = new_vpsr;
    5.53  
    5.54 +    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
    5.55 +
    5.56 +    /* xenoprof:
    5.57 +     * don't change psr.pp.
    5.58 +     * It is manipulated by xenoprof.
    5.59 +     */
     5.60 +    mask = 0xffffffff00000000 | IA64_PSR_IC | IA64_PSR_I
     5.61 +        | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SI | IA64_PSR_RT;
    5.62 +
    5.63 +    mipsr = (mipsr & mask) | (value & (~mask));
    5.64 +
    5.65 +    if (FP_PSR(vcpu) & IA64_PSR_DFH)
    5.66 +         mipsr |= IA64_PSR_DFH;
    5.67 +
    5.68 +    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
    5.69 +
    5.70 +    switch_mm_mode_fast(vcpu, (IA64_PSR)old_vpsr, (IA64_PSR)new_vpsr);
    5.71 +}
    5.72 +
    5.73 +#define IA64_PSR_MMU_VIRT (IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_IT)
    5.74 +/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
    5.75 + * so that no tlb miss is allowed.
    5.76 + */
    5.77 +void vmx_vcpu_rfi_fast(VCPU *vcpu)
    5.78 +{
    5.79 +    /* TODO: Only allowed for current vcpu */
    5.80 +    u64 vifs, vipsr, vpsr, mipsr, mask;
    5.81 +    vipsr = VCPU(vcpu, ipsr);
    5.82 +    vpsr = VCPU(vcpu, vpsr);
    5.83 +    vifs = VCPU(vcpu, ifs);
    5.84 +    if (vipsr & IA64_PSR_BN) {
     5.85 +        if (!(vpsr & IA64_PSR_BN))
     5.86 +            vmx_asm_bsw1();
     5.87 +    } else if (vpsr & IA64_PSR_BN)
     5.88 +        vmx_asm_bsw0();
    5.89 +
    5.90 +    /*
    5.91 +     *  For those IA64_PSR bits: id/da/dd/ss/ed/ia
     5.92 +     *  Since these bits become 0 after each successfully executed
     5.93 +     *  instruction, clear them in vpsr and let them reach mIA64_PSR instead.
     5.94 +     */
     5.95 +    VCPU(vcpu, vpsr) = vipsr & (~(IA64_PSR_ID | IA64_PSR_DA
     5.96 +                | IA64_PSR_DD | IA64_PSR_ED | IA64_PSR_IA));
    5.97 +
    5.98 +    /*
     5.99 +     * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr),
    5.100 +     * except for the following bits:
   5.101 +     * ic/i/dt/si/rt/mc/it/bn/vm
   5.102 +     */
    5.103 +    /* psr.pp is included in the mask: it is owned by xenoprof. */
   5.104 +    mask = (IA64_PSR_IC | IA64_PSR_I | IA64_PSR_DT | IA64_PSR_SI |
   5.105 +            IA64_PSR_RT | IA64_PSR_MC | IA64_PSR_IT | IA64_PSR_BN |
   5.106 +            IA64_PSR_VM | IA64_PSR_PP);
   5.107 +    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
   5.108 +    mipsr = (mipsr & mask) | (vipsr & (~mask));
   5.109 +
   5.110 +    if (FP_PSR(vcpu) & IA64_PSR_DFH)
   5.111 +         mipsr |= IA64_PSR_DFH;
   5.112 +
   5.113 +    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
   5.114 +    vmx_ia64_set_dcr(vcpu);
   5.115 +
    5.116 +    if (vifs >> 63)     /* vifs.v: load cr.ifs only when valid */
   5.117 +        ia64_setreg(_IA64_REG_CR_IFS, vifs);
   5.118 +
   5.119 +    ia64_setreg(_IA64_REG_CR_IIP, VCPU(vcpu, iip));
   5.120 +
   5.121 +    switch_mm_mode_fast(vcpu, (IA64_PSR)vpsr, (IA64_PSR)vipsr);
   5.122 +}
   5.123 +
   5.124 +/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
   5.125 + * so that no tlb miss is allowed.
   5.126 + */
   5.127 +void vmx_vcpu_ssm_fast(VCPU *vcpu, u64 imm24)
   5.128 +{
   5.129 +    u64  old_vpsr, new_vpsr, mipsr;
   5.130 +
   5.131 +    old_vpsr = VCPU(vcpu, vpsr);
   5.132 +    new_vpsr = old_vpsr | imm24;
   5.133 +
   5.134 +    VCPU(vcpu, vpsr) = new_vpsr;
   5.135 +
   5.136 +    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
   5.137 +    /* xenoprof:
   5.138 +     * don't change psr.pp.
   5.139 +     * It is manipulated by xenoprof.
   5.140 +     */
   5.141 +    mipsr |= imm24 & (~IA64_PSR_PP);
   5.142 +    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
   5.143 +
   5.144 +    switch_mm_mode_fast(vcpu, (IA64_PSR)old_vpsr, (IA64_PSR)new_vpsr);
   5.145 +}
   5.146 +
   5.147 +/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
   5.148 + * so that no tlb miss is allowed.
   5.149 + */
   5.150 +void vmx_vcpu_rsm_fast(VCPU *vcpu, u64 imm24)
   5.151 +{
   5.152 +    u64  old_vpsr, new_vpsr, mipsr;
   5.153 +
   5.154 +    old_vpsr = VCPU(vcpu, vpsr);
   5.155 +    new_vpsr = old_vpsr & ~imm24;
   5.156 +
   5.157 +    VCPU(vcpu, vpsr) = new_vpsr;
   5.158 +
   5.159 +    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
   5.160 +    /* xenoprof:
   5.161 +     * don't change psr.pp.
   5.162 +     * It is manipulated by xenoprof.
   5.163 +     */
   5.164 +    mipsr &= (~imm24) | IA64_PSR_PP;
   5.165 +    mipsr |= IA64_PSR_IC | IA64_PSR_I | IA64_PSR_DT | IA64_PSR_SI;
   5.166 +
   5.167 +    if (FP_PSR(vcpu) & IA64_PSR_DFH)
   5.168 +         mipsr |= IA64_PSR_DFH;
   5.169 +
   5.170 +    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
   5.171 +
   5.172 +    switch_mm_mode_fast(vcpu, (IA64_PSR)old_vpsr, (IA64_PSR)new_vpsr);
   5.173 +}
   5.174  
   5.175  IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
   5.176  {
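The four fast handlers added above (mov-to-psr, rfi, ssm, rsm) follow one pattern: update the
guest vpsr in the VPD, fold the permitted bits into the machine cr.ipsr while leaving psr.pp
alone for xenoprof and keeping the bits Xen itself depends on, force psr.dfh when FP_PSR demands
it, and finally call switch_mm_mode_fast() in case the translation bits changed.  The arithmetic
for the rsm case, pulled out as a stand-alone helper (hypothetical wrapper, constants as in the
patch):

    static u64 fold_rsm_into_mipsr(u64 mipsr, u64 imm24)
    {
        mipsr &= (~imm24) | IA64_PSR_PP;         /* clear the requested bits, but never pp */
        mipsr |= IA64_PSR_IC | IA64_PSR_I |
                 IA64_PSR_DT | IA64_PSR_SI;      /* Xen keeps these set on the machine side */
        return mipsr;
    }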
     6.1 --- a/xen/include/asm-ia64/vmx_phy_mode.h	Thu May 15 14:18:38 2008 +0900
     6.2 +++ b/xen/include/asm-ia64/vmx_phy_mode.h	Thu May 15 14:53:48 2008 +0900
     6.3 @@ -79,7 +79,8 @@ extern void physical_mode_init(VCPU *);
     6.4  extern void switch_to_physical_rid(VCPU *);
     6.5  extern void switch_to_virtual_rid(VCPU *vcpu);
     6.6  extern void switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
     6.7 -extern void check_mm_mode_switch (VCPU *vcpu,  IA64_PSR old_psr, IA64_PSR new_psr);
     6.8 +extern void switch_mm_mode_fast(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
     6.9 +extern void check_mm_mode_switch(VCPU *vcpu,  IA64_PSR old_psr, IA64_PSR new_psr);
    6.10  extern void prepare_if_physical_mode(VCPU *vcpu);
    6.11  extern void recover_if_physical_mode(VCPU *vcpu);
    6.12  extern void vmx_init_all_rr(VCPU *vcpu);
     7.1 --- a/xen/include/asm-ia64/vmx_vcpu.h	Thu May 15 14:18:38 2008 +0900
     7.2 +++ b/xen/include/asm-ia64/vmx_vcpu.h	Thu May 15 14:53:48 2008 +0900
     7.3 @@ -106,6 +106,8 @@ extern void vcpu_load_kernel_regs(VCPU *
     7.4  extern void vmx_switch_rr7(unsigned long, void *, void *, void *);
     7.5  extern void vmx_ia64_set_dcr(VCPU * v);
     7.6  extern void inject_guest_interruption(struct vcpu *vcpu, u64 vec);
     7.7 +extern void vmx_asm_bsw0(void);
     7.8 +extern void vmx_asm_bsw1(void);
     7.9  
    7.10  /**************************************************************************
    7.11   VCPU control register access routines
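The two new externs let the C fast path flip the guest's register bank: vmx_vcpu_rfi_fast()
compares psr.bn in the interrupted and current guest PSR and calls the assembly bank-switch
routines accordingly.  A condensed view of that decision (the helper name is made up; the test
itself is the one used in vmx_vcpu.c above):

    static void sync_guest_bank(u64 vpsr, u64 vipsr)
    {
        if (vipsr & IA64_PSR_BN) {
            if (!(vpsr & IA64_PSR_BN))
                vmx_asm_bsw1();                  /* target state uses bank 1 */
        } else if (vpsr & IA64_PSR_BN)
            vmx_asm_bsw0();                      /* target state uses bank 0 */
    }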