xenbits.xensource.com Git - people/dariof/xen.git/commitdiff
x86: improve MSR_SHADOW_GS accesses
author    Jan Beulich <jbeulich@suse.com>
          Fri, 9 Mar 2018 16:29:45 +0000 (17:29 +0100)
committer Jan Beulich <jbeulich@suse.com>
          Fri, 9 Mar 2018 16:29:45 +0000 (17:29 +0100)
Instead of using RDMSR/WRMSR, on fsgsbase-capable systems use a double
SWAPGS combined with RDGSBASE/WRGSBASE. This halves execution time for
a shadow GS update alone on my Haswell (and we have indications of
good performance improvements by this on Skylake too), while the win is
even higher when e.g. updating more than one base (as may and commonly
will happen in load_segments()).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
xen/arch/x86/domain.c
xen/arch/x86/hvm/vmx/vmx.c
xen/arch/x86/pv/emul-priv-op.c
xen/arch/x86/x86_64/mm.c
xen/arch/x86/x86_64/traps.c
xen/include/asm-x86/msr.h

index 69679a652583bcc390ad517046c4b91526833aad..b4e062472e729d9b2486390c7c6614983830d177 100644 (file)
@@ -1338,9 +1338,12 @@ static void load_segments(struct vcpu *n)
         if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) )
             wrfsbase(n->arch.pv_vcpu.fs_base);
 
-        /* Most kernels have non-zero GS base, so don't bother testing. */
-        /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
-        wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel);
+        /*
+         * Most kernels have non-zero GS base, so don't bother testing.
+         * (For old AMD hardware this is also a serialising instruction,
+         * avoiding erratum #88.)
+         */
+        wrgsshadow(n->arch.pv_vcpu.gs_base_kernel);
 
         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.gs_base_user |
index 18d8ce230324bbc9ce70b375f0fbc86df2c9f5b7..c7c8a0812f2eec1a33137a8eecb9fe5fa0d4bd4a 100644 (file)
@@ -503,12 +503,12 @@ static void vmx_save_guest_msrs(struct vcpu *v)
      * We cannot cache SHADOW_GS_BASE while the VCPU runs, as it can
      * be updated at any time via SWAPGS, which we cannot trap.
      */
-    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
+    v->arch.hvm_vmx.shadow_gs = rdgsshadow();
 }
 
 static void vmx_restore_guest_msrs(struct vcpu *v)
 {
-    wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
+    wrgsshadow(v->arch.hvm_vmx.shadow_gs);
     wrmsrl(MSR_STAR,           v->arch.hvm_vmx.star);
     wrmsrl(MSR_LSTAR,          v->arch.hvm_vmx.lstar);
     wrmsrl(MSR_SYSCALL_MASK,   v->arch.hvm_vmx.sfmask);
@@ -2846,7 +2846,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         break;
 
     case MSR_SHADOW_GS_BASE:
-        rdmsrl(MSR_SHADOW_GS_BASE, *msr_content);
+        *msr_content = rdgsshadow();
         break;
 
     case MSR_STAR:
@@ -3065,7 +3065,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         else if ( msr == MSR_GS_BASE )
             __vmwrite(GUEST_GS_BASE, msr_content);
         else
-            wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
+            wrgsshadow(msr_content);
 
         break;
 
index ecb3b9c47b7a6d46c1a01ebd01af71e075784f37..af58544a3aa0a560b0923954b71c90fc3435ed67 100644 (file)
@@ -1032,7 +1032,7 @@ static int write_msr(unsigned int reg, uint64_t val,
     case MSR_SHADOW_GS_BASE:
         if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
             break;
-        wrmsrl(MSR_SHADOW_GS_BASE, val);
+        wrgsshadow(val);
         curr->arch.pv_vcpu.gs_base_user = val;
         return X86EMUL_OKAY;
 
index 9b37da6698cb8ff2c5a628dd206b4275fcf52044..2dfb6af5e9d6cfd19edf97a55cebe62eb0474b39 100644 (file)
@@ -1034,7 +1034,7 @@ long do_set_segment_base(unsigned int which, unsigned long base)
     case SEGBASE_GS_USER:
         if ( is_canonical_address(base) )
         {
-            wrmsrl(MSR_SHADOW_GS_BASE, base);
+            wrgsshadow(base);
             v->arch.pv_vcpu.gs_base_user = base;
         }
         else
index 4649ad4d102794a37a594799dc154cf6167d7797..4f85c32f92fe563d730bcf6f3bae1263036e5ac6 100644 (file)
@@ -49,7 +49,7 @@ static void read_registers(struct cpu_user_regs *regs, unsigned long crs[8])
     regs->gs = read_sreg(gs);
     crs[5] = rdfsbase();
     crs[6] = rdgsbase();
-    rdmsrl(MSR_SHADOW_GS_BASE, crs[7]);
+    crs[7] = rdgsshadow();
 }
 
 static void _show_registers(
index b289b582f4dd78dc456f40375f03c4b4485e0eb2..f14f265aa55c70ffa3a109bb50454a439521a6b1 100644 (file)
@@ -170,6 +170,22 @@ static inline unsigned long rdgsbase(void)
     return base;
 }
 
+static inline unsigned long rdgsshadow(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+    {
+        asm volatile ( "swapgs" );
+        base = __rdgsbase();
+        asm volatile ( "swapgs" );
+    }
+    else
+        rdmsrl(MSR_SHADOW_GS_BASE, base);
+
+    return base;
+}
+
 static inline void wrfsbase(unsigned long base)
 {
     if ( cpu_has_fsgsbase )
@@ -194,6 +210,25 @@ static inline void wrgsbase(unsigned long base)
         wrmsrl(MSR_GS_BASE, base);
 }
 
+static inline void wrgsshadow(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+    {
+        asm volatile ( "swapgs\n\t"
+#ifdef HAVE_AS_FSGSBASE
+                       "wrgsbase %0\n\t"
+                       "swapgs"
+                       :: "r" (base) );
+#else
+                       ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8\n\t"
+                       "swapgs"
+                       :: "a" (base) );
+#endif
+    }
+    else
+        wrmsrl(MSR_SHADOW_GS_BASE, base);
+}
+
 DECLARE_PER_CPU(uint64_t, efer);
 static inline uint64_t read_efer(void)
 {