per_cpu(dirty_segment_mask, cpu) = 0;
#ifdef CONFIG_HVM
- if ( !is_pv_32bit_vcpu(n) && !cpu_has_fsgsbase && cpu_has_svm &&
- !((uregs->fs | uregs->gs) & ~3) &&
- /*
- * The remaining part is just for optimization: If only shadow GS
- * needs loading, there's nothing to be gained here.
- */
- (n->arch.pv.fs_base | n->arch.pv.gs_base_user | n->arch.pv.ldt_ents) )
+ if ( cpu_has_svm && !is_pv_32bit_vcpu(n) &&
+ !(read_cr4() & X86_CR4_FSGSBASE) && !((uregs->fs | uregs->gs) & ~3) )
{
unsigned long gsb = n->arch.flags & TF_kernel_mode
? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
regs->fs = read_sreg(fs);
regs->gs = read_sreg(gs);
- if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) )
+ /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */
+ if ( (read_cr4() & X86_CR4_FSGSBASE) && !is_pv_32bit_vcpu(v) )
{
v->arch.pv.fs_base = __rdfsbase();
if ( v->arch.flags & TF_kernel_mode )
#if defined(CONFIG_PV) && defined(CONFIG_HVM)
/* Prefetch the VMCB if we expect to use it later in the context switch */
- if ( is_pv_domain(nd) && !is_pv_32bit_domain(nd) && !is_idle_domain(nd) &&
- !cpu_has_fsgsbase && cpu_has_svm )
+ if ( cpu_has_svm && is_pv_domain(nd) && !is_pv_32bit_domain(nd) &&
+ !is_idle_domain(nd) && !(read_cr4() & X86_CR4_FSGSBASE) )
svm_load_segs(0, 0, 0, 0, 0, 0, 0);
#endif
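This prefetch only pays off when the context-switch path above will actually call svm_load_segs(), i.e. for a non-idle 64bit PV domain on an SVM-capable CPU while Xen cannot use WR{FS,GS}BASE itself, so the condition here has to stay in step with the one rewritten in the earlier hunk. A minimal sketch of that test, using a hypothetical helper name not present in the patch:

/* Sketch only: "pv_needs_svm_load_segs" is a hypothetical name. */
static bool pv_needs_svm_load_segs(const struct domain *nd)
{
    return cpu_has_svm && is_pv_domain(nd) && !is_pv_32bit_domain(nd) &&
           !is_idle_domain(nd) && !(read_cr4() & X86_CR4_FSGSBASE);
}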
goto err;
#ifdef CONFIG_PV
- if ( !cpu_has_fsgsbase )
- per_cpu(host_vmcb_va, cpu) = __map_domain_page_global(pg);
+ per_cpu(host_vmcb_va, cpu) = __map_domain_page_global(pg);
#endif
clear_domain_page(page_to_mfn(pg));
if ( d->arch.vtsc || (v->arch.pv.ctrlreg[4] & X86_CR4_TSD) )
cr4 |= X86_CR4_TSD;
+ /*
+ * The {RD,WR}{FS,GS}BASE instructions are only usable in 64bit code
+ * segments. While we must not have CR4.FSGSBASE set behind the back of a
+ * 64bit PV kernel, we do leave it set in 32bit PV context to speed up
+ * Xen's context switch path.
+ */
+ if ( !is_pv_32bit_domain(d) && !(v->arch.pv.ctrlreg[4] & X86_CR4_FSGSBASE) )
+ cr4 &= ~X86_CR4_FSGSBASE;
+
return cr4;
}
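The two added lines encode the patch's core policy: for a 64bit PV guest, the CR4.FSGSBASE value Xen runs with must mirror the guest kernel's own setting, while 32bit PV guests (which cannot execute the instructions at all) leave the bit set purely for Xen's benefit. A minimal sketch of that policy, assuming a hypothetical helper name:

/* Sketch only: "xen_may_use_fsgsbase" is a hypothetical name. */
static bool xen_may_use_fsgsbase(const struct vcpu *v)
{
    /* 32bit PV guests can't use the instructions, so Xen keeps the bit. */
    if ( is_pv_32bit_vcpu(v) )
        return boot_cpu_has(X86_FEATURE_FSGSBASE);

    /* 64bit PV guests: mirror the kernel's own CR4.FSGSBASE choice. */
    return v->arch.pv.ctrlreg[4] & X86_CR4_FSGSBASE;
}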
{
ASSERT(!is_pv_32bit_vcpu(v));
- if ( cpu_has_fsgsbase )
+ /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */
+ if ( read_cr4() & X86_CR4_FSGSBASE )
{
if ( v->arch.flags & TF_kernel_mode )
v->arch.pv.gs_base_kernel = __rdgsbase();
}
case 4: /* Write CR4 */
+ /*
+ * If this write will disable FSGSBASE, refresh Xen's idea of the
+ * guest bases now that they can no longer change.
+ */
+ if ( (curr->arch.pv.ctrlreg[4] & X86_CR4_FSGSBASE) &&
+ !(val & X86_CR4_FSGSBASE) )
+ {
+ curr->arch.pv.fs_base = __rdfsbase();
+ curr->arch.pv.gs_base_kernel = __rdgsbase();
+ }
+
curr->arch.pv.ctrlreg[4] = pv_fixup_guest_cr4(curr, val);
write_cr4(pv_make_cr4(curr));
ctxt_switch_levelling(curr);
case MSR_FS_BASE:
if ( is_pv_32bit_domain(currd) )
break;
- *val = cpu_has_fsgsbase ? __rdfsbase() : curr->arch.pv.fs_base;
+ *val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdfsbase()
+ : curr->arch.pv.fs_base;
return X86EMUL_OKAY;
case MSR_GS_BASE:
if ( is_pv_32bit_domain(currd) )
break;
- *val = cpu_has_fsgsbase ? __rdgsbase()
- : curr->arch.pv.gs_base_kernel;
+ *val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdgsbase()
+ : curr->arch.pv.gs_base_kernel;
return X86EMUL_OKAY;
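Taken together with the CR4 write hunk above, these reads follow a single invariant: while the guest runs with CR4.FSGSBASE set it can change its bases with WR{FS,GS}BASE without trapping, so hardware holds the live value and the copies cached in arch.pv may be stale; the moment the guest disables FSGSBASE, the cached copies are refreshed and become authoritative. A sketch of the read side under that invariant, with a hypothetical helper name:

/* Sketch only: "current_fs_base" is a hypothetical name. */
static unsigned long current_fs_base(const struct vcpu *curr)
{
    /* FSGSBASE usable: hardware may be newer than the cached copy. */
    return (read_cr4() & X86_CR4_FSGSBASE) ? __rdfsbase()
                                           : curr->arch.pv.fs_base;
}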
case MSR_SHADOW_GS_BASE:
cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
- if ( cpu_has_fsgsbase )
+ if ( boot_cpu_has(X86_FEATURE_FSGSBASE) )
set_in_cr4(X86_CR4_FSGSBASE);
if ( opt_invpcid && cpu_has_invpcid )
#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
/* CPUID level 0x00000007:0.ebx */
-#define cpu_has_fsgsbase boot_cpu_has(X86_FEATURE_FSGSBASE)
#define cpu_has_bmi1 boot_cpu_has(X86_FEATURE_BMI1)
#define cpu_has_hle boot_cpu_has(X86_FEATURE_HLE)
#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
: "=a" (low), "=d" (high) \
: "c" (counter))
+/*
+ * On hardware supporting FSGSBASE, the CR4.FSGSBASE setting loaded into
+ * hardware is the guest kernel's choice for 64bit PV guests (Xen's choice
+ * for Idle, HVM and 32bit PV).
+ *
+ * Therefore, the {RD,WR}{FS,GS}BASE instructions are only safe to use if
+ * %cr4.fsgsbase is set.
+ */
static inline unsigned long __rdfsbase(void)
{
unsigned long base;
{
unsigned long base;
- if ( cpu_has_fsgsbase )
+ if ( read_cr4() & X86_CR4_FSGSBASE )
return __rdfsbase();
rdmsrl(MSR_FS_BASE, base);
{
unsigned long base;
- if ( cpu_has_fsgsbase )
+ if ( read_cr4() & X86_CR4_FSGSBASE )
return __rdgsbase();
rdmsrl(MSR_GS_BASE, base);
{
unsigned long base;
- if ( cpu_has_fsgsbase )
+ if ( read_cr4() & X86_CR4_FSGSBASE )
{
asm volatile ( "swapgs" );
base = __rdgsbase();
static inline void wrfsbase(unsigned long base)
{
- if ( cpu_has_fsgsbase )
+ if ( read_cr4() & X86_CR4_FSGSBASE )
#ifdef HAVE_AS_FSGSBASE
asm volatile ( "wrfsbase %0" :: "r" (base) );
#else
static inline void wrgsbase(unsigned long base)
{
- if ( cpu_has_fsgsbase )
+ if ( read_cr4() & X86_CR4_FSGSBASE )
#ifdef HAVE_AS_FSGSBASE
asm volatile ( "wrgsbase %0" :: "r" (base) );
#else
static inline void wrgsshadow(unsigned long base)
{
- if ( cpu_has_fsgsbase )
+ if ( read_cr4() & X86_CR4_FSGSBASE )
{
asm volatile ( "swapgs\n\t"
#ifdef HAVE_AS_FSGSBASE
static inline void write_cr4(unsigned long val)
{
+ struct cpu_info *info = get_cpu_info();
+
/* No global pages in case of PCIDs enabled! */
ASSERT(!(val & X86_CR4_PGE) || !(val & X86_CR4_PCIDE));
- get_cpu_info()->cr4 = val;
- asm volatile ( "mov %0,%%cr4" : : "r" (val) );
+ /*
+ * On hardware supporting FSGSBASE, the CR4.FSGSBASE setting in %cr4 is the
+ * guest kernel's choice for 64bit PV guests, which impacts whether Xen can
+ * use the {RD,WR}{FS,GS}BASE instructions.
+ *
+ * The {rd,wr}{fs,gs}base() helpers use info->cr4 to work out whether it
+ * is safe to execute the {RD,WR}{FS,GS}BASE instruction, falling back to
+ * the MSR path if not. Some users require interrupt safety.
+ *
+ * If FSGSBASE is currently or about to become clear, reflect this in
+ * info->cr4 before updating %cr4, so an interrupt which hits in the
+ * middle won't observe FSGSBASE set in info->cr4 but clear in %cr4.
+ */
+ info->cr4 = val & (info->cr4 | ~X86_CR4_FSGSBASE);
+
+ asm volatile ( "mov %[val], %%cr4"
+ : "+m" (info->cr4) /* Force ordering without a barrier. */
+ : [val] "r" (val) );
+
+ info->cr4 = val;
}
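The subtlety here is that read_cr4() returns the cached info->cr4 rather than the register, so the {rd,wr}{fs,gs}base() helpers decide between the instruction and the MSR path from the cache alone. The staged update above guarantees the cache never advertises FSGSBASE while the real %cr4 has it clear; sketched as an interleaving (illustrative only):

/*
 * CPU clearing FSGSBASE via write_cr4()    Interrupt on the same CPU
 * --------------------------------------   -------------------------------
 * info->cr4 = val & (...);     bit clear
 *                                           rdfsbase() sees FSGSBASE clear
 *                                           in info->cr4 and takes the MSR
 *                                           path - safe.
 * mov %cr4                     bit clear
 * info->cr4 = val;
 *
 * Had %cr4 been written before info->cr4, the handler could still see
 * FSGSBASE set in info->cr4, execute RDFSBASE, and take #UD against a %cr4
 * that no longer allows it.
 */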
/* Clear and set 'TS' bit respectively */