v->arch.guest_table_user = pagetable_null();
}
+/*
+ * Construct the cr4 value a PV guest gets to see, given the value it (or the
+ * caller on its behalf) asked for.
+ *
+ * v:   vcpu the value is being constructed for.
+ * cr4: requested cr4 value; 0 is passed for a freshly initialised vcpu.
+ *
+ * Returns the PV_CR4_GUEST_MASK bits of cr4 which survive the CPUID based
+ * filtering below, merged with the PV_CR4_GUEST_VISIBLE_MASK bits of
+ * mmu_cr4_features.  All other bits of the requested value are dropped.
+ *
+ * NOTE(review): v is not referenced anywhere in the body.  The feature bits
+ * come from pv_cpuid(), which is not handed a vcpu or domain here -- confirm
+ * which domain's policy it consults when v is not the running vcpu.
+ */
+unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4)
+{
+ unsigned int leaf1_ecx = 0, leaf1_edx = 0;
+ unsigned int leaf7_0_ebx = 0, level = 0;
+
+ /*
+ * The first output of leaf 0 is used as the highest leaf which may be
+ * queried (presumably EAX, the maximum basic leaf -- confirm against
+ * pv_cpuid()).  Outputs of leaves which are not queried keep their zero
+ * initialiser, so the features they describe count as absent below.
+ */
+ pv_cpuid(0, 0, &level, NULL, NULL, NULL);
+ if ( level >= 1 )
+ pv_cpuid(1, 0, NULL, NULL, &leaf1_ecx, &leaf1_edx);
+ if ( level >= 7 )
+ pv_cpuid(7, 0, NULL, &leaf7_0_ebx, NULL, NULL);
+
+ /*
+ * Discard attempts to set guest controllable bits outside of the policy.
+ * Each ternary yields the cr4 bit to strip when the matching feature bit
+ * is clear: TSC -> TSD, DE -> DE, FSGSBASE -> FSGSBASE, XSAVE -> OSXSAVE.
+ */
+ cr4 &= ~(((leaf1_edx & cpufeat_mask(X86_FEATURE_TSC))
+ ? 0 : X86_CR4_TSD) |
+ ((leaf1_edx & cpufeat_mask(X86_FEATURE_DE))
+ ? 0 : X86_CR4_DE) |
+ ((leaf7_0_ebx & cpufeat_mask(X86_FEATURE_FSGSBASE))
+ ? 0 : X86_CR4_FSGSBASE) |
+ ((leaf1_ecx & cpufeat_mask(X86_FEATURE_XSAVE))
+ ? 0 : X86_CR4_OSXSAVE));
+
+ /*
+ * Masks expected to be disjoint sets.  The return expression below ORs
+ * one contribution per mask together, so an overlapping bit would have
+ * two sources.
+ */
+ BUILD_BUG_ON(PV_CR4_GUEST_MASK & PV_CR4_GUEST_VISIBLE_MASK);
+
+ /*
+ * A guest sees the policy subset of its own choice of guest controllable
+ * bits, and a subset of Xen's choice of certain hardware settings.
+ */
+ return ((cr4 & PV_CR4_GUEST_MASK) |
+ (mmu_cr4_features & PV_CR4_GUEST_VISIBLE_MASK));
+}
+
+/*
+ * Build the cr4 value to load into hardware while the given PV vcpu runs.
+ *
+ * v: vcpu whose domain settings and recorded guest cr4 (ctrlreg[4]) are read.
+ *
+ * Returns mmu_cr4_features with PCIDE, PGE and TSD replaced by values
+ * derived from the domain's pcid, xpti and vtsc settings and from the TSD
+ * bit of the vcpu's recorded guest cr4.  No other bit of the recorded guest
+ * cr4 is read here.
+ */
+unsigned long pv_make_cr4(const struct vcpu *v)
+{
+ const struct domain *d = v->domain;
+ /* Start with the three per-domain/per-vcpu bits cleared; re-add below. */
+ unsigned long cr4 = mmu_cr4_features &
+ ~(X86_CR4_PCIDE | X86_CR4_PGE | X86_CR4_TSD);
+
+ /*
+ * PCIDE or PGE depends on the PCID/XPTI settings, but must not both be
+ * set, as it impacts the safety of TLB flushing.
+ *
+ * The if / else-if guarantees at most one of the two is set; with pcid
+ * clear and xpti set, neither is.
+ */
+ if ( d->arch.pv_domain.pcid )
+ cr4 |= X86_CR4_PCIDE;
+ else if ( !d->arch.pv_domain.xpti )
+ cr4 |= X86_CR4_PGE;
+
+ /*
+ * TSD is needed if either the guest has elected to use it, or Xen is
+ * virtualising the TSC value the guest sees.
+ */
+ if ( d->arch.vtsc || (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_TSD) )
+ cr4 |= X86_CR4_TSD;
+
+ return cr4;
+}
+
static void set_domain_xpti(struct domain *d)
{
if ( is_pv_32bit_domain(d) )
/* PV guests by default have a 100Hz ticker. */
v->periodic_period = MILLISECS(10);
+
+ v->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(v, 0);
}
v->arch.schedule_tail = continue_nonidle_domain;
v->arch.cr3 = __pa(idle_pg_table);
}
- v->arch.pv_vcpu.ctrlreg[4] = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
-
if ( is_pv_32bit_domain(d) )
{
if ( (rc = setup_compat_arg_xlat(v)) )
return ret;
}
-/*
- * These are the masks of CR4 bits (subject to hardware availability) which a
- * PV guest may not legitimiately attempt to modify.
- */
-static unsigned long __read_mostly pv_cr4_mask, compat_pv_cr4_mask;
-
-static int __init init_pv_cr4_masks(void)
-{
- unsigned long common_mask = ~X86_CR4_TSD;
-
- /*
- * All PV guests may attempt to modify TSD, DE and OSXSAVE.
- */
- if ( cpu_has_de )
- common_mask &= ~X86_CR4_DE;
- if ( cpu_has_xsave )
- common_mask &= ~X86_CR4_OSXSAVE;
-
- pv_cr4_mask = compat_pv_cr4_mask = common_mask;
-
- /*
- * 64bit PV guests may attempt to modify FSGSBASE.
- */
- if ( cpu_has_fsgsbase )
- pv_cr4_mask &= ~X86_CR4_FSGSBASE;
-
- return 0;
-}
-__initcall(init_pv_cr4_masks);
-
-unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long guest_cr4)
-{
- unsigned long hv_cr4 = real_cr4_to_pv_guest_cr4(read_cr4());
- unsigned long mask = is_pv_32bit_vcpu(v) ? compat_pv_cr4_mask : pv_cr4_mask;
-
- if ( (guest_cr4 & mask) != (hv_cr4 & mask) )
- printk(XENLOG_G_WARNING
- "d%d attempted to change %pv's CR4 flags %08lx -> %08lx\n",
- current->domain->domain_id, v, hv_cr4, guest_cr4);
-
- return (hv_cr4 & mask) | (guest_cr4 & ~mask);
-}
-
#define xen_vcpu_guest_context vcpu_guest_context
#define fpu_ctxt fpu_ctxt.x
CHECK_FIELD_(struct, vcpu_guest_context, fpu_ctxt);
struct domain *d = v->domain;
unsigned long cr3_gfn;
struct page_info *cr3_page;
- unsigned long flags, cr4;
+ unsigned long flags;
unsigned int i;
int rc = 0, compat;
v->arch.pv_vcpu.ctrlreg[0] &= X86_CR0_TS;
v->arch.pv_vcpu.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS;
- cr4 = v->arch.pv_vcpu.ctrlreg[4];
- v->arch.pv_vcpu.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(v, cr4) :
- real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+ v->arch.pv_vcpu.ctrlreg[4] =
+ pv_fixup_guest_cr4(v, v->arch.pv_vcpu.ctrlreg[4]);
memset(v->arch.debugreg, 0, sizeof(v->arch.debugreg));
for ( i = 0; i < 8; i++ )
v->arch.cr3 |= get_pcid_bits(v, 0);
}
-unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu *v)
-{
- const struct domain *d = v->domain;
- unsigned long cr4;
-
- cr4 = v->arch.pv_vcpu.ctrlreg[4] & ~X86_CR4_DE;
- cr4 |= mmu_cr4_features & (X86_CR4_PSE | X86_CR4_SMEP | X86_CR4_SMAP |
- X86_CR4_OSXSAVE | X86_CR4_FSGSBASE);
-
- if ( d->arch.pv_domain.pcid )
- cr4 |= X86_CR4_PCIDE;
- else if ( !d->arch.pv_domain.xpti )
- cr4 |= X86_CR4_PGE;
-
- cr4 |= d->arch.vtsc ? X86_CR4_TSD : 0;
-
- return cr4;
-}
-
void write_ptbase(struct vcpu *v)
{
struct cpu_info *cpu_info = get_cpu_info();
unsigned long new_cr4;
new_cr4 = (is_pv_vcpu(v) && !is_idle_vcpu(v))
- ? pv_guest_cr4_to_real_cr4(v)
- : ((read_cr4() & ~(X86_CR4_PCIDE | X86_CR4_TSD)) | X86_CR4_PGE);
+ ? pv_make_cr4(v) : mmu_cr4_features;
if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti )
{
switch_cr3_cr4(v->arch.cr3, new_cr4);
cpu_info->pv_cr3 = 0;
}
-
- ASSERT(is_pv_vcpu(v) || read_cr4() == mmu_cr4_features);
}
/*
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
- if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+ if ( !is_control_domain(currd) && !is_hardware_domain(currd) &&
+ !is_idle_domain(currd) )
domain_cpuid(currd, leaf, subleaf, eax, ebx, ecx, edx);
else
cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
}
case 4: /* Write CR4 */
- v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg);
- write_cr4(pv_guest_cr4_to_real_cr4(v));
+ v->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(v, *reg);
+ write_cr4(pv_make_cr4(v));
ctxt_switch_levelling(v);
break;
void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);
-/* Clean up CR4 bits that are not under guest control. */
-unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
+/*
+ * Bits which a PV guest can toggle in its view of cr4. Some are loaded into
+ * hardware, while some are fully emulated.
+ */
+#define PV_CR4_GUEST_MASK \
+ (X86_CR4_TSD | X86_CR4_DE | X86_CR4_FSGSBASE | X86_CR4_OSXSAVE)
+
+/* Bits which a PV guest may observe from the real hardware settings. */
+#define PV_CR4_GUEST_VISIBLE_MASK \
+ (X86_CR4_PAE | X86_CR4_MCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
-/* Convert between guest-visible and real CR4 values. */
-unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu *v);
+/* Given a new cr4 value, construct the resulting guest-visible cr4 value. */
+unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4);
-#define real_cr4_to_pv_guest_cr4(c) \
- ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
- X86_CR4_OSXSAVE | X86_CR4_SMEP | \
- X86_CR4_FSGSBASE | X86_CR4_SMAP | X86_CR4_PCIDE))
+/* Create a cr4 value to load into hardware, based on vcpu settings. */
+unsigned long pv_make_cr4(const struct vcpu *v);
void domain_cpuid(struct domain *d,
unsigned int input,