Currently a PV hardware domain can also be given control over the CPU
frequency, and such guest is allowed to write to MSR_IA32_PERF_CTL.
However since commit
322ec7c89f6 the default behavior has been changed
to reject accesses to not explicitly handled MSRs, preventing PV
guests that manage CPU frequency from reading
MSR_IA32_PERF_{STATUS/CTL}.
Additionally some HVM guests (Windows at least) will attempt to read
MSR_IA32_PERF_CTL and will panic if given back a #GP fault:
vmx.c:3035:d8v0 RDMSR 0x00000199 unimplemented
d8v0 VIRIDIAN CRASH: 3b
c0000096 fffff806871c1651 ffffda0253683720 0
Move the handling of MSR_IA32_PERF_{STATUS/CTL} to the common MSR
handling shared between HVM and PV guests, and add an explicit case
for reads to MSR_IA32_PERF_{STATUS/CTL}.
Restore previous behavior and allow PV guests with the required
permissions to read the contents of the mentioned MSRs. Non privileged
guests will get 0 when trying to read those registers, as writes to
MSR_IA32_PERF_CTL by such guest will already be silently dropped.
Fixes: 322ec7c89f6 ('x86/pv: disallow access to unknown MSRs')
Fixes: 84e848fd7a1 ('x86/hvm: disallow access to unknown MSRs')
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
goto gp_fault;
break;
+ /*
+ * These MSRs are not enumerated in CPUID. They have been around
+ * since the Pentium 4, and implemented by other vendors.
+ *
+ * Some versions of Windows try reading these before setting up a #GP
+ * handler, and Linux has several unguarded reads as well. Provide
+ * RAZ semantics, in general, but permit a cpufreq controller dom0 to
+ * have full access.
+ */
+ case MSR_IA32_PERF_STATUS:
+ case MSR_IA32_PERF_CTL:
+ if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
+ goto gp_fault;
+
+ *val = 0;
+ if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
+ break;
+ goto gp_fault;
+
case MSR_IA32_THERM_STATUS:
if ( cp->x86_vendor != X86_VENDOR_INTEL )
goto gp_fault;
goto gp_fault;
break;
+ /*
+ * This MSR is not enumerated in CPUID. It has been around since the
+ * Pentium 4, and implemented by other vendors.
+ *
+ * To match the RAZ semantics, implement as write-discard, except for
+ * a cpufreq controller dom0 which has full access.
+ */
+ case MSR_IA32_PERF_CTL:
+ if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
+ goto gp_fault;
+
+ if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
+ break;
+ goto gp_fault;
+
case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
if ( !is_hvm_domain(d) || v != curr )
goto gp_fault;
return val;
}
-static inline bool is_cpufreq_controller(const struct domain *d)
-{
- return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
- is_hardware_domain(d));
-}
-
static uint64_t guest_efer(const struct domain *d)
{
uint64_t val;
return X86EMUL_OKAY;
break;
- case MSR_IA32_PERF_CTL:
- if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
- break;
- if ( likely(!is_cpufreq_controller(currd)) ||
- wrmsr_safe(reg, val) == 0 )
- return X86EMUL_OKAY;
- break;
-
case MSR_IA32_THERM_CONTROL:
case MSR_IA32_ENERGY_PERF_BIAS:
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
+static always_inline bool is_cpufreq_controller(const struct domain *d)
+{
+ /*
+ * A PV dom0 can be nominated as the cpufreq controller, instead of using
+ * Xen's cpufreq driver, at which point dom0 gets direct access to certain
+ * MSRs.
+ *
+ * This interface only works when dom0 is identity pinned and has the same
+ * number of vCPUs as pCPUs on the system.
+ *
+ * It would be far better to paravirtualise the interface.
+ */
+ return (is_pv_domain(d) && is_hardware_domain(d) &&
+ cpufreq_controller == FREQCTL_dom0_kernel);
+}
+
int cpupool_move_domain(struct domain *d, struct cpupool *c);
int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
int cpupool_get_id(const struct domain *d);