]> xenbits.xensource.com Git - people/pauldu/linux.git/commitdiff
KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT tsc_is_unstable3
author Paul Durrant <pdurrant@amazon.com>
Tue, 31 Oct 2023 11:45:29 +0000 (11:45 +0000)
committer Paul Durrant <pdurrant@amazon.com>
Thu, 2 Nov 2023 16:19:15 +0000 (16:19 +0000)
Unless explicitly told to do so (by passing 'clocksource=tsc' and
'tsc=stable:socket', and then jumping through some hoops concerning
potential CPU hotplug) Xen will never use TSC as its clocksource.
Hence, by default, a Xen guest will not see PVCLOCK_TSC_STABLE_BIT set
in either the primary or secondary pvclock memory areas. This has
led to bugs in some guest kernels which only become evident if
PVCLOCK_TSC_STABLE_BIT *is* set in the pvclocks. Hence, to support
such guests, give the VMM a new Xen HVM config flag to tell KVM to
forcibly clear the bit in the Xen pvclocks.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---

v5:
 - Fix warning reported by kernel test robot.

v4:
 - Re-base.
 - Re-work 'update_pvclock' test as requested.

v3:
 - Moved clearing of PVCLOCK_TSC_STABLE_BIT to the right side of the
   memcpy().
 - Added an all-vCPUs KVM_REQ_CLOCK_UPDATE when the HVM config
   flag is changed.

Documentation/virt/kvm/api.rst
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
include/uapi/linux/kvm.h

index 7025b37510279530058dc68f66a1140fd72458a5..a9bdd25826d1a6ef83eb0909fa1671212d8ba16c 100644 (file)
@@ -8374,6 +8374,7 @@ PVHVM guests. Valid flags are::
   #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL             (1 << 4)
   #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND               (1 << 5)
   #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG      (1 << 6)
+  #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE      (1 << 7)
 
 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
 ioctl is available, for the guest to set its hypercall page.
@@ -8417,6 +8418,11 @@ behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
 specifically enabled (by the guest making the hypercall, causing the VMM
 to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
 
+The KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag indicates that KVM supports
+clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be
+done when the KVM_XEN_HVM_CONFIG ioctl sets the
+KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag.
+
 8.31 KVM_CAP_PPC_MULTITCE
 -------------------------
 
index 2c924075f6f112a594c0a4390bb7dcc2d7e8fabf..cc8d1ae29be3f75da15500b2403817b7d9903ec3 100644 (file)
@@ -3104,7 +3104,8 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 
 static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
                                    struct gfn_to_pfn_cache *gpc,
-                                   unsigned int offset)
+                                   unsigned int offset,
+                                   bool force_tsc_unstable)
 {
        struct kvm_vcpu_arch *vcpu = &v->arch;
        struct pvclock_vcpu_time_info *guest_hv_clock;
@@ -3141,6 +3142,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
        }
 
        memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+
+       if (force_tsc_unstable)
+               guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
        smp_wmb();
 
        guest_hv_clock->version = ++vcpu->hv_clock.version;
@@ -3161,6 +3166,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        u64 tsc_timestamp, host_tsc;
        u8 pvclock_flags;
        bool use_master_clock;
+#ifdef CONFIG_KVM_XEN
+       /*
+        * For Xen guests we may need to clear PVCLOCK_TSC_STABLE_BIT because,
+        * unless explicitly told to use TSC as its clocksource, Xen never sets
+        * it. Some guest kernels therefore have bugs that only show up when
+        * they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
+        */
+       bool xen_pvclock_tsc_unstable =
+               ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+#endif
 
        kernel_ns = 0;
        host_tsc = 0;
@@ -3239,13 +3254,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        vcpu->hv_clock.flags = pvclock_flags;
 
        if (vcpu->pv_time.active)
-               kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
+               kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
 #ifdef CONFIG_KVM_XEN
        if (vcpu->xen.vcpu_info_cache.active)
                kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
-                                       offsetof(struct compat_vcpu_info, time));
+                                       offsetof(struct compat_vcpu_info, time),
+                                       xen_pvclock_tsc_unstable);
        if (vcpu->xen.vcpu_time_info_cache.active)
-               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
+               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
+                                       xen_pvclock_tsc_unstable);
 #endif
        kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
        return 0;
@@ -4638,7 +4655,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
                    KVM_XEN_HVM_CONFIG_SHARED_INFO |
                    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
-                   KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+                   KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+                   KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
                if (sched_info_on())
                        r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
                             KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
index e53fad915a626486936971757ca3ef61f5216d2c..e43948b87f94cff5ef5a0da89d7908a28b3a8fa3 100644 (file)
@@ -1162,7 +1162,9 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 {
        /* Only some feature flags need to be *enabled* by userspace */
        u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
-               KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+               KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+               KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+       u32 old_flags;
 
        if (xhc->flags & ~permitted_flags)
                return -EINVAL;
@@ -1183,9 +1185,14 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
        else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
                static_branch_slow_dec_deferred(&kvm_xen_enabled);
 
+       old_flags = kvm->arch.xen_hvm_config.flags;
        memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
 
        mutex_unlock(&kvm->arch.xen.xen_lock);
+
+       if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+               kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
+
        return 0;
 }
 
index 211b86de35ac53f6457bbd2fae8c973ce6b3a968..ae90294456dfa934b64f5ed90aff93bcc9bbd783 100644 (file)
@@ -1291,6 +1291,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
 
 struct kvm_xen_hvm_config {
        __u32 flags;