]> xenbits.xensource.com Git - people/pauldu/linux.git/commitdiff
KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT tsc_is_unstable
author Paul Durrant <pdurrant@amazon.com>
Mon, 9 Oct 2023 16:11:23 +0000 (16:11 +0000)
committer Paul Durrant <pdurrant@amazon.com>
Tue, 10 Oct 2023 08:52:35 +0000 (08:52 +0000)
Unless explicitly told to do so (by passing 'clocksource=tsc' and
'tsc=stable:socket', and then jumping through some hoops concerning
potential CPU hotplug) Xen will never use TSC as its clocksource.
Hence, by default, a Xen guest will not see PVCLOCK_TSC_STABLE_BIT set
in either the primary or secondary pvclock memory areas. This has
led to bugs in some guest kernels which only become evident if
PVCLOCK_TSC_STABLE_BIT *is* set in the pvclocks. Hence, to support
such guests, give the VMM a new attribute to tell KVM to forcibly
clear the bit in the Xen pvclocks.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
include/uapi/linux/kvm.h

index 21a7578142a18b4ad537acbd654ba510dd16fc9f..d06f971a2ce0d1dba0084a366c4dd455e153d40e 100644 (file)
@@ -5544,6 +5544,7 @@ attribute cannot be read.
                        __u64 expires_ns;
                } timer;
                __u8 vector;
+               __u32 flags;
        } u;
   };
 
@@ -5610,6 +5611,14 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
   vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by
   setting the vector to zero.
 
+KVM_XEN_VCPU_ATTR_TYPE_PVCLOCK
+  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+  support for the KVM_XEN_HVM_CONFIG_PVCLOCK feature. It modifies the
+  pvclock information available to the guest. Currently the only defined
+  flag is KVM_XEN_PVCLOCK_TSC_UNSTABLE. If this flag is set then the
+  PVCLOCK_TSC_STABLE_BIT flag will not be set in any of the Xen pvclock
+  sources. This aligns with Xen's behaviour when it is not using TSC
+  as its clock source, which is the default behaviour.
 
 4.129 KVM_XEN_VCPU_GET_ATTR
 ---------------------------
index 17715cb8731d5d2c7d60caeebf6e340158888ef5..2edc48e94d560c14dcd10c10fed315a72a008362 100644 (file)
@@ -685,6 +685,7 @@ struct kvm_vcpu_xen {
        u64 hypercall_rip;
        u32 current_runstate;
        u8 upcall_vector;
+       bool tsc_is_unstable;
        struct gfn_to_pfn_cache vcpu_info_cache;
        struct gfn_to_pfn_cache vcpu_time_info_cache;
        struct gfn_to_pfn_cache runstate_cache;
index 9f18b06bbda66bf411bb492fa4ab4c5fd9e90080..1c6556e14d40e97e34d8ffa4781e2c0153991b0b 100644 (file)
@@ -3096,7 +3096,8 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 
 static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
                                    struct gfn_to_pfn_cache *gpc,
-                                   unsigned int offset)
+                                   unsigned int offset,
+                                   bool force_tsc_unstable)
 {
        struct kvm_vcpu_arch *vcpu = &v->arch;
        struct pvclock_vcpu_time_info *guest_hv_clock;
@@ -3133,6 +3134,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
        }
 
        memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+
+       if (force_tsc_unstable)
+               guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
        smp_wmb();
 
        guest_hv_clock->version = ++vcpu->hv_clock.version;
@@ -3231,12 +3236,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        vcpu->hv_clock.flags = pvclock_flags;
 
        if (vcpu->pv_time.active)
-               kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
+               kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
+
+       /*
+        * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT because,
+        * unless explicitly told to use TSC as its clocksource, Xen will not set
+        * this bit. As a result, some guest kernels have bugs which only become
+        * evident if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
+        */
        if (vcpu->xen.vcpu_info_cache.active)
                kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
-                                       offsetof(struct compat_vcpu_info, time));
+                                       offsetof(struct compat_vcpu_info, time),
+                                       vcpu->xen.tsc_is_unstable);
        if (vcpu->xen.vcpu_time_info_cache.active)
-               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
+               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
+                                       vcpu->xen.tsc_is_unstable);
        kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
        return 0;
 }
@@ -4531,7 +4545,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
                    KVM_XEN_HVM_CONFIG_SHARED_INFO |
                    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
-                   KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+                   KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+                   KVM_XEN_HVM_CONFIG_PVCLOCK;
                if (sched_info_on())
                        r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
                             KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
index 40edf4d1974c530336e9f9044fd3b18b18ea8de3..08e64df2e27d61eef8acdf4e9d7050e950db021d 100644 (file)
@@ -938,6 +938,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_PVCLOCK:
+               vcpu->arch.xen.tsc_is_unstable = data->u.flags & KVM_XEN_PVCLOCK_TSC_UNSTABLE;
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+               r = 0;
+               break;
+
        default:
                break;
        }
@@ -1030,6 +1036,13 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                r = 0;
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_PVCLOCK:
+               data->u.flags = 0;
+               if (vcpu->arch.xen.tsc_is_unstable)
+                       data->u.flags |= KVM_XEN_PVCLOCK_TSC_UNSTABLE;
+               r = 0;
+               break;
+
        default:
                break;
        }
index 13065dd96132da65beb99f9455659c9b75ed109d..a101fe60f2e183dbfe81fd160d960369bfcc2c5d 100644 (file)
@@ -1282,6 +1282,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK             (1 << 7)
 
 struct kvm_xen_hvm_config {
        __u32 flags;
@@ -1870,6 +1871,8 @@ struct kvm_xen_vcpu_attr {
                        __u64 expires_ns;
                } timer;
                __u8 vector;
+               __u32 flags;
+#define KVM_XEN_PVCLOCK_TSC_UNSTABLE (1 << 0)
        } u;
 };
 
@@ -1884,6 +1887,8 @@ struct kvm_xen_vcpu_attr {
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID         0x6
 #define KVM_XEN_VCPU_ATTR_TYPE_TIMER           0x7
 #define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR   0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_PVCLOCK */
+#define KVM_XEN_VCPU_ATTR_TYPE_PVCLOCK         0x9
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {