From 2529c850ea48f036727ca2f148caed89391311b8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 21 Jun 2016 12:03:02 +0200 Subject: [PATCH] add update indicator to vcpu_runstate_info There has been a report about incorrect vruntime accounting in Linux guests under Xen. A Linux kernel with CONFIG_PARAVIRT_TIME_ACCOUNTING set is capable of doing correct vruntime accounting, but this would require the kernel to be able to read the runstate data of other cpus. A guest mapping vcpu_runstate_info into its memory can't read this information from any cpu other than the one the data refers to. The reason is that there is no reliable way for the guest to detect a concurrent data update by the hypervisor. In order to support reading another vcpu's mapped vcpu_runstate_info, an indicator for an occurring update of the runstate information is needed. Add the possibility to activate setting this indicator in the highest bit of state_entry_time via a vm_assist hypercall. When activated, the update indicator will be set before the runstate information is modified in guest memory and it will be reset after the modification is done. As state_entry_time is guaranteed to be different after each update, the guest can detect any update (either in progress or while reading the runstate data) by comparing state_entry_time before and after reading runstate data: in case the values differ or the update indicator was set, the data might be inconsistent and should be reread. 
Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Acked-by: Julien Grall --- xen/arch/arm/domain.c | 20 ++++++++++++++++++++ xen/arch/x86/domain.c | 21 +++++++++++++++++++++ xen/include/asm-arm/config.h | 2 +- xen/include/asm-x86/config.h | 1 + xen/include/public/vcpu.h | 6 ++++++ xen/include/public/xen.h | 7 +++++++ 6 files changed, 56 insertions(+), 1 deletion(-) diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index 1365b4ad4b..d8a804ca48 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -239,10 +239,30 @@ static void ctxt_switch_to(struct vcpu *n) /* Update per-VCPU guest runstate shared memory area (if registered). */ static void update_runstate_area(struct vcpu *v) { + void __user *guest_handle = NULL; + if ( guest_handle_is_null(runstate_guest(v)) ) return; + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + guest_handle = &v->runstate_guest.p->state_entry_time + 1; + guest_handle--; + v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; + __raw_copy_to_guest(guest_handle, + (void *)(&v->runstate.state_entry_time + 1) - 1, 1); + smp_wmb(); + } + __copy_to_guest(runstate_guest(v), &v->runstate, 1); + + if ( guest_handle ) + { + v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; + smp_wmb(); + __raw_copy_to_guest(guest_handle, + (void *)(&v->runstate.state_entry_time + 1) - 1, 1); + } } static void schedule_tail(struct vcpu *prev) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 989bc74a83..3ba7ed16c5 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1926,12 +1926,25 @@ bool_t update_runstate_area(struct vcpu *v) { bool_t rc; smap_check_policy_t smap_policy; + void __user *guest_handle = NULL; if ( guest_handle_is_null(runstate_guest(v)) ) return 1; smap_policy = smap_policy_change(v, SMAP_CHECK_ENABLED); + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + guest_handle = has_32bit_shinfo(v->domain) + ? 
&v->runstate_guest.compat.p->state_entry_time + 1 + : &v->runstate_guest.native.p->state_entry_time + 1; + guest_handle--; + v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; + __raw_copy_to_guest(guest_handle, + (void *)(&v->runstate.state_entry_time + 1) - 1, 1); + smp_wmb(); + } + if ( has_32bit_shinfo(v->domain) ) { struct compat_vcpu_runstate_info info; @@ -1944,6 +1957,14 @@ bool_t update_runstate_area(struct vcpu *v) rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) != sizeof(v->runstate); + if ( guest_handle ) + { + v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; + smp_wmb(); + __raw_copy_to_guest(guest_handle, + (void *)(&v->runstate.state_entry_time + 1) - 1, 1); + } + smap_policy_change(v, smap_policy); return rc; diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h index 4e7d20e086..9417be6e34 100644 --- a/xen/include/asm-arm/config.h +++ b/xen/include/asm-arm/config.h @@ -199,7 +199,7 @@ extern unsigned long frametable_virt_end; #define watchdog_disable() ((void)0) #define watchdog_enable() ((void)0) -#define VM_ASSIST_VALID (0) +#define VM_ASSIST_VALID (1UL << VMASST_TYPE_runstate_update_flag) #endif /* __ARM_CONFIG_H__ */ /* diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index c10129d70c..6fd84e7fff 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -332,6 +332,7 @@ extern unsigned long xen_phys_start; (1UL << VMASST_TYPE_writable_pagetables) | \ (1UL << VMASST_TYPE_pae_extended_cr3) | \ (1UL << VMASST_TYPE_architectural_iopl) | \ + (1UL << VMASST_TYPE_runstate_update_flag)| \ (1UL << VMASST_TYPE_m2p_strict)) #define VM_ASSIST_VALID NATIVE_VM_ASSIST_VALID #define COMPAT_VM_ASSIST_VALID (NATIVE_VM_ASSIST_VALID & \ diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h index 692b87afbb..2aa230da2d 100644 --- a/xen/include/public/vcpu.h +++ b/xen/include/public/vcpu.h @@ -83,6 +83,12 @@ struct vcpu_runstate_info { int state; /* When was current state 
entered (system time, ns)? */ uint64_t state_entry_time; + /* + * Update indicator set in state_entry_time: + * When activated via VMASST_TYPE_runstate_update_flag, set during + * updates in guest memory mapped copy of vcpu_runstate_info. + */ +#define XEN_RUNSTATE_UPDATE (1ULL << 63) /* * Time spent in each RUNSTATE_* (ns). The sum of these times is * guaranteed not to drift from system time. diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 37bbb22b90..b9e5e0f2f2 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -509,6 +509,13 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); */ #define VMASST_TYPE_architectural_iopl 4 +/* + * All guests: activate update indicator in vcpu_runstate_info + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped + * vcpu_runstate_info during updates of the runstate information. + */ +#define VMASST_TYPE_runstate_update_flag 5 + /* * x86/64 guests: strictly hide M2P from user mode. * This allows the guest to control respective hypervisor behavior: -- 2.39.5