xen/x86: track dirty pCPU caches for a given vCPU
author    Roger Pau Monne <roger.pau@citrix.com>
          Wed, 30 Apr 2025 08:43:22 +0000 (10:43 +0200)
committer Roger Pau Monne <roger.pau@citrix.com>
          Tue, 6 May 2025 08:02:36 +0000 (10:02 +0200)
When a guest is allowed access to cache control operations, such tracking
avoids having to issue a system-wide cache flush: only the pCPUs where the
vCPU has been scheduled since the last flush need their caches flushed.

Note that domain-wide flushes accumulate the dirty caches from all the
vCPUs, but clearing the vCPU masks would require pausing all vCPUs, which
seems like overkill.  Instead leave the vCPU dirty masks as-is; in the worst
case this results in redundant flushes in later calls.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
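
For illustration, a minimal user-space C sketch of the tracking scheme the
patch implements: record the pCPUs a vCPU has run on since its last flush,
then flush only those and restart tracking.  The toy_* names and the plain
64-bit mask are stand-ins for Xen's cpumask_var_t, __context_switch(),
vcpu_flush_cache() and flush_mask() shown in the diff below.

/*
 * Toy model of per-vCPU dirty-cache tracking (user-space sketch, not Xen
 * code): each vcpu records the pCPUs it has run on since its last flush,
 * so a cache flush only needs to target those pCPUs instead of every one.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_PCPUS 8

struct toy_vcpu {
    uint64_t dirty_cache;    /* bit N set => pCPU N may hold dirty lines */
};

/* Called when the vcpu is scheduled onto pcpu (cf. __context_switch()). */
static void toy_context_switch(struct toy_vcpu *v, unsigned int pcpu)
{
    v->dirty_cache |= UINT64_C(1) << pcpu;
}

/* Stand-in for flush_mask(..., FLUSH_CACHE): just report the targets. */
static void toy_flush_mask(uint64_t mask)
{
    for ( unsigned int cpu = 0; cpu < NR_PCPUS; cpu++ )
        if ( mask & (UINT64_C(1) << cpu) )
            printf("flushing cache of pCPU%u\n", cpu);
}

/*
 * cf. vcpu_flush_cache(): flush only the recorded pCPUs, then restart
 * tracking from the pCPU the vcpu is currently running on.
 */
static void toy_vcpu_flush_cache(struct toy_vcpu *v, unsigned int cur_pcpu)
{
    toy_flush_mask(v->dirty_cache);
    v->dirty_cache = UINT64_C(1) << cur_pcpu;
}

int main(void)
{
    struct toy_vcpu v = { 0 };

    toy_context_switch(&v, 2);
    toy_context_switch(&v, 5);
    toy_vcpu_flush_cache(&v, 5);   /* flushes pCPU2 and pCPU5 only */

    return 0;
}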
xen/arch/x86/domain.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/mtrr.c
xen/arch/x86/hvm/svm/svm.c
xen/arch/x86/hvm/vmx/vmx.c
xen/arch/x86/include/asm/domain.h
xen/arch/x86/mm.c
xen/arch/x86/pv/emul-priv-op.c

index f197dad4c0cdf979a863ec580137c2d49bcf6b9f..3d08b829d2db2cb28a08d9473e93e1225de613f9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -579,6 +579,13 @@ int arch_vcpu_create(struct vcpu *v)
 
         if ( (rc = init_vcpu_msr_policy(v)) )
             goto fail;
+
+        if ( cache_flush_permitted(d) &&
+             !cond_zalloc_cpumask_var(&v->arch.dirty_cache) )
+        {
+            rc = -ENOMEM;
+            goto fail;
+        }
     }
     else if ( (rc = xstate_alloc_save_area(v)) != 0 )
         return rc;
@@ -614,6 +621,7 @@ int arch_vcpu_create(struct vcpu *v)
     vcpu_destroy_fpu(v);
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
+    FREE_CPUMASK_VAR(v->arch.dirty_cache);
 
     return rc;
 }
@@ -628,6 +636,8 @@ void arch_vcpu_destroy(struct vcpu *v)
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
 
+    FREE_CPUMASK_VAR(v->arch.dirty_cache);
+
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
     else
@@ -2018,6 +2028,9 @@ static void __context_switch(void)
         cpumask_set_cpu(cpu, nd->dirty_cpumask);
     write_atomic(&n->dirty_cpu, cpu);
 
+    if ( cache_flush_permitted(nd) )
+        __cpumask_set_cpu(cpu, n->arch.dirty_cache);
+
     if ( !is_idle_domain(nd) )
     {
         memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
@@ -2606,6 +2619,36 @@ unsigned int domain_max_paddr_bits(const struct domain *d)
     return bits;
 }
 
+void vcpu_flush_cache(struct vcpu *curr)
+{
+    ASSERT(curr == current);
+    ASSERT(cache_flush_permitted(curr->domain));
+
+    flush_mask(curr->arch.dirty_cache, FLUSH_CACHE);
+    cpumask_clear(curr->arch.dirty_cache);
+    __cpumask_set_cpu(smp_processor_id(), curr->arch.dirty_cache);
+}
+
+void domain_flush_cache(const struct domain *d)
+{
+    const struct vcpu *v;
+    cpumask_t *mask = this_cpu(scratch_cpumask);
+
+    ASSERT(cache_flush_permitted(d));
+
+    cpumask_clear(mask);
+    for_each_vcpu( d, v )
+        cpumask_or(mask, mask, v->arch.dirty_cache);
+
+    flush_mask(mask, FLUSH_CACHE);
+    /*
+     * Clearing the mask of vCPUs in the domain would be racy unless all vCPUs
+     * are paused, so just leave them as-is, at the cost of possibly doing
+     * redundant flushes in later calls.  It's still better than doing a
+     * host-wide cache flush.
+     */
+}
+
 /*
  * Local variables:
  * mode: C
index 4cb2e13046d12d7377bb73d7942b7677320e937e..aed582a215a08931393567defd3f129ab7463497 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2277,7 +2277,7 @@ void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
             domain_pause_nosync(v->domain);
 
             /* Flush physical caches. */
-            flush_all(FLUSH_CACHE);
+            domain_flush_cache(v->domain);
             hvm_set_uc_mode(v, 1);
 
             domain_unpause(v->domain);
index 887994d2b98429e58039f65a3b67fd8743c2c579..cfe0d44459c2d50d3ee6825240cdb8681f92a0d5 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -769,7 +769,7 @@ void memory_type_changed(struct domain *d)
     if ( cache_flush_permitted(d) &&
          d->vcpu && d->vcpu[0] && p2m_memory_type_changed(d) )
     {
-        flush_all(FLUSH_CACHE);
+        domain_flush_cache(d);
     }
 }
 
index e33a38c1e44697004db805dee8a41fa0a8c8e9c0..5d1777ace335c9d6408dde21d8333d900603d3af 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2315,8 +2315,10 @@ static void svm_vmexit_mce_intercept(
 
 static void cf_check svm_wbinvd_intercept(void)
 {
-    if ( cache_flush_permitted(current->domain) )
-        flush_all(FLUSH_CACHE);
+    struct vcpu *curr = current;
+
+    if ( cache_flush_permitted(curr->domain) )
+        vcpu_flush_cache(curr);
 }
 
 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs,
index 639882ceb216a1048656629842d25a642b4cc793..9273607d576c28d70f682722bfd521db141d4c33 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3840,11 +3840,13 @@ static void vmx_do_extint(struct cpu_user_regs *regs)
 
 static void cf_check vmx_wbinvd_intercept(void)
 {
-    if ( !cache_flush_permitted(current->domain) )
+    struct vcpu *curr = current;
+
+    if ( !cache_flush_permitted(curr->domain) )
         return;
 
     if ( cpu_has_wbinvd_exiting )
-        flush_all(FLUSH_CACHE);
+        vcpu_flush_cache(curr);
     else
         wbinvd();
 }
index 8c0dea12a5266406142b71d182c5d901122cefc1..064b51889dc2c9da7be0e39f3b7f5b7829cbe302 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -668,6 +668,12 @@ struct arch_vcpu
 
     struct vcpu_msrs *msrs;
 
+    /*
+     * When vCPU is allowed cache control track the pCPUs the vCPU has run on
+     * since the last flush.
+     */
+    cpumask_var_t dirty_cache;
+
     struct {
         bool next_interrupt_enabled;
     } monitor;
@@ -790,6 +796,9 @@ unsigned int domain_max_paddr_bits(const struct domain *d);
 #define arch_init_idle_domain arch_init_idle_domain
 void arch_init_idle_domain(struct domain *d);
 
+void vcpu_flush_cache(struct vcpu *curr);
+void domain_flush_cache(const struct domain *d);
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
index 59b60b1e62a7bfcb07ae455693f991c1a5b59aa6..11b59398a2c4fbe08f83fd582c981354789a5492 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3804,26 +3804,19 @@ long do_mmuext_op(
             break;
 
         case MMUEXT_FLUSH_CACHE:
-            /*
-             * Dirty pCPU caches where the current vCPU has been scheduled are
-             * not tracked, and hence we need to resort to a global cache
-             * flush for correctness.
-             */
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( likely(cache_flush_permitted(currd)) )
+                vcpu_flush_cache(curr);
+            else
+                rc = -EINVAL;
+            break;
+
         case MMUEXT_FLUSH_CACHE_GLOBAL:
             if ( unlikely(currd != pg_owner) )
                 rc = -EPERM;
             else if ( likely(cache_flush_permitted(currd)) )
-            {
-                unsigned int cpu;
-                cpumask_t *mask = this_cpu(scratch_cpumask);
-
-                cpumask_clear(mask);
-                for_each_online_cpu(cpu)
-                    if ( !cpumask_intersects(mask,
-                                             per_cpu(cpu_sibling_mask, cpu)) )
-                        __cpumask_set_cpu(cpu, mask);
-                flush_mask(mask, FLUSH_CACHE);
-            }
+                domain_flush_cache(currd);
             else
                 rc = -EINVAL;
             break;
index 089d4cb4d9053a78d71713c864d7ee989286e10f..076ce8f004574d54e81f838abe6e7e863511b07b 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1199,12 +1199,8 @@ static int cf_check cache_op(
      * newer linux uses this in some start-of-day timing loops.
      */
     if ( cache_flush_permitted(current->domain) )
-        /*
-         * Handle wbnoinvd as wbinvd, at the expense of higher cost.  Broadcast
-         * the flush to all pCPUs, Xen doesn't track where the vCPU has ran
-         * previously.
-         */
-        flush_all(FLUSH_CACHE);
+        /* Handle wbnoinvd as wbinvd, at the expense of higher cost. */
+        vcpu_flush_cache(current);
 
     return X86EMUL_OKAY;
 }