xen/x86: track dirty pCPU caches for a given vCPU
author    Roger Pau Monne <roger.pau@citrix.com>
          Wed, 30 Apr 2025 08:43:22 +0000 (10:43 +0200)
committer Roger Pau Monne <roger.pau@citrix.com>
          Tue, 6 May 2025 08:02:36 +0000 (10:02 +0200)
When a guest is allowed access to cache control operations, such tracking
avoids having to issue a system-wide cache flush: only the pCPUs where the
vCPU has been scheduled since the last flush need their caches flushed.

Note that domain-wide flushes accumulate the dirty caches from all the
vCPUs, but clearing the vCPU masks would require pausing all vCPUs, which
seems like overkill.  Instead leave the vCPU dirty masks as-is; in the worst
case this results in redundant flushes in later calls.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
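
For illustration, a minimal user-space C sketch of the tracking scheme the
patch implements: record the pCPUs a vCPU has run on since its last flush,
then flush only those and restart tracking.  The toy_* names and the plain
64-bit mask are stand-ins for Xen's cpumask_var_t, __context_switch(),
vcpu_flush_cache() and flush_mask() shown in the diff below.

/*
 * Toy model of per-vCPU dirty-cache tracking (user-space sketch, not Xen
 * code): each vcpu records the pCPUs it has run on since its last flush,
 * so a cache flush only needs to target those pCPUs instead of every one.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_PCPUS 8

struct toy_vcpu {
    uint64_t dirty_cache;    /* bit N set => pCPU N may hold dirty lines */
};

/* Called when the vcpu is scheduled onto pcpu (cf. __context_switch()). */
static void toy_context_switch(struct toy_vcpu *v, unsigned int pcpu)
{
    v->dirty_cache |= UINT64_C(1) << pcpu;
}

/* Stand-in for flush_mask(..., FLUSH_CACHE): just report the targets. */
static void toy_flush_mask(uint64_t mask)
{
    for ( unsigned int cpu = 0; cpu < NR_PCPUS; cpu++ )
        if ( mask & (UINT64_C(1) << cpu) )
            printf("flushing cache of pCPU%u\n", cpu);
}

/*
 * cf. vcpu_flush_cache(): flush only the recorded pCPUs, then restart
 * tracking from the pCPU the vcpu is currently running on.
 */
static void toy_vcpu_flush_cache(struct toy_vcpu *v, unsigned int cur_pcpu)
{
    toy_flush_mask(v->dirty_cache);
    v->dirty_cache = UINT64_C(1) << cur_pcpu;
}

int main(void)
{
    struct toy_vcpu v = { 0 };

    toy_context_switch(&v, 2);
    toy_context_switch(&v, 5);
    toy_vcpu_flush_cache(&v, 5);   /* flushes pCPU2 and pCPU5 only */

    return 0;
}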
xen/arch/x86/domain.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/mtrr.c
xen/arch/x86/hvm/svm/svm.c
xen/arch/x86/hvm/vmx/vmx.c
xen/arch/x86/include/asm/domain.h
xen/arch/x86/mm.c
xen/arch/x86/pv/emul-priv-op.c

index f197dad4c0cdf979a863ec580137c2d49bcf6b9f..3d08b829d2db2cb28a08d9473e93e1225de613f9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -579,6 +579,13 @@ int arch_vcpu_create(struct vcpu *v)
 
         if ( (rc = init_vcpu_msr_policy(v)) )
             goto fail;
+
+        if ( cache_flush_permitted(d) &&
+             !cond_zalloc_cpumask_var(&v->arch.dirty_cache) )
+        {
+            rc = -ENOMEM;
+            goto fail;
+        }
     }
     else if ( (rc = xstate_alloc_save_area(v)) != 0 )
         return rc;
@@ -614,6 +621,7 @@ int arch_vcpu_create(struct vcpu *v)
     vcpu_destroy_fpu(v);
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
+    FREE_CPUMASK_VAR(v->arch.dirty_cache);
 
     return rc;
 }
@@ -628,6 +636,8 @@ void arch_vcpu_destroy(struct vcpu *v)
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
 
+    FREE_CPUMASK_VAR(v->arch.dirty_cache);
+
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
     else
@@ -2018,6 +2028,9 @@ static void __context_switch(void)
         cpumask_set_cpu(cpu, nd->dirty_cpumask);
     write_atomic(&n->dirty_cpu, cpu);
 
+    if ( cache_flush_permitted(nd) )
+        __cpumask_set_cpu(cpu, n->arch.dirty_cache);
+
     if ( !is_idle_domain(nd) )
     {
         memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
@@ -2606,6 +2619,36 @@ unsigned int domain_max_paddr_bits(const struct domain *d)
     return bits;
 }
 
+void vcpu_flush_cache(struct vcpu *curr)
+{
+    ASSERT(curr == current);
+    ASSERT(cache_flush_permitted(curr->domain));
+
+    flush_mask(curr->arch.dirty_cache, FLUSH_CACHE);
+    cpumask_clear(curr->arch.dirty_cache);
+    __cpumask_set_cpu(smp_processor_id(), curr->arch.dirty_cache);
+}
+
+void domain_flush_cache(const struct domain *d)
+{
+    const struct vcpu *v;
+    cpumask_t *mask = this_cpu(scratch_cpumask);
+
+    ASSERT(cache_flush_permitted(d));
+
+    cpumask_clear(mask);
+    for_each_vcpu( d, v )
+        cpumask_or(mask, mask, v->arch.dirty_cache);
+
+    flush_mask(mask, FLUSH_CACHE);
+    /*
+     * Clearing the mask of vCPUs in the domain would be racy unless all vCPUs
+     * are paused, so just leave them as-is, at the cost of possibly doing
+     * redundant flushes in later calls.  It's still better than doing a
+     * host-wide cache flush.
+     */
+}
+
 /*
  * Local variables:
  * mode: C
index 4cb2e13046d12d7377bb73d7942b7677320e937e..aed582a215a08931393567defd3f129ab7463497 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2277,7 +2277,7 @@ void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
             domain_pause_nosync(v->domain);
 
             /* Flush physical caches. */
-            flush_all(FLUSH_CACHE);
+            domain_flush_cache(v->domain);
             hvm_set_uc_mode(v, 1);
 
             domain_unpause(v->domain);
index 887994d2b98429e58039f65a3b67fd8743c2c579..cfe0d44459c2d50d3ee6825240cdb8681f92a0d5 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -769,7 +769,7 @@ void memory_type_changed(struct domain *d)
     if ( cache_flush_permitted(d) &&
          d->vcpu && d->vcpu[0] && p2m_memory_type_changed(d) )
     {
-        flush_all(FLUSH_CACHE);
+        domain_flush_cache(d);
     }
 }
 
index e33a38c1e44697004db805dee8a41fa0a8c8e9c0..5d1777ace335c9d6408dde21d8333d900603d3af 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2315,8 +2315,10 @@ static void svm_vmexit_mce_intercept(
 
 static void cf_check svm_wbinvd_intercept(void)
 {
-    if ( cache_flush_permitted(current->domain) )
-        flush_all(FLUSH_CACHE);
+    struct vcpu *curr = current;
+
+    if ( cache_flush_permitted(curr->domain) )
+        vcpu_flush_cache(curr);
 }
 
 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs,
index 639882ceb216a1048656629842d25a642b4cc793..9273607d576c28d70f682722bfd521db141d4c33 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3840,11 +3840,13 @@ static void vmx_do_extint(struct cpu_user_regs *regs)
 
 static void cf_check vmx_wbinvd_intercept(void)
 {
-    if ( !cache_flush_permitted(current->domain) )
+    struct vcpu *curr = current;
+
+    if ( !cache_flush_permitted(curr->domain) )
         return;
 
     if ( cpu_has_wbinvd_exiting )
-        flush_all(FLUSH_CACHE);
+        vcpu_flush_cache(curr);
     else
         wbinvd();
 }
index 8c0dea12a5266406142b71d182c5d901122cefc1..064b51889dc2c9da7be0e39f3b7f5b7829cbe302 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -668,6 +668,12 @@ struct arch_vcpu
 
     struct vcpu_msrs *msrs;
 
+    /*
+     * When vCPU is allowed cache control track the pCPUs the vCPU has run on
+     * since the last flush.
+     */
+    cpumask_var_t dirty_cache;
+
     struct {
         bool next_interrupt_enabled;
     } monitor;
@@ -790,6 +796,9 @@ unsigned int domain_max_paddr_bits(const struct domain *d);
 #define arch_init_idle_domain arch_init_idle_domain
 void arch_init_idle_domain(struct domain *d);
 
+void vcpu_flush_cache(struct vcpu *curr);
+void domain_flush_cache(const struct domain *d);
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
index 59b60b1e62a7bfcb07ae455693f991c1a5b59aa6..11b59398a2c4fbe08f83fd582c981354789a5492 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3804,26 +3804,19 @@ long do_mmuext_op(
             break;
 
         case MMUEXT_FLUSH_CACHE:
-            /*
-             * Dirty pCPU caches where the current vCPU has been scheduled are
-             * not tracked, and hence we need to resort to a global cache
-             * flush for correctness.
-             */
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( likely(cache_flush_permitted(currd)) )
+                vcpu_flush_cache(curr);
+            else
+                rc = -EINVAL;
+            break;
+
         case MMUEXT_FLUSH_CACHE_GLOBAL:
             if ( unlikely(currd != pg_owner) )
                 rc = -EPERM;
             else if ( likely(cache_flush_permitted(currd)) )
-            {
-                unsigned int cpu;
-                cpumask_t *mask = this_cpu(scratch_cpumask);
-
-                cpumask_clear(mask);
-                for_each_online_cpu(cpu)
-                    if ( !cpumask_intersects(mask,
-                                             per_cpu(cpu_sibling_mask, cpu)) )
-                        __cpumask_set_cpu(cpu, mask);
-                flush_mask(mask, FLUSH_CACHE);
-            }
+                domain_flush_cache(currd);
             else
                 rc = -EINVAL;
             break;
index 089d4cb4d9053a78d71713c864d7ee989286e10f..076ce8f004574d54e81f838abe6e7e863511b07b 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1199,12 +1199,8 @@ static int cf_check cache_op(
      * newer linux uses this in some start-of-day timing loops.
      */
     if ( cache_flush_permitted(current->domain) )
-        /*
-         * Handle wbnoinvd as wbinvd, at the expense of higher cost.  Broadcast
-         * the flush to all pCPUs, Xen doesn't track where the vCPU has ran
-         * previously.
-         */
-        flush_all(FLUSH_CACHE);
+        /* Handle wbnoinvd as wbinvd, at the expense of higher cost. */
+        vcpu_flush_cache(current);
 
     return X86EMUL_OKAY;
 }