x86/HVM: implement memory read caching for insn emulation

author Jan Beulich <jbeulich@suse.com>

Thu, 23 Apr 2020 07:55:00 +0000 (09:55 +0200)

committer Jan Beulich <jbeulich@suse.com>

Thu, 23 Apr 2020 07:55:00 +0000 (09:55 +0200)
author Jan Beulich <jbeulich@suse.com>
Thu, 23 Apr 2020 07:55:00 +0000 (09:55 +0200)
committer Jan Beulich <jbeulich@suse.com>
Thu, 23 Apr 2020 07:55:00 +0000 (09:55 +0200)
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c

index a4185c68cb30bdebb22add5018ad1ca3abe82c4a..6b3cbc7e501f07e1a8270a5f37e4df00e0e1ccfa 100644 (file)
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -28,6 +28,19 @@
  #include <asm/iocap.h>
  #include <asm/vm_event.h>
  
+struct hvmemul_cache
+{
+    /* The cache is disabled as long as num_ents > max_ents. */
+    unsigned int num_ents;
+    unsigned int max_ents;
+    struct {
+        paddr_t gpa:PADDR_BITS;
+        unsigned int :BITS_PER_LONG - PADDR_BITS - 8;
+        unsigned int size:8;
+        unsigned long data;
+    } ents[];
+};
+
  static void hvmtrace_io_assist(const ioreq_t *p)
  {
      unsigned int size, event;
@@ -136,6 +149,8 @@ void hvmemul_cancel(struct vcpu *v)
      vio->mmio_access = (struct npfec){};
      vio->mmio_retry = false;
      vio->g2m_ioport = NULL;
+
+    hvmemul_cache_disable(v);
  }
  
  static int hvmemul_do_io(
@@ -1883,12 +1898,17 @@ static int hvmemul_rep_movs(
          rc = HVMTRANS_okay;
      }
      else
+    {
+        unsigned int token = hvmemul_cache_disable(curr);
+
          /*
           * We do a modicum of checking here, just for paranoia's sake and to
           * definitely avoid copying an unitialised buffer into guest address
           * space.
           */
          rc = hvm_copy_from_guest_phys(buf, sgpa, bytes);
+        hvmemul_cache_restore(curr, token);
+    }
  
      if ( rc == HVMTRANS_okay )
          rc = hvm_copy_to_guest_phys(dgpa, buf, bytes, curr);
@@ -2551,6 +2571,19 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
      struct hvm_vcpu_io *vio = &curr->arch.hvm.hvm_io;
      int rc;
  
+    /*
+     * Enable caching if it's currently disabled, but leave the cache
+     * untouched if it's already enabled, for re-execution to consume
+     * entries populated by an earlier pass.
+     */
+    if ( vio->cache->num_ents > vio->cache->max_ents )
+    {
+        ASSERT(vio->io_req.state == STATE_IOREQ_NONE);
+        vio->cache->num_ents = 0;
+    }
+    else
+        ASSERT(vio->io_req.state == STATE_IORESP_READY);
+
      hvm_emulate_init_per_insn(hvmemul_ctxt, vio->mmio_insn,
                                vio->mmio_insn_bytes);
  
@@ -2564,6 +2597,7 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
      {
          vio->mmio_cache_count = 0;
          vio->mmio_insn_bytes = 0;
+        hvmemul_cache_disable(curr);
      }
      else
      {
@@ -2856,6 +2890,123 @@ void hvm_dump_emulation_state(const char *loglvl, const char *prefix,
             hvmemul_ctxt->insn_buf);
  }
  
+int hvmemul_cache_init(struct vcpu *v)
+{
+    /*
+     * No insn can access more than 16 independent linear addresses (AVX512F
+     * scatters/gathers being the worst). Each such linear range can span a
+     * page boundary, i.e. may require two page walks. Account for each insn
+     * byte individually, for simplicity.
+     */
+    const unsigned int nents = (CONFIG_PAGING_LEVELS + 1) *
+                               (MAX_INST_LEN + 16 * 2);
+    struct hvmemul_cache *cache = xmalloc_flex_struct(struct hvmemul_cache,
+                                                      ents, nents);
+
+    if ( !cache )
+        return -ENOMEM;
+
+    /* Cache is disabled initially. */
+    cache->num_ents = nents + 1;
+    cache->max_ents = nents;
+
+    v->arch.hvm.hvm_io.cache = cache;
+
+    return 0;
+}
+
+unsigned int hvmemul_cache_disable(struct vcpu *v)
+{
+    struct hvmemul_cache *cache = v->arch.hvm.hvm_io.cache;
+    unsigned int token = cache->num_ents;
+
+    cache->num_ents = cache->max_ents + 1;
+
+    return token;
+}
+
+void hvmemul_cache_restore(struct vcpu *v, unsigned int token)
+{
+    struct hvmemul_cache *cache = v->arch.hvm.hvm_io.cache;
+
+    ASSERT(cache->num_ents > cache->max_ents);
+    cache->num_ents = token;
+}
+
+bool hvmemul_read_cache(const struct vcpu *v, paddr_t gpa,
+                        void *buffer, unsigned int size)
+{
+    const struct hvmemul_cache *cache = v->arch.hvm.hvm_io.cache;
+    unsigned int i;
+
+    /* Cache unavailable? */
+    if ( cache->num_ents > cache->max_ents )
+        return false;
+
+    while ( size > sizeof(cache->ents->data) )
+    {
+        i = gpa & (sizeof(cache->ents->data) - 1)
+            ? -gpa & (sizeof(cache->ents->data) - 1)
+            : sizeof(cache->ents->data);
+        if ( !hvmemul_read_cache(v, gpa, buffer, i) )
+            return false;
+        gpa += i;
+        buffer += i;
+        size -= i;
+    }
+
+    for ( i = 0; i < cache->num_ents; ++i )
+        if ( cache->ents[i].gpa == gpa && cache->ents[i].size == size )
+        {
+            memcpy(buffer, &cache->ents[i].data, size);
+            return true;
+        }
+
+    return false;
+}
+
+void hvmemul_write_cache(const struct vcpu *v, paddr_t gpa,
+                         const void *buffer, unsigned int size)
+{
+    struct hvmemul_cache *cache = v->arch.hvm.hvm_io.cache;
+    unsigned int i;
+
+    /* Cache unavailable? */
+    if ( cache->num_ents > cache->max_ents )
+        return;
+
+    while ( size > sizeof(cache->ents->data) )
+    {
+        i = gpa & (sizeof(cache->ents->data) - 1)
+            ? -gpa & (sizeof(cache->ents->data) - 1)
+            : sizeof(cache->ents->data);
+        hvmemul_write_cache(v, gpa, buffer, i);
+        gpa += i;
+        buffer += i;
+        size -= i;
+    }
+
+    for ( i = 0; i < cache->num_ents; ++i )
+        if ( cache->ents[i].gpa == gpa && cache->ents[i].size == size )
+        {
+            memcpy(&cache->ents[i].data, buffer, size);
+            return;
+        }
+
+    if ( unlikely(i >= cache->max_ents) )
+    {
+        domain_crash(v->domain);
+        return;
+    }
+
+    cache->ents[i].gpa  = gpa;
+    cache->ents[i].size = size;
+
+    memcpy(&cache->ents[i].data, buffer, size);
+
+    cache->num_ents = i + 1;
+}
+
  /*
   * Local variables:
   * mode: C
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c

index 34b39e27df2244292bdc7f75b3a164eff54de44a..814b7020d8735d1159ef399e3b10bcfbef3f8e2c 100644 (file)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -727,6 +727,8 @@ int hvm_domain_initialise(struct domain *d)
  /* This function and all its descendants need to be to be idempotent. */
  void hvm_domain_relinquish_resources(struct domain *d)
  {
+    struct vcpu *v;
+
      if ( hvm_funcs.domain_relinquish_resources )
          alternative_vcall(hvm_funcs.domain_relinquish_resources, d);
  
@@ -743,6 +745,9 @@ void hvm_domain_relinquish_resources(struct domain *d)
      rtc_deinit(d);
      pmtimer_deinit(d);
      hpet_deinit(d);
+
+    for_each_vcpu ( d, v )
+        hvmemul_cache_destroy(v);
  }
  
  void hvm_domain_destroy(struct domain *d)
@@ -1550,6 +1555,10 @@ int hvm_vcpu_initialise(struct vcpu *v)
  
      v->arch.hvm.inject_event.vector = HVM_EVENT_VECTOR_UNSET;
  
+    rc = hvmemul_cache_init(v);
+    if ( rc )
+        goto fail4;
+
      rc = setup_compat_arg_xlat(v); /* teardown: free_compat_arg_xlat() */
      if ( rc != 0 )
          goto fail4;
@@ -1585,6 +1594,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
   fail5:
      free_compat_arg_xlat(v);
   fail4:
+    hvmemul_cache_destroy(v);
      hvm_funcs.vcpu_destroy(v);
   fail3:
      vlapic_destroy(v);
@@ -2946,6 +2956,7 @@ void hvm_task_switch(
      unsigned int eflags, new_cpl;
      pagefault_info_t pfinfo;
      int exn_raised, rc;
+    unsigned int token = hvmemul_cache_disable(v);
      struct tss32 tss;
  
      hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
@@ -3153,6 +3164,8 @@ void hvm_task_switch(
   out:
      hvm_unmap_entry(optss_desc);
      hvm_unmap_entry(nptss_desc);
+
+    hvmemul_cache_restore(v, token);
  }
  
  enum hvm_translation_result hvm_translate_get_page(
@@ -3240,7 +3253,6 @@ static enum hvm_translation_result __hvm_copy(
      void *buf, paddr_t addr, unsigned int size, struct vcpu *v, unsigned int flags,
      uint32_t pfec, pagefault_info_t *pfinfo)
  {
-    char *p;
      ASSERT(is_hvm_vcpu(v));
  
      /*
@@ -3288,11 +3300,17 @@ static enum hvm_translation_result __hvm_copy(
              return HVMTRANS_need_retry;
          }
  
-        p = __map_domain_page(page) + pgoff;
-
-        if ( flags & HVMCOPY_to_guest )
+        if ( (flags & HVMCOPY_to_guest) ||
+             !hvmemul_read_cache(v, gfn_to_gaddr(gfn) | pgoff, buf, count) )
          {
-            if ( p2m_is_discard_write(p2mt) )
+            void *p = __map_domain_page(page) + pgoff;
+
+            if ( !(flags & HVMCOPY_to_guest) )
+            {
+                memcpy(buf, p, count);
+                hvmemul_write_cache(v, gfn_to_gaddr(gfn) | pgoff, buf, count);
+            }
+            else if ( p2m_is_discard_write(p2mt) )
              {
                  static unsigned long lastpage;
  
@@ -3309,13 +3327,9 @@ static enum hvm_translation_result __hvm_copy(
                      memset(p, 0, count);
                  paging_mark_pfn_dirty(v->domain, _pfn(gfn_x(gfn)));
              }
-        }
-        else
-        {
-            memcpy(buf, p, count);
-        }
  
-        unmap_domain_page(p);
+            unmap_domain_page(p);
+        }
  
          addr += count;
          if ( buf )
diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c

index b4a0aeab50bdf5c3b9ab5db94f9204b75c06e17e..17ba0fe91bd738d4e73979d23472554745632819 100644 (file)
--- a/xen/arch/x86/hvm/hypercall.c
+++ b/xen/arch/x86/hvm/hypercall.c
@@ -22,6 +22,7 @@
  #include <xen/hypercall.h>
  #include <xen/nospec.h>
  
+#include <asm/hvm/emulate.h>
  #include <asm/hvm/support.h>
  #include <asm/hvm/viridian.h>
  
@@ -164,6 +165,7 @@ int hvm_hypercall(struct cpu_user_regs *regs)
      struct domain *currd = curr->domain;
      int mode = hvm_guest_x86_mode(curr);
      unsigned long eax = regs->eax;
+    unsigned int token;
  
      switch ( mode )
      {
@@ -188,7 +190,18 @@ int hvm_hypercall(struct cpu_user_regs *regs)
      }
  
      if ( (eax & 0x80000000) && is_viridian_domain(currd) )
-        return viridian_hypercall(regs);
+    {
+        int ret;
+
+        /* See comment below. */
+        token = hvmemul_cache_disable(curr);
+
+        ret = viridian_hypercall(regs);
+
+        hvmemul_cache_restore(curr, token);
+
+        return ret;
+    }
  
      BUILD_BUG_ON(ARRAY_SIZE(hvm_hypercall_table) >
                   ARRAY_SIZE(hypercall_args_table));
@@ -207,6 +220,12 @@ int hvm_hypercall(struct cpu_user_regs *regs)
          return HVM_HCALL_completed;
      }
  
+    /*
+     * Caching is intended for instruction emulation only. Disable it
+     * for any accesses by hypercall argument copy-in / copy-out.
+     */
+    token = hvmemul_cache_disable(curr);
+
      curr->hcall_preempted = false;
  
      if ( mode == 8 )
@@ -300,6 +319,8 @@ int hvm_hypercall(struct cpu_user_regs *regs)
  #endif
      }
  
+    hvmemul_cache_restore(curr, token);
+
      HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%lu -> %lx", eax, regs->rax);
  
      if ( curr->hcall_preempted )
diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c

index 0976a992adf3a89525f9614889c5ef5814b8755b..cd4c4c14b1e026fc998ef74c95c98eeb4d476968 100644 (file)
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -20,6 +20,7 @@
  #include <xen/types.h>
  #include <xen/sched.h>
  #include <asm/regs.h>
+#include <asm/hvm/emulate.h>
  #include <asm/hvm/hvm.h>
  #include <asm/hvm/support.h>
  #include <asm/hvm/domain.h>
@@ -163,6 +164,9 @@ int hvm_process_io_intercept(const struct hvm_io_handler *handler,
          {
              if ( p->data_is_ptr )
              {
+                struct vcpu *curr = current;
+                unsigned int token = hvmemul_cache_disable(curr);
+
                  data = 0;
                  switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                                    p->size) )
@@ -179,9 +183,11 @@ int hvm_process_io_intercept(const struct hvm_io_handler *handler,
                      ASSERT_UNREACHABLE();
                      /* fall through */
                  default:
-                    domain_crash(current->domain);
+                    domain_crash(curr->domain);
                      return X86EMUL_UNHANDLEABLE;
                  }
+
+                hvmemul_cache_restore(curr, token);
              }
              else
                  data = p->data;
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c

index 888f504a9409e026c5883a71496d53bdb8a889a9..5950e4d52baeb9f6216565b3479c7121f80200f2 100644 (file)
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1040,6 +1040,8 @@ void svm_vmenter_helper(const struct cpu_user_regs *regs)
      struct vcpu *curr = current;
      struct vmcb_struct *vmcb = curr->arch.hvm.svm.vmcb;
  
+    ASSERT(hvmemul_cache_disabled(curr));
+
      svm_asid_handle_vmrun();
  
      if ( unlikely(tb_init_done) )
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c

index 6597d9f71912abec5a5fb71295db9acb3403bc7d..5d4eddebeea88143e697d711a309ea24fa6fdfad 100644 (file)
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -35,6 +35,7 @@
  #include <xen/irq.h>
  #include <xen/vpci.h>
  #include <public/hvm/ioreq.h>
+#include <asm/hvm/emulate.h>
  #include <asm/hvm/io.h>
  #include <asm/hvm/vpic.h>
  #include <asm/hvm/vlapic.h>
@@ -607,6 +608,7 @@ void msix_write_completion(struct vcpu *v)
      if ( !ctrl_address && snoop_addr &&
           v->arch.hvm.hvm_io.msix_snoop_gpa )
      {
+        unsigned int token = hvmemul_cache_disable(v);
          const struct msi_desc *desc;
          uint32_t data;
  
@@ -621,6 +623,8 @@ void msix_write_completion(struct vcpu *v)
                                        sizeof(data)) == HVMTRANS_okay &&
               !(data & PCI_MSIX_VECTOR_BITMASK) )
              ctrl_address = snoop_addr;
+
+        hvmemul_cache_restore(v, token);
      }
  
      if ( !ctrl_address )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c

index 869339062b1183ccadceca405e5160bc4fe64531..b4cf2eb4c1c316f66fd0b0148750efd8ffe4479e 100644 (file)
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -4362,6 +4362,8 @@ bool vmx_vmenter_helper(const struct cpu_user_regs *regs)
      struct hvm_vcpu_asid *p_asid;
      bool_t need_flush;
  
+    ASSERT(hvmemul_cache_disabled(curr));
+
      /* Shadow EPTP can't be updated here because irqs are disabled */
       if ( nestedhvm_vcpu_in_guestmode(curr) && vcpu_nestedhvm(curr).stale_np2m )
           return false;
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h

index a2ad7ff1aaf24818a913b5b41495845d629aed79..148bab93ceacf9511a093e9388c007ca66c30cef 100644 (file)
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -14,6 +14,7 @@
  
  #include <xen/err.h>
  #include <xen/mm.h>
+#include <xen/sched.h>
  #include <asm/hvm/hvm.h>
  #include <asm/x86_emulate.h>
  
@@ -98,6 +99,39 @@ int hvmemul_do_pio_buffer(uint16_t port,
                            uint8_t dir,
                            void *buffer);
  
+#ifdef CONFIG_HVM
+/*
+ * The cache controlled by the functions below is not like an ordinary CPU
+ * cache, i.e. aiming to help performance, but a "secret store" which is
+ * needed for correctness.  The issue it helps addressing is the need for
+ * re-execution of an insn (after data was provided by a device model) to
+ * observe the exact same memory state, i.e. to specifically not observe any
+ * updates which may have occurred in the meantime by other agents.
+ * Therefore this cache gets
+ * - enabled when emulation of an insn starts,
+ * - disabled across processing secondary things like a hypercall resulting
+ *   from insn emulation,
+ * - disabled again when an emulated insn is known to not require any
+ *   further re-execution.
+ */
+int __must_check hvmemul_cache_init(struct vcpu *v);
+static inline void hvmemul_cache_destroy(struct vcpu *v)
+{
+    XFREE(v->arch.hvm.hvm_io.cache);
+}
+bool hvmemul_read_cache(const struct vcpu *, paddr_t gpa,
+                        void *buffer, unsigned int size);
+void hvmemul_write_cache(const struct vcpu *, paddr_t gpa,
+                         const void *buffer, unsigned int size);
+unsigned int hvmemul_cache_disable(struct vcpu *);
+void hvmemul_cache_restore(struct vcpu *, unsigned int token);
+/* For use in ASSERT()s only: */
+static inline bool hvmemul_cache_disabled(struct vcpu *v)
+{
+    return hvmemul_cache_disable(v) == hvmemul_cache_disable(v);
+}
+#endif
+
  void hvm_dump_emulation_state(const char *loglvl, const char *prefix,
                                struct hvm_emulate_ctxt *hvmemul_ctxt, int rc);
  
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h

index 3d80cf5d76acd7b831b532136a583026a95d087c..5ccd0758152ed9cb4b4947c921e6fa88815ec511 100644 (file)
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -76,6 +76,8 @@ struct hvm_vcpu_io {
      /* For retries we shouldn't re-fetch the instruction. */
      unsigned int mmio_insn_bytes;
      unsigned char mmio_insn[16];
+    struct hvmemul_cache *cache;
+
      /*
       * For string instruction emulation we need to be able to signal a
       * necessary retry through other than function return codes.
author	Jan Beulich <jbeulich@suse.com>
	Thu, 23 Apr 2020 07:55:00 +0000 (09:55 +0200)
committer	Jan Beulich <jbeulich@suse.com>
	Thu, 23 Apr 2020 07:55:00 +0000 (09:55 +0200)
xen/arch/x86/hvm/emulate.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/hvm.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/hypercall.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/intercept.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/svm/svm.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/vmsi.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/vmx/vmx.c		patch \| blob \| blame \| history
xen/include/asm-x86/hvm/emulate.h		patch \| blob \| blame \| history
xen/include/asm-x86/hvm/vcpu.h		patch \| blob \| blame \| history