xenbits.xensource.com Git - people/andrewcoop/xen.git/commitdiff
x86: Add Support for Paging-Write Feature
author Petr Beneš <w1benny@gmail.com>
Thu, 2 Jan 2025 17:13:28 +0000 (17:13 +0000)
committer Andrew Cooper <andrew.cooper3@citrix.com>
Mon, 13 Jan 2025 16:50:29 +0000 (16:50 +0000)
This patch introduces a new XENMEM_access_r_pw permission.
Functionally, it is similar to XENMEM_access_r, but for processors
with TERTIARY_EXEC_EPT_PAGING_WRITE support (Intel 12th Gen/Alder Lake
and later, Xeon 4th Gen/Sapphire Rapids and later), it also permits the
CPU to write to the page during guest page-table walks (e.g., updating
A/D bits) without triggering an EPT violation.

This behavior works by both enabling the EPT paging-write feature and
setting the EPT paging-write flag in the EPT leaf entry.

This feature provides a significant performance boost for
introspection tools that monitor guest page-table updates. Previously,
every page-table modification by the guest—including routine updates
like setting A/D bits—triggered an EPT violation, adding unnecessary
overhead. The new XENMEM_access_r_pw permission allows these
"uninteresting" updates to occur without EPT violations, improving
efficiency.

Additionally, this feature simplifies the handling of race conditions
in scenarios where an introspection tool:

- Sets an "invisible breakpoint" in the altp2m view for a function F.
- Monitors guest page-table updates to track whether the page
  containing F is paged out.
- Encounters a cleared Access (A) bit on the page containing F while
  the guest is about to execute the breakpoint.

In the current implementation:

- If xc_monitor_inguest_pagefault() is enabled, the introspection tool
  must emulate both the breakpoint and the setting of the Access bit.
- If xc_monitor_inguest_pagefault() is disabled, Xen handles the EPT
  violation without notifying the introspection tool, setting the
  Access bit and emulating the instruction. However, Xen fetches the
  instruction from the default view instead of the altp2m view,
  potentially causing the breakpoint to be missed.

With this patch, setting XENMEM_access_r_pw for monitored guest
page-tables prevents EPT violations in these cases. This change
enhances performance and reduces complexity for introspection tools,
ensuring seamless breakpoint handling while tracking guest page-table
updates.

Signed-off-by: Petr Beneš <w1benny@gmail.com>
Acked-by: Tamas K Lengyel <tamas@tklengyel.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
12 files changed:
xen/arch/arm/mem_access.c
xen/arch/arm/mmu/p2m.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/monitor.c
xen/arch/x86/hvm/vmx/vmcs.c
xen/arch/x86/include/asm/hvm/vmx/vmcs.h
xen/arch/x86/include/asm/p2m.h
xen/arch/x86/mm/hap/nested_hap.c
xen/arch/x86/mm/mem_access.c
xen/arch/x86/mm/p2m-ept.c
xen/include/public/memory.h
xen/include/xen/mem_access.h

index 0ec34623647e01a607edb8265f6abf163283c7b1..2af92bb402e07deae3f36e7ca0b7889d01677db1 100644 (file)
@@ -32,6 +32,7 @@ static int __p2m_get_mem_access(struct domain *d, gfn_t gfn,
             ACCESS(rwx),
             ACCESS(rx2rw),
             ACCESS(n2rwx),
+            ACCESS(r_pw),
 #undef ACCESS
     };
 
@@ -172,6 +173,7 @@ p2m_mem_access_check_and_get_page(vaddr_t gva, unsigned long flag,
             break;
         else
             goto err;
+    case XENMEM_access_r_pw:
     case XENMEM_access_rx2rw:
     case XENMEM_access_rx:
     case XENMEM_access_r:
@@ -253,6 +255,7 @@ bool p2m_mem_access_check(paddr_t gpa, vaddr_t gla, const struct npfec npfec)
         violation = npfec.read_access || npfec.insn_fetch;
         break;
     case XENMEM_access_r:
+    case XENMEM_access_r_pw:
         violation = npfec.write_access || npfec.insn_fetch;
         break;
     default:
@@ -361,6 +364,7 @@ long p2m_set_mem_access(struct domain *d, gfn_t gfn, uint32_t nr,
         ACCESS(rwx),
         ACCESS(rx2rw),
         ACCESS(n2rwx),
+        ACCESS(r_pw),
 #undef ACCESS
     };
 
index 28df6e5d034a5ee19768680ba10121767dc19ae2..7642dbc7c55bb7d228075d3882a149d14733c643 100644 (file)
@@ -597,6 +597,7 @@ static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
         e->p2m.read = 0;
         break;
     case p2m_access_r:
+    case p2m_access_r_pw:
         e->p2m.write = 0;
         e->p2m.xn = 1;
         break;
index 922c9b3af64d9132022d37627c54af092275e9cf..39e39ce4ce36475baf6767d79e72d7b49e25e99a 100644 (file)
@@ -1897,6 +1897,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
             violation = npfec.read_access || npfec.write_access || npfec.insn_fetch;
             break;
         case p2m_access_r:
+        case p2m_access_r_pw:
             violation = npfec.write_access || npfec.insn_fetch;
             break;
         case p2m_access_w:
index 74621000b2f333f103e66569abb518107312756a..523586ca980b9e6e04ee8185beb5d9e1e2c4e6e7 100644 (file)
@@ -295,6 +295,7 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
 
     case XENMEM_access_r:
     case XENMEM_access_n:
+    case XENMEM_access_r_pw:
         if ( pfec & PFEC_write_access )
             req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
         if ( pfec & PFEC_insn_fetch )
index 147e998371a81280551f5587b83901b4b59c7e91..8c0ea789c1a318160d8b6b5922c651f521f6f3e4 100644 (file)
@@ -203,6 +203,7 @@ static void __init vmx_display_features(void)
     P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection");
     P(cpu_has_vmx_notify_vm_exiting, "Notify VM Exit");
     P(cpu_has_vmx_virt_spec_ctrl, "Virtualize SPEC_CTRL");
+    P(cpu_has_vmx_ept_paging_write, "EPT Paging-Write");
 #undef P
 
     if ( !printed )
@@ -366,7 +367,8 @@ static int vmx_init_vmcs_config(bool bsp)
 
     if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS )
     {
-        uint64_t opt = TERTIARY_EXEC_VIRT_SPEC_CTRL;
+        uint64_t opt = (TERTIARY_EXEC_VIRT_SPEC_CTRL |
+                        TERTIARY_EXEC_EPT_PAGING_WRITE);
 
         _vmx_tertiary_exec_control = adjust_vmx_controls2(
             "Tertiary Exec Control", 0, opt,
index 939b87eb508f71191a540acd099b43b4e2d67340..e1d339814143270f06002a4855b7b2faa88539b3 100644 (file)
@@ -273,6 +273,9 @@ extern uint64_t vmx_tertiary_exec_control;
 #define cpu_has_vmx_virt_spec_ctrl \
      (vmx_tertiary_exec_control & TERTIARY_EXEC_VIRT_SPEC_CTRL)
 
+#define cpu_has_vmx_ept_paging_write \
+     (vmx_tertiary_exec_control & TERTIARY_EXEC_EPT_PAGING_WRITE)
+
 #define VMX_EPT_EXEC_ONLY_SUPPORTED                         0x00000001
 #define VMX_EPT_WALK_LENGTH_4_SUPPORTED                     0x00000040
 #define VMX_EPT_MEMORY_TYPE_UC                              0x00000100
index e6de37f1087be731367eaa47525b44b8f66a127e..aa1bf7c9d0b1a2d94259d1a241c2776016b2a3f1 100644 (file)
@@ -980,6 +980,7 @@ static inline unsigned int p2m_access_to_iommu_flags(p2m_access_t p2ma)
     case p2m_access_r:
     case p2m_access_rx:
     case p2m_access_rx2rw:
+    case p2m_access_r_pw:
         return IOMMUF_readable;
 
     case p2m_access_w:
index cc7bc6e5ea5c8c804623c0f4bf453290fa40a870..255fba7e1cacd269f5e68616dd617234ceca432f 100644 (file)
@@ -213,6 +213,9 @@ int nestedhvm_hap_nested_page_fault(
     case p2m_access_n2rwx:
         p2ma_10 = p2m_access_n;
         break;
+    case p2m_access_r_pw:
+        p2ma_10 = p2m_access_r;
+        break;
     default:
         p2ma_10 = p2m_access_n;
         /* For safety, remove all permissions. */
index 60a0cce68aa325c3bcdf543be154c23f375b43d8..21b5b7ecda05d09f3d304f5daccee7e8b63a857c 100644 (file)
@@ -45,6 +45,7 @@ static int _p2m_get_mem_access(struct p2m_domain *p2m, gfn_t gfn,
             ACCESS(rwx),
             ACCESS(rx2rw),
             ACCESS(n2rwx),
+            ACCESS(r_pw),
 #undef ACCESS
     };
 
@@ -94,6 +95,7 @@ bool p2m_mem_access_emulate_check(struct vcpu *v,
             break;
 
         case XENMEM_access_r:
+        case XENMEM_access_r_pw:
             violation = data->flags & MEM_ACCESS_WX;
             break;
 
@@ -312,6 +314,7 @@ bool xenmem_access_to_p2m_access(const struct p2m_domain *p2m,
         ACCESS(rwx),
         ACCESS(rx2rw),
         ACCESS(n2rwx),
+        ACCESS(r_pw),
 #undef ACCESS
     };
 
index 21728397f9acb30ce1f438e8fa614b33319a18f2..0cf6818c13f015ab1176b9ec696f74e06eeaec82 100644 (file)
@@ -149,12 +149,16 @@ static void ept_p2m_type_to_flags(const struct p2m_domain *p2m,
     }
 
     /* Then restrict with access permissions */
+    entry->pw = 0;
     switch ( entry->access )
     {
         case p2m_access_n:
         case p2m_access_n2rwx:
             entry->r = entry->w = entry->x = 0;
             break;
+        case p2m_access_r_pw:
+            entry->pw = !!cpu_has_vmx_ept_paging_write;
+            fallthrough;
         case p2m_access_r:
             entry->w = entry->x = 0;
             break;
index 5e545ae9a418a4a92d746b556b89373b72a5f37e..bd9fc37b5297dde4239ea1cc133d1d8d362ac425 100644 (file)
@@ -426,6 +426,15 @@ typedef enum {
      * pausing the vcpu
      */
     XENMEM_access_n2rwx,
+
+    /*
+     * Same as XENMEM_access_r, but on processors with
+     * the TERTIARY_EXEC_EPT_PAGING_WRITE support,
+     * CPU-initiated page-table walks can still
+     * write to it (e.g., update A/D bits)
+     */
+    XENMEM_access_r_pw,
+
     /* Take the domain default */
     XENMEM_access_default
 } xenmem_access_t;
index 87d93b31f6abed1d42ae73764e242410512c614d..2231341b5d5ee7869a382715e019db6c8c79b63b 100644 (file)
@@ -64,6 +64,12 @@ typedef enum {
                            * generates an event but does not pause the
                            * vcpu */
 
+    p2m_access_r_pw = 10, /* Special: same as R, but on processors with
+                           * the TERTIARY_EXEC_EPT_PAGING_WRITE support,
+                           * CPU-initiated page-table walks can still
+                           * write to it (e.g., update A/D bits)
+                           */
+
     /* NOTE: Assumed to be only 4 bits right now on x86. */
 } p2m_access_t;