]> xenbits.xensource.com Git - people/iwj/xen.git/commitdiff
x86/xpti: avoid copying L4 page table contents when possible
authorJuergen Gross <jgross@suse.com>
Thu, 26 Apr 2018 11:33:10 +0000 (13:33 +0200)
committerWei Liu <wei.liu2@citrix.com>
Fri, 4 May 2018 14:52:03 +0000 (15:52 +0100)
For mitigation of Meltdown the current L4 page table is copied to the
cpu local root page table each time a 64 bit pv guest is entered.

Copying can be avoided in cases where the guest L4 page table hasn't
been modified while running the hypervisor, e.g. when handling
interrupts or any hypercall not modifying the L4 page table or %cr3.

So add a per-cpu flag indicating whether the copying should be
performed and set that flag only when loading a new %cr3 or modifying
the L4 page table.  This includes synchronization of the cpu local
root page table with other cpus, so add a special synchronization flag
for that case.

A simple performance check (compiling the hypervisor via "make -j 4")
in dom0 with 4 vcpus shows a significant improvement:

- real time drops from 112 seconds to 103 seconds
- system time drops from 142 seconds to 131 seconds

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
xen/arch/x86/flushtlb.c
xen/arch/x86/mm.c
xen/arch/x86/mm/shadow/multi.c
xen/arch/x86/pv/domain.c
xen/arch/x86/smp.c
xen/arch/x86/x86_64/asm-offsets.c
xen/arch/x86/x86_64/entry.S
xen/include/asm-x86/current.h
xen/include/asm-x86/flushtlb.h

index 8a7a76b8ff69445c15e0d07d779bff7c55a439d8..2729ba42e7d44fad769b366a8b3831766632bf73 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <xen/sched.h>
+#include <xen/smp.h>
 #include <xen/softirq.h>
 #include <asm/flushtlb.h>
 #include <asm/page.h>
@@ -160,5 +161,8 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
 
     local_irq_restore(irqfl);
 
+    if ( flags & FLUSH_ROOT_PGTBL )
+        get_cpu_info()->root_pgt_changed = true;
+
     return flags;
 }
index 53b36d8889c621c70fb0b6e4eb8d6fcb395839db..5373879359a6800c4f1debfbaa4169c803dfeea5 100644 (file)
@@ -502,6 +502,7 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
 
 void write_ptbase(struct vcpu *v)
 {
+    get_cpu_info()->root_pgt_changed = true;
     write_cr3(v->arch.cr3);
 }
 
@@ -3699,18 +3700,27 @@ long do_mmu_update(
                         break;
                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                    /*
-                     * No need to sync if all uses of the page can be accounted
-                     * to the page lock we hold, its pinned status, and uses on
-                     * this (v)CPU.
-                     */
-                    if ( !rc && !cpu_has_no_xpti &&
-                         ((page->u.inuse.type_info & PGT_count_mask) >
-                          (1 + !!(page->u.inuse.type_info & PGT_pinned) +
-                           (pagetable_get_pfn(curr->arch.guest_table) == mfn) +
-                           (pagetable_get_pfn(curr->arch.guest_table_user) ==
-                            mfn))) )
-                        sync_guest = true;
+                    if ( !rc && !cpu_has_no_xpti )
+                    {
+                        bool local_in_use = false;
+
+                        if ( pagetable_get_pfn(curr->arch.guest_table) == mfn )
+                        {
+                            local_in_use = true;
+                            get_cpu_info()->root_pgt_changed = true;
+                        }
+
+                        /*
+                         * No need to sync if all uses of the page can be
+                         * accounted to the page lock we hold, its pinned
+                         * status, and uses on this (v)CPU.
+                         */
+                        if ( (page->u.inuse.type_info & PGT_count_mask) >
+                             (1 + !!(page->u.inuse.type_info & PGT_pinned) +
+                              (pagetable_get_pfn(curr->arch.guest_table_user) ==
+                               mfn) + local_in_use) )
+                            sync_guest = true;
+                    }
                     break;
 
                 case PGT_writable_page:
@@ -3825,7 +3835,7 @@ long do_mmu_update(
 
         cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
         if ( !cpumask_empty(mask) )
-            flush_mask(mask, FLUSH_TLB_GLOBAL);
+            flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
     }
 
     perfc_add(num_page_updates, i);
index 9c3af330eceaa4e4deb18475bc49d492c8a1b0ef..da586c21c741364d746653a15a34324112aa95e0 100644 (file)
@@ -952,6 +952,8 @@ static int shadow_set_l4e(struct domain *d,
 
     /* Write the new entry */
     shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
+    flush_root_pgtbl_domain(d);
+
     flags |= SHADOW_SET_CHANGED;
 
     if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
@@ -966,6 +968,7 @@ static int shadow_set_l4e(struct domain *d,
         }
         sh_put_ref(d, osl3mfn, paddr);
     }
+
     return flags;
 }
 
index ac65ba4609f92dc145cd23e389ddf586e8e5837a..b1c40373faa62f4bb8e8044f222e7e60a31c3c6d 100644 (file)
@@ -217,6 +217,8 @@ static void _toggle_guest_pt(struct vcpu *v)
 {
     v->arch.flags ^= TF_kernel_mode;
     update_cr3(v);
+    get_cpu_info()->root_pgt_changed = true;
+
     /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
     asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
 
index 033dd059580b821661afd66c214a14b01371b626..63e819ca3837fda1fc6df893bcfa3b9d0ea97345 100644 (file)
@@ -208,7 +208,7 @@ void invalidate_interrupt(struct cpu_user_regs *regs)
     ack_APIC_irq();
     perfc_incr(ipis);
     if ( (flags & FLUSH_VCPU_STATE) && __sync_local_execstate() )
-        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL);
+        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
     if ( flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK) )
         flush_area_local(flush_va, flags);
     cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
index a2fea94f4c220215bf6904140e6cf9978b16cc5a..9e2aefb00f5d625c04023726f82a64841ccae4b1 100644 (file)
@@ -143,6 +143,7 @@ void __dummy__(void)
     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
     OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
     OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
+    OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
index b52721dfec3050001e7eb0ba9f77346ef53a6355..2fc0810b1ad1601544b1cdd72efd50fec31487fc 100644 (file)
@@ -129,11 +129,15 @@ restore_all_guest:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
+        test  %rdi, %rdi
+        jz    .Lrag_keep_cr3
+        mov   %rdi, %rax
+        cmpb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
+        je    .Lrag_copy_done
+        movb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
         movabs $PADDR_MASK & PAGE_MASK, %rsi
         movabs $DIRECTMAP_VIRT_START, %rcx
-        mov   %rdi, %rax
         and   %rsi, %rdi
-        jz    .Lrag_keep_cr3
         and   %r9, %rsi
         add   %rcx, %rdi
         add   %rcx, %rsi
@@ -148,6 +152,7 @@ restore_all_guest:
         sub   $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
+.Lrag_copy_done:
         mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
         mov   %rdi, %rsi
index 3a0e1eef36426a55aa00732f4076f9e46cb154f6..f2491b442346c2920bcb8bafa8d72a05509012b1 100644 (file)
@@ -59,6 +59,14 @@ struct cpu_info {
     bool         use_shadow_spec_ctrl;
     uint8_t      bti_ist_info;
 
+    /*
+     * The following field controls copying of the L4 page table of 64-bit
+     * PV guests to the per-cpu root page table on entering the guest context.
+     * If set the L4 page table is being copied to the root page table and
+     * the field will be reset.
+     */
+    bool         root_pgt_changed;
+
     unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
index 2cade9cbfbd6126503f59e724843d89763735e04..9b2336c07afc8d9afd71767e8afaac6a854df954 100644 (file)
@@ -103,6 +103,8 @@ void write_cr3(unsigned long cr3);
 #define FLUSH_VA_VALID   0x800
  /* Flush CPU state */
 #define FLUSH_VCPU_STATE 0x1000
+ /* Flush the per-cpu root page table */
+#define FLUSH_ROOT_PGTBL 0x2000
 
 /* Flush local TLBs/caches. */
 unsigned int flush_area_local(const void *va, unsigned int flags);
@@ -134,6 +136,12 @@ void flush_area_mask(const cpumask_t *, const void *va, unsigned int flags);
 #define flush_tlb_one_all(v)                    \
     flush_tlb_one_mask(&cpu_online_map, v)
 
+#define flush_root_pgtbl_domain(d)                                       \
+{                                                                        \
+    if ( !cpu_has_no_xpti && is_pv_domain(d) && !is_pv_32bit_domain(d) ) \
+        flush_mask((d)->dirty_cpumask, FLUSH_ROOT_PGTBL);                \
+}
+
 static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache) {}
 static inline int invalidate_dcache_va_range(const void *p,
                                              unsigned long size)