xenbits.xensource.com Git - xen.git/commitdiff
x86/xpti: avoid copying L4 page table contents when possible
author: Juergen Gross <jgross@suse.com>
Tue, 29 May 2018 08:42:44 +0000 (10:42 +0200)
committer: Jan Beulich <jbeulich@suse.com>
Tue, 29 May 2018 08:42:44 +0000 (10:42 +0200)
For mitigation of Meltdown the current L4 page table is copied to the
cpu local root page table each time a 64 bit pv guest is entered.

Copying can be avoided in cases where the guest L4 page table hasn't
been modified while running the hypervisor, e.g. when handling
interrupts or any hypercall not modifying the L4 page table or %cr3.

So add a per-cpu flag indicating whether the copying should be
performed and set that flag only when loading a new %cr3 or modifying
the L4 page table.  This includes synchronization of the cpu local
root page table with other cpus, so add a special synchronization flag
for that case.

A simple performance check (compiling the hypervisor via "make -j 4")
in dom0 with 4 vcpus shows a significant improvement:

- real time drops from 112 seconds to 103 seconds
- system time drops from 142 seconds to 131 seconds

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
xen/arch/x86/flushtlb.c
xen/arch/x86/mm.c
xen/arch/x86/mm/shadow/multi.c
xen/arch/x86/smp.c
xen/arch/x86/x86_64/asm-offsets.c
xen/arch/x86/x86_64/entry.S
xen/arch/x86/x86_64/traps.c
xen/include/asm-x86/current.h
xen/include/asm-x86/flushtlb.h

index d74874c16a458f199b738d4ab2081f01c45b6e08..f0d2bc1d7a86e245dd9f6611c93425bd219be069 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <xen/config.h>
 #include <xen/sched.h>
+#include <xen/smp.h>
 #include <xen/softirq.h>
 #include <asm/flushtlb.h>
 #include <asm/page.h>
@@ -161,5 +162,8 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
 
     local_irq_restore(irqfl);
 
+    if ( flags & FLUSH_ROOT_PGTBL )
+        get_cpu_info()->root_pgt_changed = 1;
+
     return flags;
 }
index ec323ecc7e478b10f78653c08f3bcfcdf44a5eb0..0ed878a015cd3a93d21b0cfb91b999097c5c0e7f 100644 (file)
@@ -499,6 +499,7 @@ void make_cr3(struct vcpu *v, unsigned long mfn)
 
 void write_ptbase(struct vcpu *v)
 {
+    get_cpu_info()->root_pgt_changed = 1;
     write_cr3(v->arch.cr3);
 }
 
@@ -4006,18 +4007,27 @@ long do_mmu_update(
                 case PGT_l4_page_table:
                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                    /*
-                     * No need to sync if all uses of the page can be accounted
-                     * to the page lock we hold, its pinned status, and uses on
-                     * this (v)CPU.
-                     */
-                    if ( !rc && !cpu_has_no_xpti &&
-                         ((page->u.inuse.type_info & PGT_count_mask) >
-                          (1 + !!(page->u.inuse.type_info & PGT_pinned) +
-                           (pagetable_get_pfn(curr->arch.guest_table) == mfn) +
-                           (pagetable_get_pfn(curr->arch.guest_table_user) ==
-                            mfn))) )
-                        sync_guest = 1;
+                    if ( !rc && !cpu_has_no_xpti )
+                    {
+                        bool_t local_in_use = 0;
+
+                        if ( pagetable_get_pfn(curr->arch.guest_table) == mfn )
+                        {
+                            local_in_use = 1;
+                            get_cpu_info()->root_pgt_changed = 1;
+                        }
+
+                        /*
+                         * No need to sync if all uses of the page can be
+                         * accounted to the page lock we hold, its pinned
+                         * status, and uses on this (v)CPU.
+                         */
+                        if ( (page->u.inuse.type_info & PGT_count_mask) >
+                             (1 + !!(page->u.inuse.type_info & PGT_pinned) +
+                              (pagetable_get_pfn(curr->arch.guest_table_user) ==
+                               mfn) + local_in_use) )
+                            sync_guest = 1;
+                    }
                     break;
                 case PGT_writable_page:
                     perfc_incr(writable_mmu_updates);
@@ -4126,7 +4136,8 @@ long do_mmu_update(
          * Force other vCPU-s of the affected guest to pick up L4 entry
          * changes (if any).
          */
-        flush_mask(pt_owner->domain_dirty_cpumask, FLUSH_TLB_GLOBAL);
+        flush_mask(pt_owner->domain_dirty_cpumask,
+                   FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
     }
 
     perfc_add(num_page_updates, i);
index 4831d51c1cb024ed4834d04b8915b4c2d591728c..cb24a0fdef5ae4193201b84f950020441ad1b556 100644 (file)
@@ -939,6 +939,8 @@ static int shadow_set_l4e(struct domain *d,
 
     /* Write the new entry */
     shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
+    flush_root_pgtbl_domain(d);
+
     flags |= SHADOW_SET_CHANGED;
 
     if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
@@ -953,6 +955,7 @@ static int shadow_set_l4e(struct domain *d,
         }
         sh_put_ref(d, osl3mfn, paddr);
     }
+
     return flags;
 }
 
index 4abb16f3bbf6387fa74b7fa5c81a728cb62adf10..01f0ad4ae44fe5b99c09717d030593fc3cbce490 100644 (file)
@@ -209,7 +209,7 @@ void invalidate_interrupt(struct cpu_user_regs *regs)
     ack_APIC_irq();
     perfc_incr(ipis);
     if ( __sync_local_execstate() )
-        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL);
+        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
     flush_area_local(flush_va, flags);
     cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
 }
index b1fc806b697a15f860ce7ec6db3fcfc17e86a61b..1b88c36c4089ebae74b7e3a768f53a3fe0af3856 100644 (file)
@@ -144,6 +144,7 @@ void __dummy__(void)
     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
     OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl);
     OFFSET(CPUINFO_spec_ctrl_flags, struct cpu_info, spec_ctrl_flags);
+    OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
index 1c4f0149f750b741abdcab8b31dbda36a03e8310..26b8ab0c5435fbae40608280713a64021b2c01dd 100644 (file)
@@ -46,11 +46,15 @@ restore_all_guest:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
+        test  %rdi, %rdi
+        jz    .Lrag_keep_cr3
+        mov   %rdi, %rax
+        cmpb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
+        je    .Lrag_copy_done
+        movb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
         movabs $PADDR_MASK & PAGE_MASK, %rsi
         movabs $DIRECTMAP_VIRT_START, %rcx
-        mov   %rdi, %rax
         and   %rsi, %rdi
-        jz    .Lrag_keep_cr3
         and   %r9, %rsi
         add   %rcx, %rdi
         add   %rcx, %rsi
@@ -65,6 +69,7 @@ restore_all_guest:
         sub   $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
+.Lrag_copy_done:
         mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
         mov   %rdi, %rsi
index bf8dfcbdee128cc537849cdb40e197ae165ff424..4e6c68128f43fc37d59f41f39d241aec8326fba7 100644 (file)
@@ -284,6 +284,8 @@ void toggle_guest_pt(struct vcpu *v)
 
     v->arch.flags ^= TF_kernel_mode;
     update_cr3(v);
+    get_cpu_info()->root_pgt_changed = 1;
+
     /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
     asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
 
index 43aac0b8b61ffcaf49242f1455d49548fe2ec837..e4c6b0224a114f66c2f7b8ff7065b10844c51f03 100644 (file)
@@ -60,6 +60,14 @@ struct cpu_info {
     uint8_t      xen_spec_ctrl;
     uint8_t      spec_ctrl_flags;
 
+    /*
+     * The following field controls copying of the L4 page table of 64-bit
+     * PV guests to the per-cpu root page table on entering the guest context.
+     * If set, the L4 page table is copied to the root page table and the
+     * field is cleared again.
+     */
+    bool_t       root_pgt_changed;
+
     unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
index 02f7b569d4c9a2a76bc73947c4df8c194c2b2221..1be6a72306c2d04f7f45a16c3d4cd7bfd0c51c97 100644 (file)
@@ -101,6 +101,8 @@ void write_cr3(unsigned long cr3);
 #define FLUSH_CACHE      0x400
  /* VA for the flush has a valid mapping */
 #define FLUSH_VA_VALID   0x800
+ /* Request a re-sync of the per-cpu root page table from the guest L4 */
+#define FLUSH_ROOT_PGTBL 0x2000
 
 /* Flush local TLBs/caches. */
 unsigned int flush_area_local(const void *va, unsigned int flags);
@@ -132,6 +134,12 @@ void flush_area_mask(const cpumask_t *, const void *va, unsigned int flags);
 #define flush_tlb_one_all(v)                    \
     flush_tlb_one_mask(&cpu_online_map, v)
 
+#define flush_root_pgtbl_domain(d)                                       \
+{                                                                        \
+    if ( !cpu_has_no_xpti && is_pv_domain(d) && !is_pv_32bit_domain(d) ) \
+        flush_mask((d)->domain_dirty_cpumask, FLUSH_ROOT_PGTBL);         \
+}
+
 static inline void flush_page_to_ram(unsigned long mfn) {}
 static inline int invalidate_dcache_va_range(const void *p,
                                              unsigned long size)