author		kib <kib@FreeBSD.org>
		Sat, 24 Oct 2015 21:37:47 +0000 (21:37 +0000)
committer	kib <kib@FreeBSD.org>
		Sat, 24 Oct 2015 21:37:47 +0000 (21:37 +0000)

Intel SDM before revision 56 described the CLFLUSH instruction as only
ordered with the MFENCE instruction.  Similar weak guarantees are also
specified by the AMD APM vol. 3 rev. 3.22.  The x86 pmap methods
pmap_invalidate_cache_range() and pmap_invalidate_cache_pages() braced
the CLFLUSH loop with MFENCE both before and after the loop.

In revision 56 of the SDM, Intel stated that all existing
implementations of CLFLUSH are strict: CLFLUSH execution is ordered
with respect to other CLFLUSH instructions and to writes.  The strict
behaviour was also made architectural.

A new instruction, CLFLUSHOPT (documented for some time in the
Instruction Set Extensions Programming Reference), provides the weak
ordering behaviour that was previously attributed to CLFLUSH.
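
CLFLUSHOPT is encoded as CLFLUSH with a 0x66 operand-size prefix, so it
can be emitted even with assemblers that do not know the mnemonic.  The
clflushopt() wrapper used by the hunks below is not shown in this diff;
a minimal sketch, assuming it mirrors the existing clflush() inline in
<machine/cpufunc.h>, might look like this:

	static __inline void
	clflushopt(u_long addr)
	{

		/*
		 * CLFLUSHOPT shares the CLFLUSH opcode with a 0x66 prefix;
		 * emit the prefix via .byte so older assemblers accept it.
		 */
		__asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr));
	}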

Use CLFLUSHOPT when available.  When CLFLUSH is used on Intel CPUs, do
not execute MFENCE before and after the flushing loop.
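
Condensed into a sketch (not the committed code: the real function also
handles the self-snoop case, the local APIC range, and DEV_APIC, as the
diffs below show), the range-flush policy becomes:

	/*
	 * Sketch of the range-flush policy after this change; names are
	 * taken from the diffs below, error/APIC handling is omitted.
	 */
	static void
	flush_range_sketch(vm_offset_t sva, vm_offset_t eva)
	{

		if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
			/* CLFLUSHOPT is weakly ordered: fence on both sides. */
			mfence();
			for (; sva < eva; sva += cpu_clflush_line_size)
				clflushopt(sva);
			mfence();
		} else if ((cpu_feature & CPUID_CLFSH) != 0) {
			/* CLFLUSH is ordered WRT writes on Intel CPUs only. */
			if (cpu_vendor_id != CPU_VENDOR_INTEL)
				mfence();
			for (; sva < eva; sva += cpu_clflush_line_size)
				clflush(sva);
			if (cpu_vendor_id != CPU_VENDOR_INTEL)
				mfence();
		} else {
			/* No usable cache-line flush; invalidate everything. */
			pmap_invalidate_cache();
		}
	}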

Reviewed by: alc
Sponsored by: The FreeBSD Foundation

sys/amd64/amd64/initcpu.c
sys/amd64/amd64/pmap.c
sys/i386/i386/initcpu.c
sys/i386/i386/pmap.c

index 36f2d0f027b2922a3ea0704c002ad7f142886855..f711b12c6dbf8a319f54b408352790929a27b860 100644 (file)
@@ -211,12 +211,17 @@ initializecpucache(void)
         * CPUID_SS feature even though the native CPU supports it.
         */
        TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
+
        /*
-        * Allow to disable CLFLUSH feature manually by
-        * hw.clflush_disable tunable.
+        * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+        * by setting the hw.clflush_disable tunable.
         */
-       if (hw_clflush_disable == 1)
+       if (hw_clflush_disable == 1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
 }
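
For completeness, hw.clflush_disable is a loader tunable (fetched with
TUNABLE_INT_FETCH above), so the kernel's use of CLFLUSH and CLFLUSHOPT
can be turned off from /boot/loader.conf, for example:

	# /boot/loader.conf
	hw.clflush_disable=1	# do not use CLFLUSH/CLFLUSHOPT in the kernel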
index cc1b73efdae530e555746f47c080d1125272d83b..94a7d0c9640142810b158ab183832293ac795c96 100644 (file)
@@ -1710,9 +1710,8 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
 
        if ((cpu_feature & CPUID_SS) != 0 && !force)
                ; /* If "Self Snoop" is supported and allowed, do nothing. */
-       else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+       else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
            eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
                /*
                 * XXX: Some CPUs fault, hang, or trash the local APIC
                 * registers if we use CLFLUSH on the local APIC
@@ -1731,8 +1730,21 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
                 */
                mfence();
                for (; sva < eva; sva += cpu_clflush_line_size)
-                       clflush(sva);
+                       clflushopt(sva);
                mfence();
+       } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+           eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+               if (pmap_kextract(sva) == lapic_paddr)
+                       return;
+               /*
+                * Writes are ordered by CLFLUSH on Intel CPUs.
+                */
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
+               for (; sva < eva; sva += cpu_clflush_line_size)
+                       clflush(sva);
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
        } else {
 
                /*
@@ -1756,19 +1768,27 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count)
 {
        vm_offset_t daddr, eva;
        int i;
+       bool useclflushopt;
 
+       useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
        if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
-           (cpu_feature & CPUID_CLFSH) == 0)
+           ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt))
                pmap_invalidate_cache();
        else {
-               mfence();
+               if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
                for (i = 0; i < count; i++) {
                        daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
                        eva = daddr + PAGE_SIZE;
-                       for (; daddr < eva; daddr += cpu_clflush_line_size)
-                               clflush(daddr);
+                       for (; daddr < eva; daddr += cpu_clflush_line_size) {
+                               if (useclflushopt)
+                                       clflushopt(daddr);
+                               else
+                                       clflush(daddr);
+                       }
                }
-               mfence();
+               if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
        }
 }
 
index 705d915728fb6670d593ef7a3c0716c744c4af82..a4a49eae1627821a49fea59136be2f45786c64bc 100644 (file)
@@ -826,14 +826,18 @@ initializecpucache(void)
         * CPUID_SS feature even though the native CPU supports it.
         */
        TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
        /*
-        * Allow to disable CLFLUSH feature manually by
-        * hw.clflush_disable tunable.
+        * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+        * by setting the hw.clflush_disable tunable.
         */
-       if (hw_clflush_disable == 1)
+       if (hw_clflush_disable == 1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
 
 #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
        /*
index 270845b5516cb3aed992aca4be40e17088e412a3..7fffdfc254a3728bf48d98ddff1bd297b4d1f1b5 100644 (file)
@@ -1234,9 +1234,8 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
 
        if ((cpu_feature & CPUID_SS) != 0 && !force)
                ; /* If "Self Snoop" is supported and allowed, do nothing. */
-       else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+       else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
            eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
 #ifdef DEV_APIC
                /*
                 * XXX: Some CPUs fault, hang, or trash the local APIC
@@ -1256,8 +1255,21 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
                 */
                mfence();
                for (; sva < eva; sva += cpu_clflush_line_size)
-                       clflush(sva);
+                       clflushopt(sva);
                mfence();
+       } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+           eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+               if (pmap_kextract(sva) == lapic_paddr)
+                       return;
+               /*
+                * Writes are ordered by CLFLUSH on Intel CPUs.
+                */
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
+               for (; sva < eva; sva += cpu_clflush_line_size)
+                       clflush(sva);
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
        } else {
 
                /*
@@ -5224,8 +5236,10 @@ pmap_flush_page(vm_page_t m)
 {
        struct sysmaps *sysmaps;
        vm_offset_t sva, eva;
+       bool useclflushopt;
 
-       if ((cpu_feature & CPUID_CLFSH) != 0) {
+       useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
+       if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) {
                sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
                mtx_lock(&sysmaps->lock);
                if (*sysmaps->CMAP2)
@@ -5239,13 +5253,20 @@ pmap_flush_page(vm_page_t m)
 
                /*
                 * Use mfence despite the ordering implied by
-                * mtx_{un,}lock() because clflush is not guaranteed
-                * to be ordered by any other instruction.
+                * mtx_{un,}lock() because clflush on non-Intel CPUs
+                * and clflushopt are not guaranteed to be ordered by
+                * any other instruction.
                 */
-               mfence();
-               for (; sva < eva; sva += cpu_clflush_line_size)
-                       clflush(sva);
-               mfence();
+               if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
+               for (; sva < eva; sva += cpu_clflush_line_size) {
+                       if (useclflushopt)
+                               clflushopt(sva);
+                       else
+                               clflush(sva);
+               }
+               if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
                *sysmaps->CMAP2 = 0;
                sched_unpin();
                mtx_unlock(&sysmaps->lock);