xenbits.xensource.com Git - people/andrewcoop/xen.git/commitdiff
x86/bitops: Account for POPCNT errata on earlier Intel CPUs
author Andrew Cooper <andrew.cooper3@citrix.com>
Tue, 25 Mar 2025 18:02:03 +0000 (18:02 +0000)
committer Andrew Cooper <andrew.cooper3@citrix.com>
Wed, 26 Mar 2025 11:54:59 +0000 (11:54 +0000)
Manually break the false dependency for the benefit of cases such as
bitmap_weight() which is a reasonable hotpath.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
xen/arch/x86/include/asm/bitops.h

index bb9d7564602321e7933b0952ba4b8e8c4fbfe244..87eac7782f103bf9286b0dbd1aa239f86f17846b 100644 (file)
@@ -488,10 +488,16 @@ static always_inline unsigned int arch_hweightl(unsigned long x)
      *
      * This limits the POPCNT instruction to using the same ABI as a function
      * call (input in %rdi, output in %eax) but that's fine.
+     *
+     * On Intel CPUs prior to Cannon Lake, the POPCNT instruction has a false
+     * input dependency on its destination register (errata HSD146, SKL029
+     * amongst others), impacting loops such as bitmap_weight().  Insert an
+     * XOR to manually break the dependency.
      */
     alternative_io("call arch_generic_hweightl",
+                   "xor %k[res], %k[res]\n\t"
                    "popcnt %[val], %q[res]", X86_FEATURE_POPCNT,
-                   ASM_OUTPUT2([res] "=a" (r) ASM_CALL_CONSTRAINT),
+                   ASM_OUTPUT2([res] "=&a" (r) ASM_CALL_CONSTRAINT),
                    [val] "D" (x));
 
     return r;