static always_inline unsigned int arch_ffs(unsigned int x)
{
- int r;
+ unsigned int r;
+
+ if ( __builtin_constant_p(x > 0) && x > 0 )
+ {
+ /*
+ * A common code pattern is:
+ *
+ * while ( bits )
+ * {
+ * bit = ffs(bits);
+ * ...
+ *
+ * and the optimiser really can work with the knowledge of x being
+ * non-zero without knowing it's exact value, in which case we don't
+ * need to compensate for BSF's corner cases. Otherwise...
+ */
+ asm ( "bsf %[val], %[res]"
+ : [res] "=r" (r)
+ : [val] "rm" (x) );
+ }
+ else
+ {
+ /*
+ * ... the AMD manual states that BSF won't modify the destination
+ * register if x=0. The Intel manual states that the result is
+ * undefined, but the architects have said that the register is
+ * written back with it's old value (zero extended as normal).
+ */
+ asm ( "bsf %[val], %[res]"
+ : [res] "=r" (r)
+ : [val] "rm" (x), "[res]" (-1) );
+ }
- asm ( "bsf %1,%0\n\t"
- "jnz 1f\n\t"
- "mov $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
return r + 1;
}
#define arch_ffs arch_ffs