--- /dev/null
+#include "simd.h"
+
+ENTRY(fma_test);
+
+/*
+ * to_bool(cmp): collapse a vector comparison result into a scalar truth
+ * value.  Vector compares yield all-ones in each lane that matched, so:
+ *  - below 16 bytes, checking that lane 0 is all-ones suffices for the
+ *    configurations this is built for;
+ *  - for 128-/256-bit vectors, the VTESTC{PS,PD}[256] builtins report
+ *    (via CF, which the builtin returns) whether every sign bit of cmp
+ *    is set when tested against an all-ones mask, produced here by
+ *    (vec_t){} == 0.
+ */
+#if VEC_SIZE < 16
+# define to_bool(cmp) (!~(cmp)[0])
+#elif VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+# define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+# define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+# define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+# define to_bool(cmp) __builtin_ia32_vtestcpd256(cmp, (vec_t){} == 0)
+# endif
+#endif
+
+/*
+ * addsub(x, y): wrapper around the ADDSUBP{S,D}[256] builtins
+ * (alternating subtract in even lanes / add in odd lanes).
+ * fmaddsub(x, y, z): wrapper around the FMA4 VFMADDSUBP{S,D}[256]
+ * builtins (multiply, then alternating subtract/add of z) - only
+ * defined when compiling with -mfma4, so the test body guards its
+ * use with #if defined(addsub) && defined(fmaddsub).
+ */
+#if VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+# define addsub(x, y) __builtin_ia32_addsubps(x, y)
+# if defined(__FMA4__)
+# define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
+# endif
+# elif FLOAT_SIZE == 8
+# define addsub(x, y) __builtin_ia32_addsubpd(x, y)
+# if defined(__FMA4__)
+# define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
+# endif
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+# define addsub(x, y) __builtin_ia32_addsubps256(x, y)
+# if defined(__FMA4__)
+# define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
+# endif
+# elif FLOAT_SIZE == 8
+# define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
+# if defined(__FMA4__)
+# define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
+# endif
+# endif
+#endif
+
+/*
+ * Exercise fused multiply-add code generation: each z below is an
+ * expression the compiler may contract into an FMA-family instruction,
+ * and it is compared against a reference value (x or y) computed with
+ * separate multiply and add/sub steps.  The values are exact small
+ * integers, so the fused and unfused results must compare equal.
+ *
+ * touch() comes from simd.h; it presumably acts as a compiler barrier
+ * forcing the operand to be re-read (e.g. as a memory operand) rather
+ * than letting the earlier computation be reused - TODO confirm against
+ * the simd.h definition.
+ *
+ * Returns 0 on success, or __LINE__ of the first failing check.
+ */
+int fma_test(void)
+{
+    unsigned int i;
+    vec_t x, y, z, src, inv, one;
+
+    /* src = 1..N, inv = N..1, one = vector of 1s. */
+    for ( i = 0; i < ELEM_COUNT; ++i )
+    {
+        src[i] = i + 1;
+        inv[i] = ELEM_COUNT - i;
+        one[i] = 1;
+    }
+
+    /* Multiply-add/sub with both products: (src +/- 1) * inv. */
+    x = (src + one) * inv;
+    y = (src - one) * inv;
+    touch(src);
+    /* inv*src + inv == (src + 1) * inv == x  (fmadd candidate). */
+    z = inv * src + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(src);
+    /* -(inv*src) - inv == -x  (fnmsub candidate). */
+    z = -inv * src - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(src);
+    /* inv*src - inv == (src - 1) * inv == y  (fmsub candidate). */
+    z = inv * src - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(src);
+    /* -(inv*src) + inv == -y  (fnmadd candidate). */
+    z = -inv * src + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(src);
+
+    /* Same four shapes with a multiply by 1, barrier on inv instead. */
+    x = src + inv;
+    y = src - inv;
+    touch(inv);
+    z = src * one + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(inv);
+    z = src * one - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(inv);
+
+#if defined(addsub) && defined(fmaddsub)
+    /* FMA4 fmaddsub vs. explicit multiply followed by addsub, with the
+     * barrier first on the addend (one), then on a multiplicand (inv). */
+    x = addsub(src * inv, one);
+    y = addsub(src * inv, -one);
+    touch(one);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(one);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(one);
+
+    x = addsub(src * inv, one);
+    touch(inv);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(inv);
+#endif
+
+    return 0;
+}
[0x44] = { .simd_size = simd_packed_int },
[0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+ [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+ [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
+ [0x6a ... 0x6b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+ [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+ [0x6e ... 0x6f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+ [0x78 ... 0x79] = { .simd_size = simd_packed_fp, .four_op = 1 },
+ [0x7a ... 0x7b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+ [0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+ [0x7e ... 0x7f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
[0xcc] = { .simd_size = simd_other },
[0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
[0xf0] = {},
#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
#define vcpu_has_sse4a() vcpu_has(0x80000001, ECX, 6, ctxt, ops)
#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX, 7, ctxt, ops)
+#define vcpu_has_fma4() vcpu_has(0x80000001, ECX, 16, ctxt, ops)
#define vcpu_has_tbm() vcpu_has(0x80000001, ECX, 21, ctxt, ops)
#define vcpu_has_bmi1() vcpu_has( 7, EBX, 3, ctxt, ops)
#define vcpu_has_hle() vcpu_has( 7, EBX, 4, ctxt, ops)
simd_0f_imm8_avx:
host_and_vcpu_must_have(avx);
}
+ simd_0f_imm8_ymm:
get_fpu(X86EMUL_FPU_ymm, &fic);
}
else if ( vex.pfx )
generate_exception_if(vex.w, EXC_UD);
goto simd_0f_int_imm8;
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x5c): /* vfmaddsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x5d): /* vfmaddsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x5e): /* vfmsubaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmsubaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x5f): /* vfmsubaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmsubaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x68): /* vfmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x69): /* vfmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x6a): /* vfmaddss xmm,xmm/m32,xmm,xmm */
+ /* vfmaddss xmm/m32,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x6b): /* vfmaddsd xmm,xmm/m64,xmm,xmm */
+ /* vfmaddsd xmm/m64,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x6c): /* vfmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x6d): /* vfmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x6e): /* vfmsubss xmm,xmm/m32,xmm,xmm */
+ /* vfmsubss xmm/m32,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x6f): /* vfmsubsd xmm,xmm/m64,xmm,xmm */
+ /* vfmsubsd xmm/m64,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x78): /* vfnmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfnmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x79): /* vfnmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfnmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x7a): /* vfnmaddss xmm,xmm/m32,xmm,xmm */
+ /* vfnmaddss xmm/m32,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x7b): /* vfnmaddsd xmm,xmm/m64,xmm,xmm */
+ /* vfnmaddsd xmm/m64,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x7c): /* vfnmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfnmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x7d): /* vfnmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+ /* vfnmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x7e): /* vfnmsubss xmm,xmm/m32,xmm,xmm */
+ /* vfnmsubss xmm/m32,xmm,xmm,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x7f): /* vfnmsubsd xmm,xmm/m64,xmm,xmm */
+ /* vfnmsubsd xmm/m64,xmm,xmm,xmm */
+ host_and_vcpu_must_have(fma4);
+ goto simd_0f_imm8_ymm;
+
case X86EMUL_OPC_66(0x0f3a, 0x60): /* pcmpestrm $imm8,xmm/m128,xmm */
case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
case X86EMUL_OPC_66(0x0f3a, 0x61): /* pcmpestri $imm8,xmm/m128,xmm */