tools/security/xensec_tool
tools/tests/x86_emulator/*.bin
tools/tests/x86_emulator/*.tmp
+tools/tests/x86_emulator/3dnow*.[ch]
tools/tests/x86_emulator/asm
tools/tests/x86_emulator/avx*.[ch]
tools/tests/x86_emulator/blowfish.h
run: $(TARGET)
./$(TARGET)
-SIMD := sse sse2 sse4 avx avx2 xop
+SIMD := 3dnow sse sse2 sse4 avx avx2 xop
FMA := fma4 fma
SG := avx2-sg
TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
blowfish-cflags := ""
blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
+3dnow-vecs := 8
+3dnow-ints :=
+3dnow-flts := 4
sse-vecs := 16
sse-ints :=
sse-flts := 4
xop-flts := $(avx-flts)
# For AVX and later, have the compiler avoid XMM0 to widen coverage of
-# the VEX.vvvv checks in the emulator.
-non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0)
+# the VEX.vvvv checks in the emulator. For 3DNow!, however, force SSE
+# use for floating point operations, to avoid mixing MMX and FPU register
+# uses. Also enable 3DNow! extensions, but note that we can't use 3dnowa
+# as the test flavor right away since -m3dnowa is understood only by gcc 7.x
+# and newer (older ones want a machine model instead, hence -march=k8 below).
+3dnowa := $(call cc-option,$(CC),-m3dnowa,-march=k8)
+non-sse = $(if $(filter sse%,$(1)),,$(if $(filter 3dnow%,$(1)),-msse -mfpmath=sse $(3dnowa),-ffixed-xmm0))
define simd-defs
$(1)-cflags := \
$(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \
for cflags in $($*-cflags) $($*-cflags-$(arch)); do \
$(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \
+ prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \
flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
- (echo "static const unsigned int $(subst -,_,$*)_$(arch)$${flavor}[] = {"; \
+ (echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] = {"; \
od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \
echo "};") >>$@.new; \
rm -f $*.bin; \
#if VEC_SIZE == FLOAT_SIZE
# define to_int(x) ((vec_t){ (int)(x)[0] })
+#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
+# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
#elif VEC_SIZE == 16 && defined(__SSE2__)
# if FLOAT_SIZE == 4
# define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
})
#endif
-#if FLOAT_SIZE == 4 && defined(__SSE__)
+#if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__)
+# define max __builtin_ia32_pfmax
+# define min __builtin_ia32_pfmin
+# define recip(x) ({ /* 3DNow! flavor: pfrcp on x and on swapped x, then pfrcpit1/pfrcpit2 */ \
+ vec_t t_ = __builtin_ia32_pfrcp(x); /* pfrcp applied to x as given */ \
+ touch(x); /* NOTE(review): touch() is not defined in view - presumably a compiler barrier on x; confirm */ \
+ t_[1] = __builtin_ia32_pfrcp(__builtin_ia32_pswapdsf(x))[0]; /* element 1 comes from element 0 of pfrcp on the swapped vector */ \
+ touch(x); \
+ __builtin_ia32_pfrcpit2(__builtin_ia32_pfrcpit1(t_, x), t_); /* value of the macro: pfrcpit1(t_, x) fed into pfrcpit2 together with t_ */ \
+})
+# define rsqrt(x) ({ /* 3DNow! flavor: pfrsqrt on x and on swapped x, then pfrsqit1/pfrcpit2 */ \
+ vec_t t_ = __builtin_ia32_pfrsqrt(x); /* pfrsqrt applied to x as given */ \
+ touch(x); /* NOTE(review): touch() is not defined in view - presumably a compiler barrier on x; confirm */ \
+ t_[1] = __builtin_ia32_pfrsqrt(__builtin_ia32_pswapdsf(x))[0]; /* element 1 comes from element 0 of pfrsqrt on the swapped vector */ \
+ touch(x); \
+ __builtin_ia32_pfrcpit2(__builtin_ia32_pfrsqit1(__builtin_ia32_pfmul(t_, t_), x), t_); /* value of the macro: pfrsqit1(t_ * t_, x) fed into pfrcpit2 together with t_ */ \
+})
+#elif FLOAT_SIZE == 4 && defined(__SSE__)
# if VEC_SIZE == 32 && defined(__AVX__)
# if defined(__AVX2__)
# define broadcast(x) \
#include "x86-emulate.h"
#include "blowfish.h"
+#include "3dnow.h"
#include "sse.h"
#include "sse2.h"
#include "sse4.h"
return regs->eax == 2 && regs->edx == 1;
}
+static bool simd_check__3dnow(void) /* name carries a leading '_' since the flavor starts with a digit */
+{
+ return cpu_has_3dnow_ext && cpu_has_sse; /* test blob is built with -msse -mfpmath=sse plus 3DNow! extensions */
+}
+
static bool simd_check_sse(void)
{
return cpu_has_sse;
#else
# define SIMD(desc, feat, form) SIMD_(32, desc, feat, form)
#endif
+ SIMD(3DNow! single, _3dnow, 8f4),
SIMD(SSE scalar single, sse, f4),
SIMD(SSE packed single, sse, 16f4),
SIMD(SSE2 scalar single, sse2, f4),
(res.b & (1U << 8)) != 0; \
})
+#define cpu_has_3dnow_ext ({ /* true iff bit 30 of res.d is set for CPUID leaf 0x80000001 */ \
+ struct cpuid_leaf res; \
+ emul_test_cpuid(0x80000001, 0, &res, NULL); /* same leaf as used by cpu_has_sse4a */ \
+ (res.d & (1U << 30)) != 0; /* value of the statement expression */ \
+})
+
#define cpu_has_sse4a ({ \
struct cpuid_leaf res; \
emul_test_cpuid(0x80000001, 0, &res, NULL); \
[0xff] = { ModRM }
};
+/*
+ * The next two tables are indexed by the high nibble of the opcode extension
+ * byte (the one that's encoded like an immediate), with each table element
+ * then bit-indexed by the low nibble of that byte. For example, pfadd is bit
+ * 0xe of element [0x9], i.e. an opcode extension byte of 0x9e. Elements
+ * without an explicit initializer are zero, i.e. list no insns at all.
+ *
+ * The 0f 0f opcode handler consults _3dnow_table[] first (hits require the
+ * 3dnow feature), then _3dnow_ext_table[] (hits require the 3dnow_ext
+ * feature). A byte whose bit is set in neither table gets EXC_UD raised.
+ */
+static const uint16_t _3dnow_table[16] = {
+ [0x0] = (1 << 0xd) /* pi2fd */,
+ [0x1] = (1 << 0xd) /* pf2id */,
+ [0x9] = (1 << 0x0) /* pfcmpge */ |
+ (1 << 0x4) /* pfmin */ |
+ (1 << 0x6) /* pfrcp */ |
+ (1 << 0x7) /* pfrsqrt */ |
+ (1 << 0xa) /* pfsub */ |
+ (1 << 0xe) /* pfadd */,
+ [0xa] = (1 << 0x0) /* pfcmpgt */ |
+ (1 << 0x4) /* pfmax */ |
+ (1 << 0x6) /* pfrcpit1 */ |
+ (1 << 0x7) /* pfrsqit1 */ |
+ (1 << 0xa) /* pfsubr */ |
+ (1 << 0xe) /* pfacc */,
+ [0xb] = (1 << 0x0) /* pfcmpeq */ |
+ (1 << 0x4) /* pfmul */ |
+ (1 << 0x6) /* pfrcpit2 */ |
+ (1 << 0x7) /* pmulhrw */ |
+ (1 << 0xf) /* pavgusb */,
+};
+
+static const uint16_t _3dnow_ext_table[16] = {
+ [0x0] = (1 << 0xc) /* pi2fw */,
+ [0x1] = (1 << 0xc) /* pf2iw */,
+ [0x8] = (1 << 0xa) /* pfnacc */ |
+ (1 << 0xe) /* pfpnacc */,
+ [0xb] = (1 << 0xb) /* pswapd */,
+};
+
/*
* "two_op" and "four_op" below refer to the number of register operands
* (one of which possibly also allowing to be a memory one). The named
#define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops)
#define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
vcpu_has_sse())
+#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, EDX, 30, ctxt, ops)
+#define vcpu_has_3dnow() vcpu_has(0x80000001, EDX, 31, ctxt, ops)
#define vcpu_has_lahf_lm() vcpu_has(0x80000001, ECX, 0, ctxt, ops)
#define vcpu_has_cr8_legacy() vcpu_has(0x80000001, ECX, 4, ctxt, ops)
#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
break;
+ case X86EMUL_OPC(0x0f, 0x0e): /* femms */
+ host_and_vcpu_must_have(3dnow);
+ asm volatile ( "femms" ); /* executed directly, no operands to set up */
+ break;
+
+ case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! (imm1: high nibble = table index, low nibble = bit) */
+ if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+ host_and_vcpu_must_have(3dnow);
+ else if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+ host_and_vcpu_must_have(3dnow_ext);
+ else
+ generate_exception(EXC_UD); /* imm1 is listed in neither table */
+
+ get_fpu(X86EMUL_FPU_mmx, &fic);
+
+ d = DstReg | SrcMem;
+ op_bytes = 8;
+ state->simd_size = simd_other;
+ goto simd_0f_imm8; /* label lies outside this view; the rest of the handling happens there */
+
#define CASE_SIMD_PACKED_INT(pfx, opc) \
case X86EMUL_OPC(pfx, opc): \
case X86EMUL_OPC_66(pfx, opc)
&& boot_cpu_has(X86_FEATURE_FFXSR))
#define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB)
#define cpu_has_rdtscp boot_cpu_has(X86_FEATURE_RDTSCP)
+#define cpu_has_3dnow_ext boot_cpu_has(X86_FEATURE_3DNOWEXT)
+#define cpu_has_3dnow boot_cpu_has(X86_FEATURE_3DNOW)
/* CPUID level 0x80000001.ecx */
#define cpu_has_cmp_legacy boot_cpu_has(X86_FEATURE_CMP_LEGACY)