else
printf("skipped\n");
+ printf("%-40s", "Testing pcmpestri $0x1a,(%ecx),%xmm2...");
+ if ( stack_exec && cpu_has_sse4_2 )
+ {
+ decl_insn(pcmpestri);
+
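+ /*
+ * Since movq zero-extends, xmm2 ends up holding "abcdefgh" plus
+ * eight NUL bytes: this pattern matches it in bytes 0-8 and
+ * differs from byte 9 onwards.
+ */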
+ memcpy(res, "abcdefgh\0\1\2\3\4\5\6\7", 16);
+ asm volatile ( "movq %0, %%xmm2\n"
+ put_insn(pcmpestri, "pcmpestri $0b00011010, (%1), %%xmm2")
+ :: "m" (res[0]), "c" (NULL) );
+
+ set_insn(pcmpestri);
+ regs.eax = regs.edx = 12;
+ regs.ecx = (unsigned long)res;
+ regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_IF | X86_EFLAGS_OF;
+ rc = x86_emulate(&ctxt, &emulops);
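+ /*
+ * imm8 0x1a selects signed bytes, "equal each", negated result,
+ * LSB index. With both lengths at 12 the first set bit of the
+ * inverted match mask is the mismatch at byte 9; CF (non-empty
+ * result), ZF and SF (both lengths below 16) should be set, OF
+ * (bit 0 of the result) clear.
+ */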
+ if ( rc != X86EMUL_OKAY || !check_eip(pcmpestri) ||
+ regs.ecx != 9 ||
+ (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+ (X86_EFLAGS_CF | X86_EFLAGS_ZF | X86_EFLAGS_SF) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing pcmpestrm $0x5a,(%ecx),%xmm2...");
+ if ( stack_exec && cpu_has_sse4_2 )
+ {
+ decl_insn(pcmpestrm);
+
+ asm volatile ( "movq %0, %%xmm2\n"
+ put_insn(pcmpestrm, "pcmpestrm $0b01011010, (%1), %%xmm2")
+ :: "m" (res[0]), "c" (NULL) );
+
+ set_insn(pcmpestrm);
+ regs.ecx = (unsigned long)res;
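+ /* EAX and EDX still hold the lengths (12) set for pcmpestri above. */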
+ regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_IF | X86_EFLAGS_OF;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(pcmpestrm) )
+ goto fail;
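+ /*
+ * Bit 6 of the immediate selects a byte rather than a bit mask,
+ * left in %xmm0; condense it via pmovmskb to check that exactly
+ * the mismatching bytes 9-11 are flagged (0x0e00).
+ */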
+ asm ( "pmovmskb %%xmm0, %0" : "=r" (rc) );
+ if ( rc != 0x0e00 ||
+ (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+ (X86_EFLAGS_CF | X86_EFLAGS_ZF | X86_EFLAGS_SF) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing pcmpistri $0x1a,(%ecx),%xmm2...");
+ if ( stack_exec && cpu_has_sse4_2 )
+ {
+ decl_insn(pcmpistri);
+
+ asm volatile ( "movq %0, %%xmm2\n"
+ put_insn(pcmpistri, "pcmpistri $0b00011010, (%1), %%xmm2")
+ :: "m" (res[0]), "c" (NULL) );
+
+ set_insn(pcmpistri);
+ regs.eflags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_IF | X86_EFLAGS_OF;
+ rc = x86_emulate(&ctxt, &emulops);
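+ /*
+ * Implicit-length form: both strings terminate at byte 8, so the
+ * negated "equal each" mask is empty. Expect ECX = 16 (no match)
+ * and the preset CF to be cleared.
+ */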
+ if ( rc != X86EMUL_OKAY || !check_eip(pcmpistri) ||
+ regs.ecx != 16 ||
+ (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+ (X86_EFLAGS_ZF | X86_EFLAGS_SF) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing pcmpistrm $0x4a,(%ecx),%xmm2...");
+ if ( stack_exec && cpu_has_sse4_2 )
+ {
+ decl_insn(pcmpistrm);
+
+ asm volatile ( "movq %0, %%xmm2\n"
+ put_insn(pcmpistrm, "pcmpistrm $0b01001010, (%1), %%xmm2")
+ :: "m" (res[0]), "c" (NULL) );
+
+ set_insn(pcmpistrm);
+ regs.ecx = (unsigned long)res;
+ regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_IF;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(pcmpistrm) )
+ goto fail;
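+ /*
+ * Positive polarity on two identical implicit-length strings
+ * sets all 16 mask bytes in %xmm0, hence 0xffff from pmovmskb.
+ */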
+ asm ( "pmovmskb %%xmm0, %0" : "=r" (rc) );
+ if ( rc != 0xffff ||
+ (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+ (X86_EFLAGS_CF | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing vpcmpestri $0x7a,(%esi),%xmm2...");
+ if ( stack_exec && cpu_has_avx )
+ {
+ decl_insn(vpcmpestri);
+
+#ifdef __x86_64__
+ /*
+ * gas up to at least 2.27 doesn't honor an explicit "rex.w" for
+ * VEX/EVEX encoded instructions, and also doesn't provide any
+ * other means to control VEX.W.
+ */
+ asm volatile ( "movq %0, %%xmm2\n"
+ put_insn(vpcmpestri,
+ ".byte 0xC4, 0xE3, 0xF9, 0x61, 0x16, 0x7A")
+ :: "m" (res[0]) );
+#else
+ asm volatile ( "movq %0, %%xmm2\n"
+ put_insn(vpcmpestri,
+ "vpcmpestri $0b01111010, (%1), %%xmm2")
+ :: "m" (res[0]), "S" (NULL) );
+#endif
+
+ set_insn(vpcmpestri);
+#ifdef __x86_64__
+ regs.rax = ~0U + 1UL;
+ regs.rcx = ~0UL;
+#else
+ regs.eax = 0x7fffffff;
+#endif
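+ /*
+ * With VEX.W set the lengths come from %rax/%rdx: 2^32 in %rax
+ * must be clamped to 16 rather than truncated to zero, while
+ * %rdx still holds 12 from above. Presetting %rcx to ~0 is
+ * apparently meant to let the check below catch a result that
+ * wasn't zero-extended to 64 bits.
+ */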
+ regs.esi = (unsigned long)res;
+ regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_SF |
+ X86_EFLAGS_IF | X86_EFLAGS_OF;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vpcmpestri) ||
+ regs.ecx != 11 ||
+ (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+ (X86_EFLAGS_ZF | X86_EFLAGS_CF) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing stmxcsr (%edx)...");
if ( cpu_has_sse )
{
[0x2a] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x2b] = { .simd_size = simd_packed_int },
[0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
- [0x38 ... 0x3f] = { .simd_size = simd_packed_int },
+ [0x37 ... 0x3f] = { .simd_size = simd_packed_int },
[0x40] = { .simd_size = simd_packed_int },
[0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
[0xf0] = { .two_op = 1 },
[0x42] = { .simd_size = simd_packed_int },
[0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+ [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
[0xf0] = {},
};
case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq {x,y}mm/mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x37): /* vpcmpgtq {x,y}mm/mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
}
goto movdqa;
+ case X86EMUL_OPC_66(0x0f38, 0x37): /* pcmpgtq xmm/m128,xmm */
+ host_and_vcpu_must_have(sse4_2);
+ goto simd_0f38_common;
+
case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
vcpu_must_have(movbe);
generate_exception_if(vex.w, EXC_UD);
goto simd_0f_int_imm8;
+ case X86EMUL_OPC_66(0x0f3a, 0x60): /* pcmpestrm $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_66(0x0f3a, 0x61): /* pcmpestri $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x61): /* vpcmpestri $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_66(0x0f3a, 0x62): /* pcmpistrm $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x62): /* vpcmpistrm $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_66(0x0f3a, 0x63): /* pcmpistri $imm8,xmm/m128,xmm */
+ case X86EMUL_OPC_VEX_66(0x0f3a, 0x63): /* vpcmpistri $imm8,xmm/m128,xmm */
+ if ( vex.opcx == vex_none )
+ {
+ host_and_vcpu_must_have(sse4_2);
+ get_fpu(X86EMUL_FPU_xmm, &fic);
+ }
+ else
+ {
+ generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+ host_and_vcpu_must_have(avx);
+ get_fpu(X86EMUL_FPU_ymm, &fic);
+ }
+
+ opc = init_prefixes(stub);
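+ /*
+ * The legacy encoding needs the 0x3a escape byte emitted
+ * explicitly; the VEX forms carry the opcode map in the prefix.
+ */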
+ if ( vex.opcx == vex_none )
+ opc++[0] = 0x3a;
+ opc[0] = b;
+ opc[1] = modrm;
+ if ( ea.type == OP_MEM )
+ {
+ /* Convert memory operand to (%rDI). */
+ rex_prefix &= ~REX_B;
+ vex.b = 1;
+ opc[1] &= 0x3f;
+ opc[1] |= 0x07;
+
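+ /*
+ * Buffer the 16-byte memory operand in mmvalp, which the stub
+ * invocations below pass in %rdi, so the stub doesn't touch
+ * guest memory itself.
+ */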
+ rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, ctxt);
+ if ( rc != X86EMUL_OKAY )
+ goto done;
+ }
+ opc[2] = imm1;
+ fic.insn_bytes = PFX_BYTES + 3;
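+ /* Terminate the stub with a RET (0xc3) following the immediate. */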
+ opc[3] = 0xc3;
+ if ( vex.opcx == vex_none )
+ {
+ /* Cover for extra prefix byte. */
+ --opc;
+ ++fic.insn_bytes;
+ }
+
+ copy_REX_VEX(opc, rex_prefix, vex);
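+ /*
+ * With REX.W/VEX.W set the insn takes 64-bit length inputs,
+ * hence the separate invocation passing the full %rax/%rdx.
+ */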
+#ifdef __x86_64__
+ if ( rex_prefix & REX_W )
+ emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
+ "a" (_regs.rax), "d" (_regs.rdx));
+ else
+#endif
+ emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
+ "a" (_regs.eax), "d" (_regs.edx));
+
+ state->simd_size = simd_none;
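+ /*
+ * The odd opcodes (pcmpestri/pcmpistri) yield an index, captured
+ * in dst.val via the "=c" stub output; the even ones left their
+ * mask in %xmm0. EFLAGS were already merged by emulate_stub().
+ */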
+ if ( b & 1 )
+ _regs.r(cx) = (uint32_t)dst.val;
+ dst.type = OP_NONE;
+ break;
+
case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
vcpu_must_have(bmi2);
generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);