INSN_FP(cmp, 0f, c2),
INSN(comisd, 66, 0f, 2f, el, q, el),
INSN(comiss, , 0f, 2f, el, d, el),
+ INSN(cvtpd2ps, 66, 0f, 5a, vl, q, vl),
+ INSN(cvtph2ps, 66, 0f38, 13, vl_2, d_nb, vl),
+ INSN(cvtps2pd, , 0f, 5a, vl_2, d, vl),
+ INSN(cvtps2ph, 66, 0f3a, 1d, vl_2, d_nb, vl),
+ INSN(cvtsd2ss, f2, 0f, 5a, el, q, el),
+ INSN(cvtss2sd, f3, 0f, 5a, el, d, el),
INSN_FP(div, 0f, 5e),
INSN(fmadd132, 66, 0f38, 98, vl, sd, vl),
INSN(fmadd132, 66, 0f38, 99, el, sd, el),
else
printf("skipped\n");
+ printf("%-40s", "Testing vcvtph2ps 32(%ecx),%zmm7{%k4}...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vcvtph2ps);
+ decl_insn(evex_vcvtps2ph);
+
+ asm volatile ( "vpternlogd $0x81, %%zmm7, %%zmm7, %%zmm7\n\t"
+ "kmovw %1,%%k4\n"
+ put_insn(evex_vcvtph2ps, "vcvtph2ps 32(%0), %%zmm7%{%%k4%}")
+ :: "c" (NULL), "r" (0x3333) );
+
+ set_insn(evex_vcvtph2ps);
+ memset(res, 0xff, 128);
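+ /* %k4 = 0x3333: FP16 element pairs 0/1, 4/5, 8/9 and 12/13 are selected. */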
+ res[8] = 0x40003c00; /* (1.0, 2.0) */
+ res[10] = 0x44004200; /* (3.0, 4.0) */
+ res[12] = 0x3400b800; /* (-.5, .25) */
+ res[14] = 0xbc000000; /* (0.0, -1.) */
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
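+ /* Save the (masked) conversion result; it also serves as vcvtps2ph input below. */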
+ asm volatile ( "vmovups %%zmm7, %0" : "=m" (res[16]) );
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vcvtph2ps) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing vcvtps2ph $0,%zmm3,64(%edx){%k4}...");
+ asm volatile ( "vmovups %0, %%zmm3\n"
+ put_insn(evex_vcvtps2ph, "vcvtps2ph $0, %%zmm3, 128(%1)%{%%k4%}")
+ :: "m" (res[16]), "d" (NULL) );
+
+ set_insn(evex_vcvtps2ph);
+ regs.edx = (unsigned long)res;
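+ /* 128(%edx) == res + 32; pre-fill with 0xcc to tell masked-off elements apart. */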
+ memset(res + 32, 0xcc, 32);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vcvtps2ph) )
+ goto fail;
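+ /* Dwords masked off by %k4 keep their 0xcc filler at the destination; match that in the reference data. */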
+ res[15] = res[13] = res[11] = res[9] = 0xcccccccc;
+ if ( memcmp(res + 8, res + 32, 32) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
#undef decl_insn
#undef put_insn
#undef set_insn
[0x52 ... 0x53] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_single_fp },
[0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
[0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
- [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+ [0x5a] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
+ [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
[0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
[0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0x0c ... 0x0d] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
[0x0e ... 0x0f] = { .simd_size = simd_packed_fp },
[0x10 ... 0x12] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
- [0x13] = { .simd_size = simd_other, .two_op = 1 },
+ [0x13] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x14 ... 0x16] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
[0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 },
[0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
[0x1a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 },
[0x1b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
- [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
+ [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x20] = { .simd_size = simd_none, .d8s = 0 },
[0x21] = { .simd_size = simd_other, .d8s = 2 },
modrm_mod = 3;
break;
+ case 0x5a: /* vcvtps2pd needs special casing */
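+ /*
+  * Without a legacy prefix this is vcvtps2pd, whose memory source is only
+  * half the vector width, so halve the Disp8 scaling (no change when
+  * broadcasting).
+  */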
+ if ( disp8scale && !evex.pfx && !evex.brs )
+ --disp8scale;
+ break;
+
case 0x7e: /* vmovq xmm/m64,xmm needs special casing */
if ( disp8scale == 2 && evex.pfx == vex_f3 )
disp8scale = 3;
CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5d): /* vmin{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5e): /* vdiv{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5f): /* vmax{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ avx512f_all_fp:
generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) ||
(ea.type != OP_REG && evex.brs &&
(evex.pfx & VEX_PREFIX_SCALAR_MASK))),
EXC_UD);
goto simd_zmm;
CASE_SIMD_ALL_FP(, 0x0f, 0x5a): /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */
- CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5a): /* vcvtp{s,d}2p{s,d} xmm/mem,xmm */
+ CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5a): /* vcvtp{s,d}2p{s,d} {x,y}mm/mem,{x,y}mm */
/* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */
op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) +
!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK));
if ( vex.opcx == vex_none )
goto simd_0f_sse2;
goto simd_0f_avx;
+ CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5a): /* vcvtp{s,d}2p{s,d} [xyz]mm/mem,[xyz]mm{k} */
+ /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm{k} */
+ op_bytes = 4 << (((evex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + evex.lr) +
+ evex.w);
+ goto avx512f_all_fp;
+
CASE_SIMD_PACKED_FP(, 0x0f, 0x5b): /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x5b): /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
case X86EMUL_OPC_F3(0x0f, 0x5b): /* cvttps2dq xmm/mem,xmm */
op_bytes = 8 << vex.l;
goto simd_0f_ymm;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x13): /* vcvtph2ps {x,y}mm/mem,[xyz]mm{k} */
+ generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs), EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ if ( !evex.brs )
+ avx512_vlen_check(false);
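+ /* Source is FP16: half the vector width, 2 bytes per element. */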
+ op_bytes = 8 << evex.lr;
+ elem_bytes = 2;
+ goto simd_zmm;
+
case X86EMUL_OPC_VEX_66(0x0f38, 0x16): /* vpermps ymm/m256,ymm,ymm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x36): /* vpermd ymm/m256,ymm,ymm */
generate_exception_if(!vex.l || vex.w, EXC_UD);
goto avx512f_imm8_no_sae;
case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,[xyz]mm,{x,y}mm/mem{k} */
{
uint32_t mxcsr;
- generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD);
- host_and_vcpu_must_have(f16c);
fail_if(!ops->write);
+ if ( evex_encoded() )
+ {
+ generate_exception_if((evex.w || evex.reg != 0xf || !evex.RX ||
+ (ea.type != OP_REG && (evex.z || evex.brs))),
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ avx512_vlen_check(false);
+ opc = init_evex(stub);
+ }
+ else
+ {
+ generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD);
+ host_and_vcpu_must_have(f16c);
+ opc = init_prefixes(stub);
+ }
+
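+ /* FP16 output size: 2 bytes per source element, i.e. half the source vector width. */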
+ op_bytes = 8 << evex.lr;
- opc = init_prefixes(stub);
opc[0] = b;
opc[1] = modrm;
if ( ea.type == OP_MEM )
{
/* Convert memory operand to (%rAX). */
vex.b = 1;
+ evex.b = 1;
opc[1] &= 0x38;
}
opc[2] = imm1;
- insn_bytes = PFX_BYTES + 3;
+ if ( evex_encoded() )
+ {
+ unsigned int full = 0;
+
+ insn_bytes = EVEX_PFX_BYTES + 3;
+ copy_EVEX(opc, evex);
+
+ if ( ea.type == OP_MEM && evex.opmsk )
+ {
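+ /* Mask covering all op_bytes / 2 FP16 elements of the destination. */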
+ full = 0xffff >> (16 - op_bytes / 2);
+ op_mask &= full;
+ if ( !op_mask )
+ goto complete_insn;
+
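+ /* Trim the store to the span between the first and last set mask bits (2 bytes per element). */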
+ first_byte = __builtin_ctz(op_mask);
+ op_mask >>= first_byte;
+ full >>= first_byte;
+ first_byte <<= 1;
+ op_bytes = (32 - __builtin_clz(op_mask)) << 1;
+
+ /*
+ * We may need to read (parts of) the memory operand for the
+ * purpose of merging in order to avoid splitting the write
+ * below into multiple ones.
+ */
+ if ( op_mask != full &&
+ (rc = ops->read(ea.mem.seg,
+ truncate_ea(ea.mem.off + first_byte),
+ (void *)mmvalp + first_byte, op_bytes,
+ ctxt)) != X86EMUL_OKAY )
+ goto done;
+ }
+ }
+ else
+ {
+ insn_bytes = PFX_BYTES + 3;
+ copy_VEX(opc, vex);
+ }
opc[3] = 0xc3;
- copy_VEX(opc, vex);
/* Latch MXCSR - we may need to restore it below. */
invoke_stub("stmxcsr %[mxcsr]", "",
"=m" (*mmvalp), [mxcsr] "=m" (mxcsr) : "a" (mmvalp));
if ( ea.type == OP_MEM )
{
- rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, 8 << vex.l, ctxt);
+ rc = ops->write(ea.mem.seg, truncate_ea(ea.mem.off + first_byte),
+ (void *)mmvalp + first_byte, op_bytes, ctxt);
if ( rc != X86EMUL_OKAY )
{
asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );