INSN(maxsh, f3, map5, 5f, el, fp16, el),
INSN(minph, , map5, 5d, vl, fp16, vl),
INSN(minsh, f3, map5, 5d, el, fp16, el),
+ INSN(movsh, f3, map5, 10, el, fp16, el),
+ INSN(movsh, f3, map5, 11, el, fp16, el),
INSN(mulph, , map5, 59, vl, fp16, vl),
INSN(mulsh, f3, map5, 59, el, fp16, el),
INSN(reduceph, , 0f3a, 56, vl, fp16, vl),
INSN(ucomish, , map5, 2e, el, fp16, el),
};
+static const struct test avx512_fp16_128[] = {
+ INSN(movw, 66, map5, 6e, el, fp16, el),
+ INSN(movw, 66, map5, 7e, el, fp16, el),
+};
+
static const struct test gfni_all[] = {
INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl),
RUN(avx512_vp2intersect, all);
RUN(avx512_vpopcntdq, all);
RUN(avx512_fp16, all);
+ RUN(avx512_fp16, 128);
if ( cpu_has_avx512f )
{
{ { 0xce }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineqb */
{ { 0xcf }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineinvqb */
}, evex_map5[] = {
+ { { 0x10 }, 2, T, R, pfx_f3, W0, LIG }, /* vmovsh */
+ { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */
{ { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
{ { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
{ { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
{ { 0x5e }, 2, T, R, pfx_f3, W0, LIG }, /* vdivsh */
{ { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */
{ { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
+ { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */
+ { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */
};
static const struct {
else
printf("skipped\n");
+ printf("%-40s", "Testing vmovsh 8(%ecx),%xmm5...");
+ if ( stack_exec && cpu_has_avx512_fp16 )
+ {
+ decl_insn(vmovsh_from_mem);
+ decl_insn(vmovw_to_gpr);
+
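+ /* Pre-fill the destination with all ones, so vmovsh's zeroing is observable. */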
+ asm volatile ( "vpcmpeqw %%ymm5, %%ymm5, %%ymm5\n\t"
+ put_insn(vmovsh_from_mem,
+ /* vmovsh 8(%0), %%xmm5 */
+ ".byte 0x62, 0xf5, 0x7e, 0x08\n\t"
+ ".byte 0x10, 0x69, 0x04")
+ :: "c" (NULL) );
+
+ set_insn(vmovsh_from_mem);
+ res[2] = 0x3c00bc00;
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_from_mem) )
+ goto fail;
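+ /*
+  * Build the expected value in %zmm4: word 0 holds the half loaded from
+  * res[2], everything else is zeroed. All checked words of %zmm5 have to
+  * match, i.e. vmovsh must have loaded 0xbc00 and cleared the other elements.
+  */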
+ asm volatile ( "kmovw %2, %%k1\n\t"
+ "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t"
+ "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t"
+ "kmovw %%k0, %0"
+ : "=g" (rc)
+ : "m" (res[2]), "r" (1) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing vmovsh %xmm4,2(%eax){%k3}...");
+ memset(res, ~0, 8);
+ res[2] = 0xbc00ffff;
+ memset(res + 3, ~0, 8);
+ regs.eax = (unsigned long)res;
+ regs.ecx = ~0;
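+ /* Pass 0 uses an all-clear mask (the store must not happen); pass 1 sets bit 0 of %k3. */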
+ for ( i = 0; i < 2; ++i )
+ {
+ decl_insn(vmovsh_to_mem);
+
+ asm volatile ( "kmovw %1, %%k3\n\t"
+ put_insn(vmovsh_to_mem,
+ /* vmovsh %%xmm4, 2(%0)%{%%k3%} */
+ ".byte 0x62, 0xf5, 0x7e, 0x0b\n\t"
+ ".byte 0x11, 0x60, 0x01")
+ :: "a" (NULL), "r" (i) );
+
+ set_insn(vmovsh_to_mem);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_to_mem) ||
+ memcmp(res, res + 3 - i, 8) )
+ goto fail;
+ }
+ printf("okay\n");
+
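+ /*
+  * %xmm5 still holds the value loaded by the vmovsh test above, and %ecx was
+  * set to all ones, so this also checks that vmovw writes the full,
+  * zero-extended 32-bit register.
+  */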
+ printf("%-40s", "Testing vmovw %xmm5,%ecx...");
+ asm volatile ( put_insn(vmovw_to_gpr,
+ /* vmovw %%xmm5, %0 */
+ ".byte 0x62, 0xf5, 0x7d, 0x08\n\t"
+ ".byte 0x7e, 0xe9")
+ :: "c" (NULL) );
+ set_insn(vmovw_to_gpr);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(vmovw_to_gpr) ||
+ regs.ecx != 0xbc00 )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing invpcid 16(%ecx),%%edx...");
if ( stack_exec )
{
break;
case d8s_dq64:
- return 2 + (s->op_bytes == 8);
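+ /* The fp16 form (vmovw) accesses 2-byte elements, hence the smaller Disp8 scale. */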
+ return 1 + !s->fp16 + (s->op_bytes == 8);
}
switch ( s->simd_size )
s->fp16 = true;
s->simd_size = simd_none;
break;
+
+ case 0x6e: /* vmovw r/m16, xmm */
+ d = (d & ~SrcMask) | SrcMem16;
+ /* fall through */
+ case 0x7e: /* vmovw xmm, r/m16 */
+ if ( s->evex.pfx == vex_66 )
+ s->fp16 = true;
+ s->simd_size = simd_none;
+ break;
}
/* Like above re-use twobyte_table[] here. */
#ifndef X86EMUL_NO_SIMD
+ case X86EMUL_OPC_EVEX_66(5, 0x7e): /* vmovw xmm,r/m16 */
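+ /* Only the memory form stores just 2 bytes; a GPR destination gets the full zero-extended 32-bit value. */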
+ ASSERT(dst.bytes >= 4);
+ if ( dst.type == OP_MEM )
+ dst.bytes = 2;
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(5, 0x6e): /* vmovw r/m16,xmm */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, X86_EXC_UD);
+ /* fall through */
case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
generate_exception_if((evex.lr || evex.opmsk || evex.brs ||
#ifndef X86EMUL_NO_SIMD
+ case X86EMUL_OPC_EVEX_F3(5, 0x10): /* vmovsh m16,xmm{k} */
+ /* vmovsh xmm,xmm,xmm{k} */
+ case X86EMUL_OPC_EVEX_F3(5, 0x11): /* vmovsh xmm,m16{k} */
+ /* vmovsh xmm,xmm,xmm{k} */
+ generate_exception_if(evex.brs, X86_EXC_UD);
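+ /* The memory forms take 2 operands; the register forms (sharing code with vsqrtsh below) take 3. */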
+ if ( ea.type == OP_MEM )
+ d |= TwoOp;
+ else
+ {
case X86EMUL_OPC_EVEX_F3(5, 0x51): /* vsqrtsh xmm/m16,xmm,xmm{k} */
- d &= ~TwoOp;
+ d &= ~TwoOp;
+ }
/* fall through */
case X86EMUL_OPC_EVEX(5, 0x51): /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */
CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */