ESZ_b,
ESZ_w,
ESZ_bw,
+ ESZ_fp16,
};
#ifndef __i386__
INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
};
+static const struct test avx512_fp16_all[] = {
+ INSN(cmpph, , 0f3a, c2, vl, fp16, vl),
+ INSN(cmpsh, f3, 0f3a, c2, el, fp16, el),
+ INSN(fpclassph, , 0f3a, 66, vl, fp16, vl),
+ INSN(fpclasssh, , 0f3a, 67, el, fp16, el),
+ INSN(getmantph, , 0f3a, 26, vl, fp16, vl),
+ INSN(getmantsh, , 0f3a, 27, el, fp16, el),
+ INSN(reduceph, , 0f3a, 56, vl, fp16, vl),
+ INSN(reducesh, , 0f3a, 57, el, fp16, el),
+ INSN(rndscaleph, , 0f3a, 08, vl, fp16, vl),
+ INSN(rndscalesh, , 0f3a, 0a, el, fp16, el),
+};
+
static const struct test gfni_all[] = {
INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl),
break;
case ESZ_w:
- esz = 2;
evex.w = 1;
+ /* fall through */
+ case ESZ_fp16:
+ esz = 2;
break;
#ifdef __i386__
case ESZ_b: case ESZ_w: case ESZ_bw:
return;
- case ESZ_d: case ESZ_q:
+ case ESZ_d: case ESZ_q: case ESZ_fp16:
break;
default:
RUN(avx512_vnni, all);
RUN(avx512_vp2intersect, all);
RUN(avx512_vpopcntdq, all);
+ RUN(avx512_fp16, all);
if ( cpu_has_avx512f )
{
{ { 0x03 }, 3, T, R, pfx_66, Wn, Ln }, /* valign{d,q} */
{ { 0x04 }, 3, T, R, pfx_66, W0, Ln }, /* vpermilps */
{ { 0x05 }, 3, T, R, pfx_66, W1, Ln }, /* vpermilpd */
+ { { 0x08 }, 3, T, R, pfx_no, W0, Ln }, /* vrndscaleph */
{ { 0x08 }, 3, T, R, pfx_66, W0, Ln }, /* vrndscaleps */
{ { 0x09 }, 3, T, R, pfx_66, W1, Ln }, /* vrndscalepd */
+ { { 0x0a }, 3, T, R, pfx_no, W0, LIG }, /* vrndscalesh */
{ { 0x0a }, 3, T, R, pfx_66, W0, LIG }, /* vrndscaless */
{ { 0x0b }, 3, T, R, pfx_66, W1, LIG }, /* vrndscalesd */
{ { 0x0f }, 3, T, R, pfx_66, WIG, Ln }, /* vpalignr */
{ { 0x22 }, 3, T, R, pfx_66, Wn, L0 }, /* vpinsr{d,q} */
{ { 0x23 }, 3, T, R, pfx_66, Wn, L1|L2 }, /* vshuff{32x4,64x2} */
{ { 0x25 }, 3, T, R, pfx_66, Wn, Ln }, /* vpternlog{d,q} */
+ { { 0x26 }, 3, T, R, pfx_no, W0, Ln }, /* vgetmantph */
{ { 0x26 }, 3, T, R, pfx_66, Wn, Ln }, /* vgetmantp{s,d} */
+ { { 0x27 }, 3, T, R, pfx_no, W0, LIG }, /* vgetmantsh */
{ { 0x27 }, 3, T, R, pfx_66, Wn, LIG }, /* vgetmants{s,d} */
{ { 0x38 }, 3, T, R, pfx_66, Wn, L1|L2 }, /* vinserti{32x4,64x2} */
{ { 0x39 }, 3, T, W, pfx_66, Wn, L1|L2 }, /* vextracti{32x4,64x2} */
{ { 0x51 }, 3, T, R, pfx_66, Wn, LIG }, /* vranges{s,d} */
{ { 0x54 }, 3, T, R, pfx_66, Wn, Ln }, /* vfixupimmp{s,d} */
{ { 0x55 }, 3, T, R, pfx_66, Wn, LIG }, /* vfixupimms{s,d} */
+ { { 0x56 }, 3, T, R, pfx_no, W0, Ln }, /* vreduceph */
{ { 0x56 }, 3, T, R, pfx_66, Wn, Ln }, /* vreducep{s,d} */
+ { { 0x57 }, 3, T, R, pfx_no, W0, LIG }, /* vreducesh */
{ { 0x57 }, 3, T, R, pfx_66, Wn, LIG }, /* vreduces{s,d} */
+ { { 0x66 }, 3, T, R, pfx_no, W0, Ln }, /* vfpclassph */
{ { 0x66 }, 3, T, R, pfx_66, Wn, Ln }, /* vfpclassp{s,d} */
+ { { 0x67 }, 3, T, R, pfx_no, W0, LIG }, /* vfpclasssh */
{ { 0x67 }, 3, T, R, pfx_66, Wn, LIG }, /* vfpclasss{s,d} */
{ { 0x70 }, 3, T, R, pfx_66, W1, Ln }, /* vpshldw */
{ { 0x71 }, 3, T, R, pfx_66, Wn, Ln }, /* vpshld{d,q} */
{ { 0x72 }, 3, T, R, pfx_66, W1, Ln }, /* vpshrdw */
{ { 0x73 }, 3, T, R, pfx_66, Wn, Ln }, /* vpshrd{d,q} */
+ { { 0xc2 }, 3, T, R, pfx_no, W0, Ln }, /* vcmpph */
+ { { 0xc2 }, 3, T, R, pfx_f3, W0, LIG }, /* vcmpsh */
{ { 0xce }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineqb */
{ { 0xcf }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineinvqb */
};
else
printf("skipped\n");
+ printf("%-40s", "Testing vfpclassphz $0x46,128(%ecx),%k3...");
+ if ( stack_exec && cpu_has_avx512_fp16 )
+ {
+ decl_insn(vfpclassph);
+
+ asm volatile ( put_insn(vfpclassph,
+ /* 0x46: check for +/- 0 and neg. */
+ /* vfpclassphz $0x46, 128(%0), %%k3 */
+ ".byte 0x62, 0xf3, 0x7c, 0x48\n\t"
+ ".byte 0x66, 0x59, 0x02, 0x46")
+ :: "c" (NULL) );
+
+ set_insn(vfpclassph);
+ for ( i = 0; i < 3; ++i )
+ {
+ res[16 + i * 5 + 0] = 0x7fff0000; /* +0 / +NaN */
+ res[16 + i * 5 + 1] = 0xffff8000; /* -0 / -NaN */
+ res[16 + i * 5 + 2] = 0x80010001; /* +DEN / -DEN */
+ res[16 + i * 5 + 3] = 0xfc00f800; /* -FIN / -INF */
+ res[16 + i * 5 + 4] = 0x7c007800; /* +FIN / +INF */
+ }
+ res[31] = 0;
+ regs.ecx = (unsigned long)res - 64;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vfpclassph) )
+ goto fail;
+ asm volatile ( "kmovd %%k3, %0" : "=g" (rc) );
+ /*
+ * Expected mask, elements 31..0: res[31] being zero supplies two
+ * +0 elements (0b11), and each 5-dword group below contributes
+ * the pattern 0001100101:
+ * 0b11(0001100101)*3
+ * 0b1100_0110_0101_0001_1001_0100_0110_0101
+ */
+ if ( rc != 0xc6519465 )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
/*
* The following compress/expand tests are not only making sure the
* accessed data is correct, but they also verify (by placing operands
#define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
#define cpu_has_avx512_vp2intersect (cp.feat.avx512_vp2intersect && xcr0_mask(0xe6))
#define cpu_has_serialize cp.feat.serialize
+#define cpu_has_avx512_fp16 (cp.feat.avx512_fp16 && xcr0_mask(0xe6))
#define cpu_has_avx_vnni (cp.feat.avx_vnni && xcr0_mask(6))
#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
#define cpu_has_avx_ifma (cp.feat.avx_ifma && xcr0_mask(6))
[0x7a ... 0x7b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
[0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x7e ... 0x7f] = { .simd_size = simd_scalar_opc, .four_op = 1 },
+ [0xc2] = { .simd_size = simd_any_fp, .d8s = d8s_vl },
[0xcc] = { .simd_size = simd_other },
[0xce ... 0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
if ( s->evex.brs )
{
case d8s_dq:
- return 2 + s->evex.w;
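+ /* Disp8 is scaled by the element size: 2 bytes for FP16, else 4 or 8. */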
+ return 1 + !s->fp16 + s->evex.w;
}
break;
/* fall through */
case simd_scalar_opc:
case simd_scalar_vexw:
- return 2 + s->evex.w;
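+ /* Likewise for scalar operands: 2-, 4-, or 8-byte elements. */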
+ return 1 + !s->fp16 + s->evex.w;
case simd_128:
/* These should have an explicit size specified. */
*/
s->simd_size = ext0f3a_table[b].simd_size;
if ( evex_encoded() )
+ {
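+ /* FP16 forms of these opcodes use the no-prefix encodings; vcmpsh uses F3. */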
+ switch ( b )
+ {
+ case 0x08: /* vrndscaleph */
+ case 0x0a: /* vrndscalesh */
+ case 0x26: /* vfpclassph */
+ case 0x27: /* vfpclasssh */
+ case 0x56: /* vgetmantph */
+ case 0x57: /* vgetmantsh */
+ case 0x66: /* vreduceph */
+ case 0x67: /* vreducesh */
+ if ( !s->evex.pfx )
+ s->fp16 = true;
+ break;
+
+ case 0xc2: /* vcmp{p,s}h */
+ if ( !(s->evex.pfx & VEX_PREFIX_DOUBLE_MASK) )
+ s->fp16 = true;
+ break;
+ }
+
disp8scale = decode_disp8scale(ext0f3a_table[b].d8s, s);
+ }
break;
case ext_8f09:
break;
case vex_f3:
generate_exception_if(evex_encoded() && s->evex.w, X86_EXC_UD);
- s->op_bytes = 4;
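+ /* Half-precision scalars access 2 bytes rather than 4. */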
+ s->op_bytes = 4 >> s->fp16;
break;
case vex_f2:
generate_exception_if(evex_encoded() && !s->evex.w, X86_EXC_UD);
break;
case simd_scalar_opc:
- s->op_bytes = 4 << (ctxt->opcode & 1);
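+ /* 2 bytes for FP16, else 4 or 8 as selected by the opcode's low bit. */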
+ s->op_bytes = 2 << (!s->fp16 + (ctxt->opcode & 1));
break;
case simd_scalar_vexw:
- s->op_bytes = 4 << s->vex.w;
+ s->op_bytes = 2 << (!s->fp16 + s->vex.w);
break;
case simd_128:
bool lock_prefix;
bool not_64bit; /* Instruction not available in 64bit. */
bool fpu_ctrl; /* Instruction is an FPU control one. */
+ bool fp16; /* Instruction has half-precision FP source operand. */
opcode_desc_t desc;
union vex vex;
union evex evex;
#define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
#define vcpu_has_serialize() (ctxt->cpuid->feat.serialize)
#define vcpu_has_tsxldtrk() (ctxt->cpuid->feat.tsxldtrk)
+#define vcpu_has_avx512_fp16() (ctxt->cpuid->feat.avx512_fp16)
#define vcpu_has_avx_vnni() (ctxt->cpuid->feat.avx_vnni)
#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
#define vcpu_has_wrmsrns() (ctxt->cpuid->feat.wrmsrns)
b = ctxt->opcode;
d = state.desc;
#define state (&state)
- elem_bytes = 4 << evex.w;
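+ /* Default element size: 2 bytes for FP16, else 4 << EVEX.W. */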
+ elem_bytes = 2 << (!state->fp16 + evex.w);
generate_exception_if(state->not_64bit && mode_64bit(), X86_EXC_UD);
avx512_vlen_check(b & 2);
goto simd_imm8_zmm;
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x0a): /* vrndscalesh $imm8,xmm/mem,xmm,xmm{k} */
+ generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
+ /* fall through */
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x08): /* vrndscaleph $imm8,[xyz]mm/mem,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, X86_EXC_UD);
+ avx512_vlen_check(b & 2);
+ goto simd_imm8_zmm;
+
#endif /* X86EMUL_NO_SIMD */
CASE_SIMD_PACKED_INT(0x0f3a, 0x0f): /* palignr $imm8,{,x}mm/mem,{,x}mm */
avx512_vlen_check(false);
goto simd_imm8_zmm;
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x26): /* vgetmantph $imm8,[xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x56): /* vreduceph $imm8,[xyz]mm/mem,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, X86_EXC_UD);
+ if ( ea.type != OP_REG || !evex.brs )
+ avx512_vlen_check(false);
+ goto simd_imm8_zmm;
+
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x51): /* vranges{s,d} $imm8,xmm/mem,xmm,xmm{k} */
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x57): /* vreduces{s,d} $imm8,xmm/mem,xmm,xmm{k} */
host_and_vcpu_must_have(avx512dq);
avx512_vlen_check(true);
goto simd_imm8_zmm;
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x27): /* vgetmantsh $imm8,xmm/mem,xmm,xmm{k} */
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x57): /* vreducesh $imm8,xmm/mem,xmm,xmm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, X86_EXC_UD);
+ if ( !evex.brs )
+ avx512_vlen_check(true);
+ else
+ generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
+ goto simd_imm8_zmm;
+
case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */
case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */
if ( !vex.w )
avx512_vlen_check(true);
goto simd_imm8_zmm;
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x66): /* vfpclassph $imm8,[xyz]mm/mem,k{k} */
+ case X86EMUL_OPC_EVEX(0x0f3a, 0x67): /* vfpclasssh $imm8,xmm/mem,k{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD);
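+ /* Even opcode (vfpclassph): packed form; odd (vfpclasssh): scalar. */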
+ if ( !(b & 1) )
+ goto avx512f_imm8_no_sae;
+ generate_exception_if(evex.brs, X86_EXC_UD);
+ avx512_vlen_check(true);
+ goto simd_imm8_zmm;
+
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x70): /* vpshldw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x72): /* vpshrdw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
generate_exception_if(!evex.w, X86_EXC_UD);
host_and_vcpu_must_have(avx512_vbmi2);
goto avx512f_imm8_no_sae;
+ case X86EMUL_OPC_EVEX_F3(0x0f3a, 0xc2): /* vcmpsh $imm8,xmm/mem,xmm,k{k} */
+ generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
+ /* fall through */
+ case X86EMUL_OPC_EVEX(0x0f3a, 0xc2): /* vcmpph $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD);
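+ /* vcmpsh (F3) is LIG, while vcmpph honours the vector length. */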
+ if ( ea.type != OP_REG || !evex.brs )
+ avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK);
+ goto simd_imm8_zmm;
+
case X86EMUL_OPC(0x0f3a, 0xcc): /* sha1rnds4 $imm8,xmm/m128,xmm */
host_and_vcpu_must_have(sha);
op_bytes = 16;