INSN(pmaxu, 66, 0f38, 3f, vl, dq, vl),
INSN(pmins, 66, 0f38, 39, vl, dq, vl),
INSN(pminu, 66, 0f38, 3b, vl, dq, vl),
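+ /*
+  * The extending moves read only a fraction of the vector length (vl_2,
+  * vl_4, vl_8); the element size column names the source granularity,
+  * with d_nb denoting a dword source permitting no broadcast.
+  */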
+ INSN(pmovsxbd, 66, 0f38, 21, vl_4, b, vl),
+ INSN(pmovsxbq, 66, 0f38, 22, vl_8, b, vl),
+ INSN(pmovsxdq, 66, 0f38, 25, vl_2, d_nb, vl),
+ INSN(pmovsxwd, 66, 0f38, 23, vl_2, w, vl),
+ INSN(pmovsxwq, 66, 0f38, 24, vl_4, w, vl),
+ INSN(pmovzxbd, 66, 0f38, 31, vl_4, b, vl),
+ INSN(pmovzxbq, 66, 0f38, 32, vl_8, b, vl),
+ INSN(pmovzxdq, 66, 0f38, 35, vl_2, d_nb, vl),
+ INSN(pmovzxwd, 66, 0f38, 33, vl_2, w, vl),
+ INSN(pmovzxwq, 66, 0f38, 34, vl_4, w, vl),
INSN(pmuldq, 66, 0f38, 28, vl, q, vl),
INSN(pmulld, 66, 0f38, 40, vl, d, vl),
INSN(pmuludq, 66, 0f, f4, vl, q, vl),
INSN(pminsw, 66, 0f, ea, vl, w, vl),
INSN(pminub, 66, 0f, da, vl, b, vl),
INSN(pminuw, 66, 0f38, 3a, vl, w, vl),
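+ /* Byte-to-word forms (AVX512BW): the source is half the vector length. */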
+ INSN(pmovsxbw, 66, 0f38, 20, vl_2, b, vl),
+ INSN(pmovzxbw, 66, 0f38, 30, vl_2, b, vl),
INSN(pmulhuw, 66, 0f, e4, vl, w, vl),
INSN(pmulhw, 66, 0f, e5, vl, w, vl),
INSN(pmullw, 66, 0f, d5, vl, w, vl),
# define max(x, y) B(pmaxsd, _mask, x, y, undef(), ~0)
# define min(x, y) B(pminsd, _mask, x, y, undef(), ~0)
# define mul_full(x, y) ((vec_t)B(pmuldq, _mask, x, y, (vdi_t)undef(), ~0))
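+/* widen1() sign-extends each source dword to a qword (vpmovsxdq). */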
+# define widen1(x) ((vec_t)B(pmovsxdq, _mask, x, (vdi_t)undef(), ~0))
# elif UINT_SIZE == 4
# define max(x, y) ((vec_t)B(pmaxud, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0))
# define min(x, y) ((vec_t)B(pminud, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0))
# define mul_full(x, y) ((vec_t)B(pmuludq, _mask, (vsi_t)(x), (vsi_t)(y), (vdi_t)undef(), ~0))
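+/* widen1() zero-extends each source dword to a qword (vpmovzxdq). */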
+# define widen1(x) ((vec_t)B(pmovzxdq, _mask, (vsi_half_t)(x), (vdi_t)undef(), ~0))
# elif INT_SIZE == 8
# define max(x, y) ((vec_t)B(pmaxsq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))
# define min(x, y) ((vec_t)B(pminsq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))
# endif
OVR(movntdq);
OVR(movntdqa);
+OVR(pmovsxbd);
+OVR(pmovsxbq);
+OVR(pmovsxdq);
+OVR(pmovsxwd);
+OVR(pmovsxwq);
+OVR(pmovzxbd);
+OVR(pmovzxbq);
+OVR(pmovzxdq);
+OVR(pmovzxwd);
+OVR(pmovzxwq);
OVR(pmulld);
OVR(pmuldq);
OVR(pmuludq);
[0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
[0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
- [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
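+ /*
+  * The extending moves read less than a full vector from memory, so their
+  * compressed (Disp8) displacements scale by VL/2, VL/4, or VL/8.
+  */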
+ [0x20] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+ [0x21] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+ [0x22] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
+ [0x23] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+ [0x24] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+ [0x25] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x26 ... 0x29] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
[0x2b] = { .simd_size = simd_packed_int },
[0x2c ... 0x2d] = { .simd_size = simd_packed_fp },
[0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
- [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
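+ /* The zero-extending forms scale just like their sign-extending counterparts above. */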
+ [0x30] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+ [0x31] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+ [0x32] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
+ [0x33] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+ [0x34] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+ [0x35] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l);
goto simd_0f_int;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x20): /* vpmovsxbw {x,y}mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x30): /* vpmovzxbw {x,y}mm/mem,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x23): /* vpmovsxwd {x,y}mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x25): /* vpmovsxdq {x,y}mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x33): /* vpmovzxwd {x,y}mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x35): /* vpmovzxdq {x,y}mm/mem,[xyz]mm{k} */
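+ /* No embedded broadcast; the dword-to-qword forms require EVEX.W = 0. */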
+ generate_exception_if(evex.brs || (evex.w && (b & 7) == 5), EXC_UD);
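+ /* Memory operand: the full vector width (16 << evex.lr) scaled down by the conversion delta. */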
+ op_bytes = 32 >> (pmov_convert_delta[b & 7] + 1 - evex.lr);
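+ /* The low opcode bits give the source element width: byte (0-2), word (3/4), dword (5). */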
+ elem_bytes = (b & 7) < 3 ? 1 : (b & 7) != 5 ? 2 : 4;
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
generate_exception_if(ea.type != OP_MEM, EXC_UD);