#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
+#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
+#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) /* 0x03 */
#define OPC_BSWAP (0xc8 | P_EXT)
+#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) /* 0x3b */
+#define OPC_DEC_r32 (0x48) /* ... plus register number */
+#define OPC_INC_r32 (0x40) /* ... plus register number */
#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
#define OPC_JCC_short (0x70) /* ... plus condition code */
#define OPC_JMP_long (0xe9)
#define OPC_SHIFT_1 (0xd1)
#define OPC_SHIFT_Ib (0xc1)
#define OPC_SHIFT_cl (0xd3)
+#define OPC_TESTL (0x85) /* emitted with a modrm byte, e.g. "test r, r" */
/* Group 1 opcode extensions for 0x80-0x83. */
#define ARITH_ADD 0
}
}
+/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi.
+   Emits one instruction through tcg_out_modrm whose opcode is
+   OPC_ARITH_GvEv + (subop << 3); DEST and SRC are passed through
+   unchanged as the two modrm operands, in that order. SUBOP selects
+   the operation and occupies the "(ARITH_FOO << 3)" slot of the opcode.
+   Calling this with ARITH_XOR and DEST == SRC is how a register is
+   zeroed ("xor r0,r0"). */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+ tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
+}
+
static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
{
if (arg != ret) {
int ret, int32_t arg)
{
if (arg == 0) {
- /* xor r0,r0 */
- tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
+ tgen_arithr(s, ARITH_XOR, ret, ret);
} else {
tcg_out8(s, 0xb8 + ret);
tcg_out32(s, arg);
tcg_out_modrm(s, OPC_MOVSWL, dest, src);
}
-static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
+static inline void tgen_arithi(TCGContext *s, int c, int r0,
+ int32_t val, int cf)
{
- if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
- /* inc */
- tcg_out_opc(s, 0x40 + r0);
- } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
- /* dec */
- tcg_out_opc(s, 0x48 + r0);
+ /* ??? While INC and DEC are 2 bytes shorter than ADDL/SUBL $1, they
+ also induce partial flags update stalls on Pentium4 and are not
+ recommended by current Intel optimization manuals. */
+ if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+ int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
+ tcg_out_opc(s, opc + r0);
} else if (val == (int8_t)val) {
tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
tcg_out8(s, val);
if (const_arg2) {
if (arg2 == 0) {
/* test r, r */
- tcg_out_modrm(s, 0x85, arg1, arg1);
+ tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
} else {
tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
}
} else {
- tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+ tgen_arithr(s, ARITH_CMP, arg1, arg2);
}
}
tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
/* cmp 0(r1), r0 */
- tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
tcg_out_mov(s, r0, addr_reg);
s->code_ptr++;
/* cmp 4(r1), addr_reg2 */
- tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
/* je label1 */
tcg_out8(s, OPC_JCC_short + JCC_JE);
*label1_ptr = s->code_ptr - label1_ptr - 1;
/* add x(r1), r0 */
- tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) -
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+ offsetof(CPUTLBEntry, addend) -
offsetof(CPUTLBEntry, addr_read));
#else
r0 = addr_reg;
tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
/* cmp 0(r1), r0 */
- tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
tcg_out_mov(s, r0, addr_reg);
s->code_ptr++;
/* cmp 4(r1), addr_reg2 */
- tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
/* je label1 */
tcg_out8(s, OPC_JCC_short + JCC_JE);
*label1_ptr = s->code_ptr - label1_ptr - 1;
/* add x(r1), r0 */
- tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) -
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+ offsetof(CPUTLBEntry, addend) -
offsetof(CPUTLBEntry, addr_write));
#else
r0 = addr_reg;
if (const_args[2]) {
tgen_arithi(s, c, args[0], args[2], 0);
} else {
- tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
+ tgen_arithr(s, c, args[0], args[2]);
}
break;
case INDEX_op_mul_i32:
goto gen_shift32;
case INDEX_op_add2_i32:
- if (const_args[4])
+ if (const_args[4]) {
tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
- else
- tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
- if (const_args[5])
+ } else {
+ tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+ }
+ if (const_args[5]) {
tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
- else
- tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
+ } else {
+ tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+ }
break;
case INDEX_op_sub2_i32:
- if (const_args[4])
+ if (const_args[4]) {
tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
- else
- tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
- if (const_args[5])
+ } else {
+ tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+ }
+ if (const_args[5]) {
tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
- else
- tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
+ } else {
+ tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+ }
break;
case INDEX_op_brcond_i32:
tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],