From: Paolo Bonzini Date: Fri, 27 Oct 2023 02:12:59 +0000 (+0200) Subject: target/i386: optimize computation of JL and JLE from flags X-Git-Tag: qemu-xen-4.20.0~357^2~44 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=6032627f07a1632114f09612c80cf806ba03de4a;p=qemu-xen.git target/i386: optimize computation of JL and JLE from flags Take advantage of the fact that there can be no 1 bits between SF and OF. If they were adjacent, you could sum SF and get a carry only if SF was already set. Then the value of OF in the sum is the XOR of OF itself, the carry (which is SF) and 0 (the value of the OF bit in the addend): this is OF^SF exactly. Because OF and SF are not adjacent, just place more 1 bits to the left so that the carry propagates, which means summing CC_O - CC_S. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 037bc47e7c..8fb80011a2 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1126,10 +1126,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S }; + .mask = CC_O }; break; default: case JCC_LE: @@ -1137,10 +1136,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S | CC_Z }; + .mask = CC_O | CC_Z }; break; } break;