Decrement followed by cmov improvements.
The following patch to the x86_64 backend improves the code generated for a decrement followed by a conditional move. The primary change is to recognize that after subtracting one, checking whether the result is -1 (or equivalently that the original value was zero) can be implemented using the borrow/carry flag instead of requiring an explicit test instruction. This is achieved by a new define_insn_and_split that allows combine to split the desired composite sequence into a *subsi_3 and *movsicc_noc. The other change in this patch is a pair of peephole2 optimizations to eliminate register-to-register moves generated during register allocation. During reload, the compiler doesn't know that inverting the condition of a conditional move can sometimes reduce register pressure, but this is easy to tidy up during the peephole2 pass (where swapping the order of the insn's operands performs the required logic inversion). Both improvements are demonstrated by the case below: int foo(int x) { if (x == 0) x = 16; else x--; return x; } Before: foo: leal -1(%rdi), %eax testl %edi, %edi movl $16, %edx cmove %edx, %eax ret After: foo: subl $1, %edi movl $16, %eax cmovnc %edi, %eax ret And the value of the peephole2 clean-up can be seen on its own in: int bar(int x) { x--; if (x == 0) x = 16; return x; } Before: bar: movl %edi, %eax movl $16, %edx subl $1, %eax cmove %edx, %eax ret After: bar: subl $1, %edi movl $16, %eax cmovne %edi, %eax ret These idioms were inspired by the source code of NIST SciMark4's Random_nextDouble function, where the tweaks above result in a ~1% improvement in the MonteCarlo benchmark kernel. 2021-07-30 Roger Sayle <roger@nextmovesoftware.com> Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog * config/i386/i386.md (*dec_cmov<mode>): New define_insn_and_split to generate a conditional move using the carry flag after sub $1. (peephole2): Eliminate a register-to-register move by inverting the condition of a conditional move. 
gcc/testsuite/ChangeLog * gcc.target/i386/dec-cmov-1.c: New test. * gcc.target/i386/dec-cmov-2.c: New test.
This commit is contained in:
parent
5b2515f5ae
commit
f7bf03cf69
3 changed files with 249 additions and 0 deletions
|
@ -6756,6 +6756,29 @@
|
|||
? GEU : LTU, VOIDmode, cc, const0_rtx);
|
||||
})
|
||||
|
||||
;; Help combine use borrow flag to test for -1 after dec (add $-1).
;; The insn pattern matches a conditional move, in any SWI248 mode and only
;; when TARGET_CMOVE holds, whose condition is operator 1 comparing
;; operand 2 with zero and whose arms are operand 2 plus -1 (condition
;; true) and operand 3 (condition false).  Operand 0 is tied to operand 2
;; by the "0" constraint.  The insn emits "#" and is split once reload has
;; completed into a flag-setting subtract of 1 followed by a conditional
;; move controlled by operand 4, which the preparation code below builds
;; as a carry-flag test: GEU when operator 1 is NE, LTU otherwise.
;; NOTE(review): bt_comparison_operator presumably accepts only EQ and NE;
;; confirm in predicates.md, since every non-NE code is mapped to LTU here.
|
||||
(define_insn_and_split "*dec_cmov<mode>"
|
||||
[(set (match_operand:SWI248 0 "register_operand" "=r")
|
||||
(if_then_else:SWI248
|
||||
(match_operator 1 "bt_comparison_operator"
|
||||
[(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
|
||||
(plus:SWI248 (match_dup 2) (const_int -1))
|
||||
(match_operand:SWI248 3 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_CMOVE"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel [(set (reg:CC FLAGS_REG)
|
||||
(compare:CC (match_dup 2) (const_int 1)))
|
||||
(set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
|
||||
(set (match_dup 0)
|
||||
(if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
|
||||
{
|
||||
/* Build the replacement condition on the carry flag (CCCmode view of
   FLAGS_REG): the original NE test becomes GEU (no borrow from the
   subtract of 1), any other code becomes LTU (borrow).  */
|
||||
rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
|
||||
operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
|
||||
? GEU : LTU, VOIDmode, cc, const0_rtx);
|
||||
})
|
||||
|
||||
(define_insn "*subsi_3_zext"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare (match_operand:SI 1 "register_operand" "0")
|
||||
|
@ -19182,6 +19205,70 @@
|
|||
gcc_unreachable ();
|
||||
})
|
||||
|
||||
;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
|
||||
;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;;
;; Matched window: operand 0 is copied from operand 1, a parallel sets
;; FLAGS_REG (value operand 5) and operand 0 (value operand 6), operand 2
;; is loaded from operand 3, and a conditional move keeps operand 0 when
;; comparison operator 4 holds and otherwise takes operand 2.
;;
;; Replacement: the flag-setting arithmetic is performed directly on
;; operand 1, operand 0 is loaded from operand 3, and the conditional move
;; reuses operator 4 with its arms swapped, so operand 0 receives
;; operand 1 when the comparison holds and keeps operand 3 otherwise.
;;
;; The condition requires TARGET_CMOVE, operand 2 to be a different
;; register from operands 0 and 1, operands 1 and 2 to pass the
;; peep2_reg_dead_p checks at window offsets 1 and 4 respectively, and
;; operand 3 not to mention operand 0.
;; NOTE(review): replace_rtx appears to rewrite its first argument in
;; place rather than copying it; confirm that is acceptable for the
;; matched operands 5 and 6.
|
||||
(define_peephole2
|
||||
[(set (match_operand:SWI248 0 "register_operand")
|
||||
(match_operand:SWI248 1 "register_operand"))
|
||||
(parallel [(set (reg FLAGS_REG) (match_operand 5))
|
||||
(set (match_dup 0) (match_operand:SWI248 6))])
|
||||
(set (match_operand:SWI248 2 "register_operand")
|
||||
(match_operand:SWI248 3))
|
||||
(set (match_dup 0)
|
||||
(if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
|
||||
[(reg FLAGS_REG) (const_int 0)])
|
||||
(match_dup 0)
|
||||
(match_dup 2)))]
|
||||
"TARGET_CMOVE
|
||||
&& REGNO (operands[2]) != REGNO (operands[0])
|
||||
&& REGNO (operands[2]) != REGNO (operands[1])
|
||||
&& peep2_reg_dead_p (1, operands[1])
|
||||
&& peep2_reg_dead_p (4, operands[2])
|
||||
&& !reg_overlap_mentioned_p (operands[0], operands[3])"
|
||||
[(parallel [(set (match_dup 7) (match_dup 8))
|
||||
(set (match_dup 1) (match_dup 9))])
|
||||
(set (match_dup 0) (match_dup 3))
|
||||
(set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
|
||||
(match_dup 1)
|
||||
(match_dup 0)))]
|
||||
{
|
||||
/* operands[7] is the destination of the first SET in the window insn at
   offset 1 (the matched parallel's FLAGS_REG set); operands[8] and
   operands[9] are the flag and result expressions with operand 0
   replaced by operand 1.  */
|
||||
operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
|
||||
operands[8] = replace_rtx (operands[5], operands[0], operands[1]);
|
||||
operands[9] = replace_rtx (operands[6], operands[0], operands[1]);
|
||||
})
|
||||
|
||||
;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;;
;; Matched window: operand 2 is loaded from operand 3, operand 0 is copied
;; from operand 1, a parallel sets FLAGS_REG (value operand 5) and
;; operand 0 (value operand 6), and a conditional move keeps operand 0
;; when comparison operator 4 holds and otherwise takes operand 2.
;;
;; Replacement: the flag-setting arithmetic is performed directly on
;; operand 1, operand 0 is loaded from operand 3, and the conditional move
;; reuses operator 4 with its arms swapped, so operand 0 receives
;; operand 1 when the comparison holds and keeps operand 3 otherwise.
;;
;; The replacement reads operand 3 after the arithmetic instead of before
;; it, and never writes operand 2.  The condition therefore rejects
;; windows in which
;;   - operand 3 mentions operand 0 or operand 1 (it would read a register
;;     the replacement has already overwritten), or
;;   - operand 5 or operand 6 mentions operand 2 (the arithmetic would
;;     read a register the replacement no longer initializes).
(define_peephole2
  [(set (match_operand:SWI248 2 "register_operand")
        (match_operand:SWI248 3))
   (set (match_operand:SWI248 0 "register_operand")
        (match_operand:SWI248 1 "register_operand"))
   (parallel [(set (reg FLAGS_REG) (match_operand 5))
              (set (match_dup 0) (match_operand:SWI248 6))])
   (set (match_dup 0)
        (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
                               [(reg FLAGS_REG) (const_int 0)])
          (match_dup 0)
          (match_dup 2)))]
  "TARGET_CMOVE
   && REGNO (operands[2]) != REGNO (operands[0])
   && REGNO (operands[2]) != REGNO (operands[1])
   && peep2_reg_dead_p (2, operands[1])
   && peep2_reg_dead_p (4, operands[2])
   && !reg_overlap_mentioned_p (operands[0], operands[3])
   && !reg_overlap_mentioned_p (operands[1], operands[3])
   && !reg_overlap_mentioned_p (operands[2], operands[5])
   && !reg_overlap_mentioned_p (operands[2], operands[6])"
  [(parallel [(set (match_dup 7) (match_dup 8))
              (set (match_dup 1) (match_dup 9))])
   (set (match_dup 0) (match_dup 3))
   (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
                                           (match_dup 1)
                                           (match_dup 0)))]
{
  /* operands[7] is the destination of the first SET in the window insn at
     offset 2 (the matched parallel's FLAGS_REG set); operands[8] and
     operands[9] are the flag and result expressions with operand 0
     replaced by operand 1.
     NOTE(review): replace_rtx appears to rewrite its argument in place
     rather than copying it; confirm that is acceptable here.  */
  operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
  operands[8] = replace_rtx (operands[5], operands[0], operands[1]);
  operands[9] = replace_rtx (operands[6], operands[0], operands[1]);
})
|
||||
|
||||
(define_expand "mov<mode>cc"
|
||||
[(set (match_operand:X87MODEF 0 "register_operand")
|
||||
(if_then_else:X87MODEF
|
||||
|
|
105
gcc/testsuite/gcc.target/i386/dec-cmov-1.c
Normal file
105
gcc/testsuite/gcc.target/i386/dec-cmov-1.c
Normal file
|
@ -0,0 +1,105 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-additional-options "-march=pentiumpro -mregparm=3" { target ia32 } } */
|
||||
|
||||
/* Test input: decrement X by one, then return 16 if the decremented
   value is zero, otherwise the decremented value.  */
int foo_m1(int x)
|
||||
{
|
||||
x--;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Test input: subtract 2 from X, then return 16 if the result is zero,
   otherwise the result.  */
int foo_m2(int x)
|
||||
{
|
||||
x -= 2;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Test input: increment X by one, then return 16 if the incremented
   value is zero, otherwise the incremented value.  */
int foo_p1(int x)
|
||||
{
|
||||
x++;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Test input: add 2 to X, then return 16 if the result is zero,
   otherwise the result.  */
int foo_p2(int x)
|
||||
{
|
||||
x += 2;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* long long variant (compiled only when __x86_64__ is defined):
   decrement X by one, then return 16 if the decremented value is zero,
   otherwise the decremented value.  */
long long fool_m1(long long x)
|
||||
{
|
||||
x--;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* long long variant (compiled only when __x86_64__ is defined):
   subtract 2 from X, then return 16 if the result is zero, otherwise
   the result.  */
long long fool_m2(long long x)
|
||||
{
|
||||
x -= 2;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* long long variant (compiled only when __x86_64__ is defined):
   increment X by one, then return 16 if the incremented value is zero,
   otherwise the incremented value.  */
long long fool_p1(long long x)
|
||||
{
|
||||
x++;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* long long variant (compiled only when __x86_64__ is defined):
   add 2 to X, then return 16 if the result is zero, otherwise the
   result.  */
long long fool_p2(long long x)
|
||||
{
|
||||
x += 2;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
/* short variant: decrement X by one, then return 16 if the decremented
   value is zero, otherwise the decremented value.  */
short foos_m1(short x)
|
||||
{
|
||||
x--;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* short variant: subtract 2 from X, then return 16 if the result is
   zero, otherwise the result.  */
short foos_m2(short x)
|
||||
{
|
||||
x -= 2;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* short variant: increment X by one, then return 16 if the incremented
   value is zero, otherwise the incremented value.  */
short foos_p1(short x)
|
||||
{
|
||||
x++;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* short variant: add 2 to X, then return 16 if the result is zero,
   otherwise the result.  */
short foos_p2(short x)
|
||||
{
|
||||
x += 2;
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "mov(l|q)\[ \\t\]*%(e|r)(cx|di), %(e|r)ax" } } */
|
||||
|
57
gcc/testsuite/gcc.target/i386/dec-cmov-2.c
Normal file
57
gcc/testsuite/gcc.target/i386/dec-cmov-2.c
Normal file
|
@ -0,0 +1,57 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-additional-options "-march=pentiumpro -mregparm=3" { target ia32 } } */
|
||||
|
||||
/* Test input: decrement X by one, then return 16 if the decremented
   value is -1, otherwise the decremented value.  */
int foo(int x)
|
||||
{
|
||||
x--;
|
||||
if (x == -1)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Test input: return 16 if X is zero, otherwise X decremented by one.  */
int bar(int x)
|
||||
{
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
else x--;
|
||||
return x;
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* long long variant (compiled only when __x86_64__ is defined):
   decrement X by one, then return 16 if the decremented value is -1,
   otherwise the decremented value.  */
long long fool(long long x)
|
||||
{
|
||||
x--;
|
||||
if (x == -1)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* long long variant (compiled only when __x86_64__ is defined):
   return 16 if X is zero, otherwise X decremented by one.  */
long long barl(long long x)
|
||||
{
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
else x--;
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* short variant: decrement X by one, then return 16 if the decremented
   value is -1, otherwise the decremented value.  */
short foos(short x)
|
||||
{
|
||||
x--;
|
||||
if (x == -1)
|
||||
x = 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* short variant: return 16 if X is zero, otherwise X decremented by
   one.  */
short bars(short x)
|
||||
{
|
||||
if (x == 0)
|
||||
x = 16;
|
||||
else x--;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "lea(l|q)" } } */
|
||||
/* { dg-final { scan-assembler-not "test(l|q|w)" } } */
|
||||
|
Loading…
Add table
Reference in a new issue