Decrement followed by cmov improvements.

The following patch to the x86_64 backend improves the code generated
for a decrement followed by a conditional move.  The primary change is
to recognize that after subtracting one, checking the result is -1 (or
equivalently that the original value was zero) can be implemented using
the borrow/carry flag instead of requiring an explicit test instruction.
This is achieved by a new define_insn_and_split that allows combine to
split the desired sequence/composite into a *subsi_3 and *movsicc_noc.

The other change with this patch is/are a pair of peephole2 optimizations
to eliminate register-to-register moves generated during register
allocation.  During reload, the compiler doesn't know that inverting
the condition of a conditional cmove can sometimes reduce register
pressure, but this is easy to tidy up during the peephole2 pass (where
swapping the order of the insn's operands performs the required
logic inversion).

Both improvements are demonstrated by the case below:

int foo(int x) {
  if (x == 0)
    x = 16;
  else x--;
  return x;
}

Before:
foo:	leal    -1(%rdi), %eax
        testl   %edi, %edi
        movl    $16, %edx
        cmove   %edx, %eax
        ret

After:
foo:	subl    $1, %edi
        movl    $16, %eax
        cmovnc  %edi, %eax
        ret

And the value of the peephole2 clean-up can be seen on its own in:

int bar(int x) {
  x--;
  if (x == 0)
    x = 16;
  return x;
}

Before:
bar:	movl    %edi, %eax
        movl    $16, %edx
        subl    $1, %eax
        cmove   %edx, %eax
        ret

After:
bar:	subl    $1, %edi
        movl    $16, %eax
        cmovne  %edi, %eax
        ret

These idioms were inspired by the source code of NIST SciMark4's
Random_nextDouble function, where the tweaks above result in
a ~1% improvement in the MonteCarlo benchmark kernel.

2021-07-30  Roger Sayle  <roger@nextmovesoftware.com>
	    Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
	* config/i386/i386.md (*dec_cmov<mode>): New define_insn_and_split
	to generate a conditional move using the carry flag after sub $1.
	(peephole2): Eliminate a register-to-register move by inverting
	the condition of a conditional move.

gcc/testsuite/ChangeLog
	* gcc.target/i386/dec-cmov-1.c: New test.
	* gcc.target/i386/dec-cmov-2.c: New test.
This commit is contained in:
Roger Sayle 2021-07-30 22:46:32 +01:00
parent 5b2515f5ae
commit f7bf03cf69
3 changed files with 249 additions and 0 deletions

View file

@ -6756,6 +6756,29 @@
? GEU : LTU, VOIDmode, cc, const0_rtx);
})
;; Help combine use borrow flag to test for -1 after dec (add $-1).
(define_insn_and_split "*dec_cmov<mode>"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(if_then_else:SWI248
(match_operator 1 "bt_comparison_operator"
[(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
(plus:SWI248 (match_dup 2) (const_int -1))
(match_operand:SWI248 3 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_CMOVE"
"#"
"&& reload_completed"
[(parallel [(set (reg:CC FLAGS_REG)
(compare:CC (match_dup 2) (const_int 1)))
(set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
(set (match_dup 0)
(if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
{
rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
? GEU : LTU, VOIDmode, cc, const0_rtx);
})
(define_insn "*subsi_3_zext"
[(set (reg FLAGS_REG)
(compare (match_operand:SI 1 "register_operand" "0")
@ -19182,6 +19205,70 @@
gcc_unreachable ();
})
;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
(define_peephole2
[(set (match_operand:SWI248 0 "register_operand")
(match_operand:SWI248 1 "register_operand"))
(parallel [(set (reg FLAGS_REG) (match_operand 5))
(set (match_dup 0) (match_operand:SWI248 6))])
(set (match_operand:SWI248 2 "register_operand")
(match_operand:SWI248 3))
(set (match_dup 0)
(if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_dup 0)
(match_dup 2)))]
"TARGET_CMOVE
&& REGNO (operands[2]) != REGNO (operands[0])
&& REGNO (operands[2]) != REGNO (operands[1])
&& peep2_reg_dead_p (1, operands[1])
&& peep2_reg_dead_p (4, operands[2])
&& !reg_overlap_mentioned_p (operands[0], operands[3])"
[(parallel [(set (match_dup 7) (match_dup 8))
(set (match_dup 1) (match_dup 9))])
(set (match_dup 0) (match_dup 3))
(set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
(match_dup 1)
(match_dup 0)))]
{
operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
operands[8] = replace_rtx (operands[5], operands[0], operands[1]);
operands[9] = replace_rtx (operands[6], operands[0], operands[1]);
})
;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
(define_peephole2
[(set (match_operand:SWI248 2 "register_operand")
(match_operand:SWI248 3))
(set (match_operand:SWI248 0 "register_operand")
(match_operand:SWI248 1 "register_operand"))
(parallel [(set (reg FLAGS_REG) (match_operand 5))
(set (match_dup 0) (match_operand:SWI248 6))])
(set (match_dup 0)
(if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_dup 0)
(match_dup 2)))]
"TARGET_CMOVE
&& REGNO (operands[2]) != REGNO (operands[0])
&& REGNO (operands[2]) != REGNO (operands[1])
&& peep2_reg_dead_p (2, operands[1])
&& peep2_reg_dead_p (4, operands[2])
&& !reg_overlap_mentioned_p (operands[0], operands[3])"
[(parallel [(set (match_dup 7) (match_dup 8))
(set (match_dup 1) (match_dup 9))])
(set (match_dup 0) (match_dup 3))
(set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
(match_dup 1)
(match_dup 0)))]
{
operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
operands[8] = replace_rtx (operands[5], operands[0], operands[1]);
operands[9] = replace_rtx (operands[6], operands[0], operands[1]);
})
(define_expand "mov<mode>cc"
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF

View file

@ -0,0 +1,105 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-additional-options "-march=pentiumpro -mregparm=3" { target ia32 } } */
int foo_m1(int x)
{
x--;
if (x == 0)
x = 16;
return x;
}
int foo_m2(int x)
{
x -= 2;
if (x == 0)
x = 16;
return x;
}
int foo_p1(int x)
{
x++;
if (x == 0)
x = 16;
return x;
}
int foo_p2(int x)
{
x += 2;
if (x == 0)
x = 16;
return x;
}
#ifdef __x86_64__
long long fool_m1(long long x)
{
x--;
if (x == 0)
x = 16;
return x;
}
long long fool_m2(long long x)
{
x -= 2;
if (x == 0)
x = 16;
return x;
}
long long fool_p1(long long x)
{
x++;
if (x == 0)
x = 16;
return x;
}
long long fool_p2(long long x)
{
x += 2;
if (x == 0)
x = 16;
return x;
}
#endif /* __X86_64__ */
short foos_m1(short x)
{
x--;
if (x == 0)
x = 16;
return x;
}
short foos_m2(short x)
{
x -= 2;
if (x == 0)
x = 16;
return x;
}
short foos_p1(short x)
{
x++;
if (x == 0)
x = 16;
return x;
}
short foos_p2(short x)
{
x += 2;
if (x == 0)
x = 16;
return x;
}
/* { dg-final { scan-assembler-not "mov(l|q)\[ \\t\]*%(e|r)(cx|di), %(e|r)ax" } } */

View file

@ -0,0 +1,57 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-additional-options "-march=pentiumpro -mregparm=3" { target ia32 } } */
int foo(int x)
{
x--;
if (x == -1)
x = 16;
return x;
}
int bar(int x)
{
if (x == 0)
x = 16;
else x--;
return x;
}
#ifdef __x86_64__
long long fool(long long x)
{
x--;
if (x == -1)
x = 16;
return x;
}
long long barl(long long x)
{
if (x == 0)
x = 16;
else x--;
return x;
}
#endif
short foos(short x)
{
x--;
if (x == -1)
x = 16;
return x;
}
short bars(short x)
{
if (x == 0)
x = 16;
else x--;
return x;
}
/* { dg-final { scan-assembler-not "lea(l|q)" } } */
/* { dg-final { scan-assembler-not "test(l|q|w)" } } */