Use add for a = a + b and a = b + a when possible.

Since, except on Bonnell,

01 fb                   add    %edi,%ebx

is faster and shorter than

8d 1c 1f                lea    (%rdi,%rbx,1),%ebx

we should use add for a = a + b and a = b + a when possible, unless
optimizing for Bonnell.

Tested on x86-64.
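
For reference (an annotation, not part of the original log): the one-byte
size gap is the SIB byte that LEA's base+index addressing form requires,
while a register-register add needs only an opcode plus a ModRM byte:

01 fb                   add    %edi,%ebx           # 01 = ADD r/m32,r32; fb = ModRM
8d 1c 1f                lea    (%rdi,%rbx,1),%ebx  # 8d = LEA; 1c = ModRM; 1f = SIB

Speed-wise, both usually execute in one cycle on recent cores, but add is
typically available on more ALU ports; Bonnell is the odd one out because
its LEA executes in the address-generation stage, which is why lea stays
preferable there when the result feeds an address.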

2019-12-17  H.J. Lu  <hjl.tools@gmail.com>

gcc/

	PR target/92807
	* config/i386/i386.c (ix86_lea_outperforms): Check !TARGET_BONNELL.
	(ix86_avoid_lea_for_addr): When not optimizing for Bonnell, use add
	for a = a + b and a = b + a.

gcc/testsuite/

	PR target/92807
	* gcc.target/i386/pr92807-1.c: New test.

From-SVN: r279451
4 changed files with 41 additions and 9 deletions

gcc/ChangeLog

@@ -1,3 +1,10 @@
+2019-12-17  H.J. Lu  <hjl.tools@gmail.com>
+
+	PR target/92807
+	* config/i386/i386.c (ix86_lea_outperforms): Check !TARGET_BONNELL.
+	(ix86_avoid_lea_for_addr): When not optimizing for Bonnell, use add
+	for a = a + b and a = b + a.
+
 2019-12-16  Martin Sebor  <msebor@redhat.com>
 
 	PR middle-end/92952

gcc/config/i386/i386.c

@@ -14433,11 +14433,10 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
 {
   int dist_define, dist_use;
 
-  /* For Silvermont if using a 2-source or 3-source LEA for
-     non-destructive destination purposes, or due to wanting
-     ability to use SCALE, the use of LEA is justified.  */
-  if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
-      || TARGET_TREMONT || TARGET_INTEL)
+  /* For Atom processors newer than Bonnell, if using a 2-source or
+     3-source LEA for non-destructive destination purposes, or due to
+     wanting ability to use SCALE, the use of LEA is justified.  */
+  if (!TARGET_BONNELL)
     {
       if (has_scale)
 	return true;
@@ -14572,10 +14571,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
   struct ix86_address parts;
   int ok;
 
-  /* Check we need to optimize.  */
-  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
-    return false;
-
   /* The "at least two components" test below might not catch simple
      move or zero extension insns if parts.base is non-NULL and parts.disp
      is const0_rtx as the only components in the address, e.g. if the
@@ -14612,6 +14607,20 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
   if (parts.index)
     regno2 = true_regnum (parts.index);
 
+  /* Use add for a = a + b and a = b + a since it is faster and shorter
+     than lea for most processors.  For the processors like BONNELL, if
+     the destination register of LEA holds an actual address which will
+     be used soon, LEA is better and otherwise ADD is better.  */
+  if (!TARGET_BONNELL
+      && parts.scale == 1
+      && (!parts.disp || parts.disp == const0_rtx)
+      && (regno0 == regno1 || regno0 == regno2))
+    return true;
+
+  /* Check we need to optimize.  */
+  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
+    return false;
+
   split_cost = 0;
 
   /* Compute how many cycles we will add to execution time
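
Pulled out of the rtx machinery, the new early-return is just a shape
check on the decomposed address.  A minimal standalone sketch of the same
logic (hypothetical names; parts.base/parts.index reduced to plain
register numbers and const0_rtx to 0):

#include <stdbool.h>

/* Hypothetical mirror of the new check in ix86_avoid_lea_for_addr:
   "lea (reg1,reg2,1), reg0" with no displacement computes reg1 + reg2,
   so when the destination reuses one source it can be a plain ADD.  */
struct addr_parts
{
  int base;    /* register number of the base, or -1 if absent  */
  int index;   /* register number of the index, or -1 if absent */
  int scale;   /* 1, 2, 4 or 8 */
  long disp;   /* displacement; 0 if absent */
};

static bool
lea_is_plain_add (int dest, const struct addr_parts *p, bool bonnell)
{
  return !bonnell                /* on Bonnell, lea may still win */
         && p->scale == 1        /* index is not scaled */
         && p->disp == 0         /* no displacement */
         && (dest == p->base || dest == p->index);  /* destructive form */
}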

gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
+2019-12-17  H.J. Lu  <hjl.tools@gmail.com>
+
+	PR target/92807
+	* gcc.target/i386/pr92807-1.c: New test.
+
 2019-12-16  Jozef Lawrynowicz  <jozef.l@mittosystems.com>
 
 	* g++.dg/init/dso_handle1.C: Require cxa_atexit support.

gcc/testsuite/gcc.target/i386/pr92807-1.c

@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int
+abs2 (unsigned int a)
+{
+  unsigned int s = ((a>>15)&0x10001)*0xffff;
+  return (a+s)^s;
+}
+
+/* { dg-final { scan-assembler-not "leal" } } */
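
As an aside (an annotation, not stated in the commit): abs2 applies the
branchless sign-mask idiom to two 16-bit lanes packed into one 32-bit
value, and its final "a+s" is exactly the a = a + b shape that used to be
emitted as a leal whose destination equals one of its sources.  The
scalar form of the idiom, for reference (assumes arithmetic right shift
of a negative int; undefined for INT_MIN):

static inline int
iabs_branchless (int x)
{
  int s = x >> 31;      /* all ones when x < 0, else zero */
  return (x + s) ^ s;   /* negative x: ~(x - 1) == -x; otherwise x */
}

The fix can be verified by running the new test in the testsuite, e.g.
make check-gcc RUNTESTFLAGS="i386.exp=pr92807-1.c"; the scan-assembler-not
directive fails if any leal shows up in the generated assembly.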