Use add for a = a + b and a = b + a when possible.
Except on Bonnell, "add %edi,%ebx" (encoding 01 fb) is both faster and shorter than "lea (%rdi,%rbx,1),%ebx" (encoding 8d 1c 1f), so we should use add for a = a + b and a = b + a whenever possible, unless optimizing for Bonnell. Tested on x86-64. 2019-12-17 H.J. Lu <hjl.tools@gmail.com> gcc/ PR target/92807 * config/i386/i386.c (ix86_lea_outperforms): Check !TARGET_BONNELL. (ix86_avoid_lea_for_addr): When not optimizing for Bonnell, use add for a = a + b and a = b + a. gcc/testsuite/ PR target/92807 * gcc.target/i386/pr92807-1.c: New test. From-SVN: r279451
This commit is contained in:
parent
d187dab387
commit
62dd2904f0
4 changed files with 41 additions and 9 deletions
|
@ -1,3 +1,10 @@
|
|||
2019-12-17 H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
PR target/92807
|
||||
* config/i386/i386.c (ix86_lea_outperforms): Check !TARGET_BONNELL.
|
||||
(ix86_avoid_lea_for_addr): When not optimizing for Bonnell, use add
|
||||
for a = a + b and a = b + a.
|
||||
|
||||
2019-12-16 Martin Sebor <msebor@redhat.com>
|
||||
|
||||
PR middle-end/92952
|
||||
|
|
|
@ -14433,11 +14433,10 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
|
|||
{
|
||||
int dist_define, dist_use;
|
||||
|
||||
/* For Silvermont if using a 2-source or 3-source LEA for
|
||||
non-destructive destination purposes, or due to wanting
|
||||
ability to use SCALE, the use of LEA is justified. */
|
||||
if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
|
||||
|| TARGET_TREMONT || TARGET_INTEL)
|
||||
/* For Atom processors newer than Bonnell, if using a 2-source or
|
||||
3-source LEA for non-destructive destination purposes, or due to
|
||||
wanting ability to use SCALE, the use of LEA is justified. */
|
||||
if (!TARGET_BONNELL)
|
||||
{
|
||||
if (has_scale)
|
||||
return true;
|
||||
|
@ -14572,10 +14571,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
|
|||
struct ix86_address parts;
|
||||
int ok;
|
||||
|
||||
/* Check we need to optimize. */
|
||||
if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
|
||||
return false;
|
||||
|
||||
/* The "at least two components" test below might not catch simple
|
||||
move or zero extension insns if parts.base is non-NULL and parts.disp
|
||||
is const0_rtx as the only components in the address, e.g. if the
|
||||
|
@ -14612,6 +14607,20 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
|
|||
if (parts.index)
|
||||
regno2 = true_regnum (parts.index);
|
||||
|
||||
/* Use add for a = a + b and a = b + a since it is faster and shorter
|
||||
than lea for most processors. For the processors like BONNELL, if
|
||||
the destination register of LEA holds an actual address which will
|
||||
be used soon, LEA is better and otherwise ADD is better. */
|
||||
if (!TARGET_BONNELL
|
||||
&& parts.scale == 1
|
||||
&& (!parts.disp || parts.disp == const0_rtx)
|
||||
&& (regno0 == regno1 || regno0 == regno2))
|
||||
return true;
|
||||
|
||||
/* Check we need to optimize. */
|
||||
if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
|
||||
return false;
|
||||
|
||||
split_cost = 0;
|
||||
|
||||
/* Compute how many cycles we will add to execution time
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2019-12-17 H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
PR target/92807
|
||||
* gcc.target/i386/pr92807-1.c: New test.
|
||||
|
||||
2019-12-16 Jozef Lawrynowicz <jozef.l@mittosystems.com>
|
||||
|
||||
* g++.dg/init/dso_handle1.C: Require cxa_atexit support.
|
||||
|
|
11
gcc/testsuite/gcc.target/i386/pr92807-1.c
Normal file
11
gcc/testsuite/gcc.target/i386/pr92807-1.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
/* Regression input for PR target/92807.  The exact shape of the
   expressions below is what the test exercises, so do not simplify or
   reorder them.

   A holds the 32-bit input value.  The return value is (A + S) ^ S,
   where S is a mask derived from bits 15 and 31 of A (see below).
   NOTE(review): the name suggests a per-16-bit-lane absolute value;
   confirm against the PR before relying on that reading.  */
unsigned int
|
||||
abs2 (unsigned int a)
|
||||
{
|
||||
/* (a>>15)&0x10001 keeps bit 15 and bit 31 of A at bit positions 0 and
   16; multiplying by 0xffff widens each kept bit into sixteen one bits,
   so S is 0xffff in every 16-bit half of A whose top bit is set.  */
unsigned int s = ((a>>15)&0x10001)*0xffff;
|
||||
/* The sum a+s is the a = a + b form the commit targets; the dg-final
   directive in this test requires that no "leal" is emitted.  */
return (a+s)^s;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "leal" } } */
|
Loading…
Add table
Reference in a new issue