i386: Split lea into shorter left shift by 2 or 3 bits with -Oz.
This patch avoids long lea instructions for performing x<<2 and x<<3 by splitting them into shorter sal and move (or xchg) instructions. Because this increases the number of instructions, but reduces the total size, it's suitable for -Oz (but not -Os).

The impact can be seen in the new test case:

    int foo(int x) { return x<<2; }
    int bar(int x) { return x<<3; }
    long long fool(long long x) { return x<<2; }
    long long barl(long long x) { return x<<3; }

where with -O2 we generate:

    foo:    lea    0x0(,%rdi,4),%eax    // 7 bytes
            retq
    bar:    lea    0x0(,%rdi,8),%eax    // 7 bytes
            retq
    fool:   lea    0x0(,%rdi,4),%rax    // 8 bytes
            retq
    barl:   lea    0x0(,%rdi,8),%rax    // 8 bytes
            retq

and with -Oz we now generate:

    foo:    xchg   %eax,%edi            // 1 byte
            shl    $0x2,%eax            // 3 bytes
            retq
    bar:    xchg   %eax,%edi            // 1 byte
            shl    $0x3,%eax            // 3 bytes
            retq
    fool:   xchg   %rax,%rdi            // 2 bytes
            shl    $0x2,%rax            // 4 bytes
            retq
    barl:   xchg   %rax,%rdi            // 2 bytes
            shl    $0x3,%rax            // 4 bytes
            retq

Over the entirety of the CSiBE code size benchmark this saves 1347 bytes (0.037%) for x86_64, and 1312 bytes (0.036%) with -m32.

Conveniently, there's already a backend function in i386.cc for deciding whether to split an lea into its component instructions, ix86_avoid_lea_for_addr; all that's required is an additional clause checking for -Oz (i.e. optimize_size > 1).

2023-10-06  Roger Sayle  <roger@nextmovesoftware.com>
            Uros Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
    * config/i386/i386.cc (ix86_avoid_lea_for_addr): Split LEAs used to perform left shifts into shorter instructions with -Oz.

gcc/testsuite/ChangeLog
    * gcc.target/i386/lea-2.c: New test case.
This commit is contained in:
parent
c1bc7513b1
commit
fa8c99c4a4
2 changed files with 14 additions and 0 deletions
|
@ -15543,6 +15543,13 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
|
|||
&& (regno0 == regno1 || regno0 == regno2))
|
||||
return true;
|
||||
|
||||
/* Split with -Oz if the encoding requires fewer bytes. */
|
||||
if (optimize_size > 1
|
||||
&& parts.scale > 1
|
||||
&& !parts.base
|
||||
&& (!parts.disp || parts.disp == const0_rtx))
|
||||
return true;
|
||||
|
||||
/* Check we need to optimize. */
|
||||
if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
|
||||
return false;
|
||||
|
|
7
gcc/testsuite/gcc.target/i386/lea-2.c
Normal file
7
gcc/testsuite/gcc.target/i386/lea-2.c
Normal file
|
@ -0,0 +1,7 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Oz" } */
|
||||
/* int left shift by 2.  The dg-final scan-assembler-not check in this
   file requires that the -Oz output contains no leal/leaq for it.  */
int foo(int x) { return x<<2; }
|
||||
/* int left shift by 3.  The dg-final scan-assembler-not check in this
   file requires that the -Oz output contains no leal/leaq for it.  */
int bar(int x) { return x<<3; }
|
||||
/* long long left shift by 2.  The dg-final scan-assembler-not check in
   this file requires that the -Oz output contains no leal/leaq for it.  */
long long fool(long long x) { return x<<2; }
|
||||
/* long long left shift by 3.  The dg-final scan-assembler-not check in
   this file requires that the -Oz output contains no leal/leaq for it.  */
long long barl(long long x) { return x<<3; }
|
||||
/* { dg-final { scan-assembler-not "lea\[lq\]" } } */
|
Loading…
Add table
Reference in a new issue