PR target/110598: Fix rega = 0; rega ^= rega regression in i386.md
This patch fixes the regression PR target/110598 caused by my recent addition of a peephole2. The intention of that optimization was to simplify zeroing a register, followed by an IOR, XOR or PLUS operation on it into a move, or as described in the comment: ;; Peephole2 rega = 0; rega op= regb into rega = regb. The issue is that I'd failed to consider the (rare and unusual) case, where regb is rega, where the transformation leads to the incorrect "rega = rega", when it should be "rega = 0". The minimal fix is to add a !reg_mentioned_p check to the recent peephole2. In addition to resolving the regression, I've added a second peephole2 to optimize the problematic case above, which contains a false dependency and is therefore tricky to optimize elsewhere. This is an improvement over GCC 13, for example, that generates the redundant: xorl %edx, %edx xorq %rdx, %rdx 2023-07-12 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog PR target/110598 * config/i386/i386.md (peephole2): Check !reg_mentioned_p when optimizing rega = 0; rega op= regb for op in [XOR,IOR,PLUS]. (peephole2): Simplify rega = 0; rega op= rega cases. gcc/testsuite/ChangeLog PR target/110598 * gcc.target/i386/pr110598.c: New test case.
This commit is contained in:
parent
46ade8c9cc
commit
d2c18b4a16
2 changed files with 60 additions and 2 deletions
|
@ -12325,9 +12325,21 @@
|
|||
(any_or_plus:SWI (match_dup 0)
|
||||
(match_operand:SWI 1 "<general_operand>")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
""
|
||||
"!reg_mentioned_p (operands[0], operands[1])"
|
||||
[(set (match_dup 0) (match_dup 1))])
|
||||
|
||||
|
||||
;; Peephole2 dead instruction in rega = 0; rega op= rega.
|
||||
(define_peephole2
|
||||
[(parallel [(set (match_operand:SWI 0 "general_reg_operand")
|
||||
(const_int 0))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(parallel [(set (match_dup 0)
|
||||
(any_or_plus:SWI (match_dup 0) (match_dup 0)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
""
|
||||
[(parallel [(set (match_dup 0) (const_int 0))
|
||||
(clobber (reg:CC FLAGS_REG))])])
|
||||
|
||||
;; Split DST = (HI<<32)|LO early to minimize register usage.
|
||||
(define_insn_and_split "*concat<mode><dwi>3_1"
|
||||
[(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
|
||||
|
|
46
gcc/testsuite/gcc.target/i386/pr110598.c
Normal file
46
gcc/testsuite/gcc.target/i386/pr110598.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
typedef unsigned long long u64;
|
||||
|
||||
#define MAX_SUBTARGET_WORDS 4
|
||||
|
||||
int notequal(const void *a, const void *b)
|
||||
{
|
||||
return __builtin_memcmp(a,b,MAX_SUBTARGET_WORDS*sizeof(u64)) != 0;
|
||||
}
|
||||
typedef struct FeatureBitset {
|
||||
u64 Bits[MAX_SUBTARGET_WORDS];
|
||||
}FeatureBitset;
|
||||
|
||||
__attribute__((noipa))
|
||||
_Bool is_eq_buggy (const FeatureBitset * lf, const FeatureBitset * rf) {
|
||||
u64 Bits_l[MAX_SUBTARGET_WORDS];
|
||||
Bits_l[0] = lf->Bits[0]&1;
|
||||
Bits_l[1] = 0;
|
||||
Bits_l[2] = 0;
|
||||
Bits_l[3] = 0;
|
||||
u64 Bits_r[MAX_SUBTARGET_WORDS];
|
||||
Bits_r[0] = rf->Bits[0]&1;
|
||||
Bits_r[1] = 0;
|
||||
Bits_r[2] = 0;
|
||||
Bits_r[3] = 0;
|
||||
return !notequal(Bits_l, Bits_r);
|
||||
}
|
||||
|
||||
__attribute__((noipa))
|
||||
void bug(void) {
|
||||
FeatureBitset lf, rf;
|
||||
lf.Bits[0] = rf.Bits[0] = 1;
|
||||
lf.Bits[1] = rf.Bits[1] = 1;
|
||||
lf.Bits[2] = rf.Bits[2] = 1;
|
||||
lf.Bits[3] = rf.Bits[3] = 1;
|
||||
|
||||
_Bool r = is_eq_buggy (&lf, &rf);
|
||||
if (!r) __builtin_trap();
|
||||
}
|
||||
|
||||
__attribute__((noipa))
|
||||
int main(void) {
|
||||
bug();
|
||||
}
|
Loading…
Add table
Reference in a new issue