Fix PR 110042: ifcvt regression due to paradoxical subregs
After r14-1014-gc5df248509b489364c573e8, GCC started to emit directly a zero_extract for `(t1&0x8)!=0`. This introduced a small regression where ifcvt would not do the ifconversion as there is now a paradoxical subreg in the dest which was being rejected. Since paradoxical subreg set the whole register, we can treat it as the same as a reg in the two places. OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu. gcc/ChangeLog: PR rtl-optimization/110042 * ifcvt.cc (bbs_ok_for_cmove_arith): Allow paradoxical subregs. (bb_valid_for_noce_process_p): Strip the subreg for the SET_DEST. gcc/testsuite/ChangeLog: PR rtl-optimization/110042 * gcc.target/aarch64/csel_bfx_2.c: New test.
This commit is contained in:
parent
84d080a29a
commit
df0853d72d
2 changed files with 36 additions and 5 deletions
14
gcc/ifcvt.cc
14
gcc/ifcvt.cc
|
@ -2022,7 +2022,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
|||
}
|
||||
|
||||
/* Make sure this is a REG and not some instance
|
||||
of ZERO_EXTRACT or SUBREG or other dangerous stuff.
|
||||
of ZERO_EXTRACT or non-paradoxical SUBREG or other dangerous stuff.
|
||||
If we have a memory destination then we have a pair of simple
|
||||
basic blocks performing an operation of the form [addr] = c ? a : b.
|
||||
bb_valid_for_noce_process_p will have ensured that these are
|
||||
|
@ -2030,7 +2030,8 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
|||
to be renamed. Assert that the callers set this up properly. */
|
||||
if (MEM_P (SET_DEST (sset_b)))
|
||||
gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
|
||||
else if (!REG_P (SET_DEST (sset_b)))
|
||||
else if (!REG_P (SET_DEST (sset_b))
|
||||
&& !paradoxical_subreg_p (SET_DEST (sset_b)))
|
||||
{
|
||||
BITMAP_FREE (bba_sets);
|
||||
return false;
|
||||
|
@ -3136,14 +3137,17 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
|||
|
||||
rtx sset = single_set (insn);
|
||||
gcc_assert (sset);
|
||||
rtx dest = SET_DEST (sset);
|
||||
if (SUBREG_P (dest))
|
||||
dest = SUBREG_REG (dest);
|
||||
|
||||
if (contains_mem_rtx_p (SET_SRC (sset))
|
||||
|| !REG_P (SET_DEST (sset))
|
||||
|| reg_overlap_mentioned_p (SET_DEST (sset), cond))
|
||||
|| !REG_P (dest)
|
||||
|| reg_overlap_mentioned_p (dest, cond))
|
||||
goto free_bitmap_and_fail;
|
||||
|
||||
potential_cost += pattern_cost (sset, speed_p);
|
||||
bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
bitmap_set_bit (test_bb_temps, REGNO (dest));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
27
gcc/testsuite/gcc.target/aarch64/csel_bfx_2.c
Normal file
27
gcc/testsuite/gcc.target/aarch64/csel_bfx_2.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
unsigned
|
||||
f1(int t, int t1)
|
||||
{
|
||||
int tt = 0;
|
||||
if(t)
|
||||
tt = (t1&0x8)!=0;
|
||||
return tt;
|
||||
}
|
||||
struct f
|
||||
{
|
||||
unsigned t:3;
|
||||
unsigned t1:4;
|
||||
};
|
||||
unsigned
|
||||
f2(int t, struct f y)
|
||||
{
|
||||
int tt = 0;
|
||||
if(t)
|
||||
tt = y.t1;
|
||||
return tt;
|
||||
}
|
||||
/* Both f1 and f2 should produce a csel and not a cbz on the argument. */
|
||||
/* { dg-final { scan-assembler-times "csel\t" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "ubfx\t" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "cbz\t" } } */
|
Loading…
Add table
Reference in a new issue