expand: Allow widdening optab when expanding popcount==1 [PR116508]
After adding popcount{qi,hi}2 to the aarch64 backend, I noticed that the expansion for popcount==1 was no longer trying to do the trick of handling popcount==1 as `(arg ^ (arg - 1)) > arg - 1`. The problem is the expansion was using OPTAB_DIRECT, when using OPTAB_WIDEN will allow modes which are smaller than SImode (in the aarch64 case). Note QImode's cost still needs some improvements so part of popcnt-eq-1.c is xfailed. Though there is a check to make sure the costs are compared now. Built and tested on aarch64-linux-gnu. PR middle-end/116508 gcc/ChangeLog: * internal-fn.cc (expand_POPCOUNT): Use OPTAB_WIDEN for PLUS and XOR/AND expansion. gcc/testsuite/ChangeLog: * gcc.target/aarch64/popcnt-eq-1.c: New test. Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
This commit is contained in:
parent
cdd5dd2125
commit
215c7e3084
2 changed files with 47 additions and 2 deletions
|
@ -5332,11 +5332,11 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
|
|||
start_sequence ();
|
||||
rtx op0 = expand_normal (arg);
|
||||
rtx argm1 = expand_simple_binop (mode, PLUS, op0, constm1_rtx, NULL_RTX,
|
||||
1, OPTAB_DIRECT);
|
||||
1, OPTAB_WIDEN);
|
||||
if (argm1 == NULL_RTX)
|
||||
goto fail;
|
||||
rtx argxorargm1 = expand_simple_binop (mode, nonzero_arg ? AND : XOR, op0,
|
||||
argm1, NULL_RTX, 1, OPTAB_DIRECT);
|
||||
argm1, NULL_RTX, 1, OPTAB_WIDEN);
|
||||
if (argxorargm1 == NULL_RTX)
|
||||
goto fail;
|
||||
rtx cmp;
|
||||
|
|
45
gcc/testsuite/gcc.target/aarch64/popcnt-eq-1.c
Normal file
45
gcc/testsuite/gcc.target/aarch64/popcnt-eq-1.c
Normal file
|
@ -0,0 +1,45 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-rtl-expand-details" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
/* PR middle-end/116508 */
|
||||
|
||||
#pragma GCC target "+nocssc"
|
||||
|
||||
/*
|
||||
** h16:
|
||||
** sub w([0-9]+), w0, #1
|
||||
** eor w([0-9]+), w0, w\1
|
||||
** and w([0-9]+), w\1, 65535
|
||||
** cmp w\3, w\2, uxth
|
||||
** cset w0, cc
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* when expanding popcount == 1, should use
|
||||
`(arg ^ (arg - 1)) > arg - 1` as that has a lower latency
|
||||
than doing the popcount then comparing against 1.
|
||||
The popcount/addv can be costly. */
|
||||
unsigned h16 (const unsigned short a) {
|
||||
return __builtin_popcountg (a) == 1;
|
||||
}
|
||||
|
||||
/* unsigned char should also do the same trick */
|
||||
/* Currently xfailed since the cost does not take into account the
|
||||
moving between gprs and vector regs correctly. */
|
||||
/*
|
||||
** h8: { xfail *-*-* }
|
||||
** sub w([0-9]+), w0, #1
|
||||
** eor w([0-9]+), w0, w\1
|
||||
** and w([0-9]+), w\1, 255
|
||||
** cmp w\3, w\2, uxtb
|
||||
** cset w0, cc
|
||||
** ret
|
||||
*/
|
||||
|
||||
|
||||
unsigned h8 (const unsigned char a) {
|
||||
return __builtin_popcountg (a) == 1;
|
||||
}
|
||||
|
||||
/* There should be printing out the costs for h8 and h16's popcount == 1 */
|
||||
/* { dg-final { scan-rtl-dump-times "popcount == 1:" 2 "expand"} } */
|
Loading…
Add table
Reference in a new issue