amdgcn: Add instruction pattern for conditional shift operations

gcc/ChangeLog:

	* config/gcn/gcn-valu.md (cond_<expander><mode>): Add
	cond_{ashl|ashr|lshr}.

gcc/testsuite/ChangeLog:

	* gcc.target/gcn/cond_shift_3.c: New test.
	* gcc.target/gcn/cond_shift_3_run.c: New test.
	* gcc.target/gcn/cond_shift_4.c: New test.
	* gcc.target/gcn/cond_shift_4_run.c: New test.
	* gcc.target/gcn/cond_shift_8.c: New test.
	* gcc.target/gcn/cond_shift_8_run.c: New test.
	* gcc.target/gcn/cond_shift_9.c: New test.
	* gcc.target/gcn/cond_shift_9_run.c: New test.
This commit is contained in:
Paul-Antoine Arras 2023-02-01 16:13:23 +01:00
parent cd41085a37
commit 9c7e898bbd
9 changed files with 279 additions and 0 deletions

View file

@ -3489,6 +3489,29 @@
DONE;
})
;; Code iterator over the three RTL shift codes: ashift, lshiftrt and
;; ashiftrt.
(define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])
;; Expander named cond_<expander><mode>, instantiated for every shift code in
;; cond_shiftop and every mode in V_INT_noHI.
;;   Operand 0: destination vector register.
;;   Operand 1: DImode register, passed as the last argument of the _exec
;;              insn (presumably the per-lane mask -- TODO confirm).
;;   Operand 2: vector value to be shifted.
;;   Operand 3: vector of shift amounts.
;;   Operand 4: vector register passed through to the _exec insn
;;              (presumably the result for inactive lanes -- TODO confirm).
;; The expansion forces operands 1 and 2 into registers, converts operand 3
;; into a fresh <VnSI>mode pseudo with convert_move, and then emits
;; v<expander><mode>3_exec with (operand 0, operand 2, converted shift
;; amount, operand 4, operand 1).
(define_expand "cond_<expander><mode>"
[(match_operand:V_INT_noHI 0 "register_operand")
(match_operand:DI 1 "register_operand")
(cond_shiftop:V_INT_noHI
(match_operand:V_INT_noHI 2 "gcn_alu_operand")
(match_operand:V_INT_noHI 3 "gcn_alu_operand"))
(match_operand:V_INT_noHI 4 "register_operand")]
""
{
operands[1] = force_reg (DImode, operands[1]);
operands[2] = force_reg (<MODE>mode, operands[2]);
rtx shiftby = gen_reg_rtx (<VnSI>mode);
convert_move (shiftby, operands[3], 0);
emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2],
shiftby, operands[4],
operands[1]));
DONE;
})
;; }}}
;; {{{ Vector reductions

View file

@ -0,0 +1,37 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp" } */
#include <stdint.h>
/* Define test_<TYPE>_<NAME>, a noipa function that, for each i in [0, n),
   stores b[i] OP 3 into r[i] when a[i] > 20 and the constant 72
   otherwise.  */
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP 3 : 72; \
}
/* Apply T to TYPE once for the left shift (shl, <<) and once for the right
   shift (shr, >>).  */
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
/* Apply T to each element type under test: signed and unsigned, 32 and
   64 bits wide.  */
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
/* Instantiate the eight test functions (four types, two shifts each).  */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tv_lshlrev_b32\tv[0-9]+, 3, v[0-9]+} 10 } } */
/* { dg-final { scan-assembler-times {\tv_ashrrev_i32\tv[0-9]+, 3, v[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
/* { dg-final { scan-assembler-not {movv64di_exec/2} } } */

View file

@ -0,0 +1,27 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include "cond_shift_3.c"
/* Number of elements in every test array.  */
#define N 99

/* Fill the input arrays, call test_<TYPE>_<NAME>, and abort as soon as one
   output element differs from the scalar reference expression.  The empty
   asm with a "memory" clobber sits inside the initialisation loop.  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE out[N], sel[N], val[N]; \
    for (int idx = 0; idx < N; ++idx) \
      { \
        sel[idx] = (idx & 1) ? idx : 3 * idx; \
        val[idx] = (idx >> 4) << (idx & 15); \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (out, sel, val, N); \
    for (int idx = 0; idx < N; ++idx) \
      { \
        TYPE expected = (TYPE) (sel[idx] > 20 ? val[idx] OP 3 : 72); \
        if (out[idx] != expected) \
          __builtin_abort (); \
      } \
  }
/* Run TEST_LOOP for every type/shift combination listed by TEST_ALL.
   A mismatch aborts inside TEST_LOOP; otherwise the program returns 0.  */
int
main ()
{
  TEST_ALL (TEST_LOOP)
  return 0;
}

View file

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp" } */
#include <stdint.h>
/* Define test_<TYPE>_<NAME>, a noipa function that, for each i in [0, n),
   stores b[i] OP 3 into r[i] when a[i] > 20 and zero otherwise.  */
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP 3 : 0; \
}
/* Apply T to TYPE once for the left shift (shl, <<) and once for the right
   shift (shr, >>).  */
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
/* Apply T to each element type under test: signed and unsigned, 32 and
   64 bits wide.  */
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
/* Instantiate the eight test functions (four types, two shifts each).  */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tv_lshlrev_b32\tv[0-9]+, 3, v[0-9]+} 10 } } */
/* { dg-final { scan-assembler-times {\tv_ashrrev_i32\tv[0-9]+, 3, v[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
/* { dg-final { scan-assembler-not {movv64di_exec/2} } } */

View file

@ -0,0 +1,27 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include "cond_shift_4.c"
/* Number of elements in every test array.  */
#define N 99

/* Fill the input arrays, call test_<TYPE>_<NAME>, and abort as soon as one
   output element differs from the scalar reference expression, whose
   fallback value is zero.  The empty asm with a "memory" clobber sits
   inside the initialisation loop.  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE out[N], sel[N], val[N]; \
    for (int idx = 0; idx < N; ++idx) \
      { \
        sel[idx] = (idx & 1) ? idx : 3 * idx; \
        val[idx] = (idx >> 4) << (idx & 15); \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (out, sel, val, N); \
    for (int idx = 0; idx < N; ++idx) \
      { \
        TYPE expected = (TYPE) (sel[idx] > 20 ? val[idx] OP 3 : 0); \
        if (out[idx] != expected) \
          __builtin_abort (); \
      } \
  }
/* Run TEST_LOOP for every type/shift combination listed by TEST_ALL.
   A mismatch aborts inside TEST_LOOP; otherwise the program returns 0.  */
int
main ()
{
  TEST_ALL (TEST_LOOP)
  return 0;
}

View file

@ -0,0 +1,35 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp" } */
#include <stdint.h>
/* Define test_<TYPE>_<NAME>, a noipa function that, for each i in [0, n),
   stores b[i] OP c[i] into r[i] when a[i] > 20 and the constant 91
   otherwise.  The shift amount comes from the array c.  */
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : 91; \
}
/* Apply T to TYPE once for the left shift (shl, <<) and once for the right
   shift (shr, >>).  */
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
/* Apply T to each element type under test: signed and unsigned, 32 and
   64 bits wide.  */
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
/* Instantiate the eight test functions (four types, two shifts each).  */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-not {movv64si_exec/0} } } */
/* { dg-final { scan-assembler-not {movv64di_exec/0} } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include "cond_shift_8.c"
/* Number of elements in every test array.  */
#define N 99

/* Fill the three input arrays (selector, value, shift amount), call
   test_<TYPE>_<NAME>, and abort as soon as one output element differs from
   the scalar reference expression, whose fallback value is 91.  The empty
   asm with a "memory" clobber sits inside the initialisation loop.  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE out[N], sel[N], val[N], amt[N]; \
    for (int idx = 0; idx < N; ++idx) \
      { \
        sel[idx] = (idx & 1) ? idx : 3 * idx; \
        val[idx] = (idx >> 4) << (idx & 15); \
        amt[idx] = ~idx & 7; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (out, sel, val, amt, N); \
    for (int idx = 0; idx < N; ++idx) \
      { \
        TYPE expected = (TYPE) (sel[idx] > 20 ? val[idx] OP amt[idx] : 91); \
        if (out[idx] != expected) \
          __builtin_abort (); \
      } \
  }
/* Run TEST_LOOP for every type/shift combination listed by TEST_ALL.
   A mismatch aborts inside TEST_LOOP; otherwise the program returns 0.  */
int
main ()
{
  TEST_ALL (TEST_LOOP)
  return 0;
}

View file

@ -0,0 +1,36 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp" } */
#include <stdint.h>
/* Define test_<TYPE>_<NAME>, a noipa function that, for each i in [0, n),
   stores b[i] OP c[i] into r[i] when a[i] > 20 and zero otherwise.  The
   shift amount comes from the array c.  */
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : 0; \
}
/* Apply T to TYPE once for the left shift (shl, <<) and once for the right
   shift (shr, >>).  */
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
/* Apply T to each element type under test: signed and unsigned, 32 and
   64 bits wide.  */
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
/* Instantiate the eight test functions (four types, two shifts each).  */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
/* { dg-final { scan-assembler-not {movv64si_exec/2} } } */
/* { dg-final { scan-assembler-not {movv64di_exec/1} } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include "cond_shift_9.c"
/* Number of elements in every test array.  */
#define N 99

/* Fill the three input arrays (selector, value, shift amount), call
   test_<TYPE>_<NAME>, and abort as soon as one output element differs from
   the scalar reference expression, whose fallback value is zero.  The empty
   asm with a "memory" clobber sits inside the initialisation loop.  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE out[N], sel[N], val[N], amt[N]; \
    for (int idx = 0; idx < N; ++idx) \
      { \
        sel[idx] = (idx & 1) ? idx : 3 * idx; \
        val[idx] = (idx >> 4) << (idx & 15); \
        amt[idx] = ~idx & 7; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (out, sel, val, amt, N); \
    for (int idx = 0; idx < N; ++idx) \
      { \
        TYPE expected = (TYPE) (sel[idx] > 20 ? val[idx] OP amt[idx] : 0); \
        if (out[idx] != expected) \
          __builtin_abort (); \
      } \
  }
/* Run TEST_LOOP for every type/shift combination listed by TEST_ALL.
   A mismatch aborts inside TEST_LOOP; otherwise the program returns 0.  */
int
main ()
{
  TEST_ALL (TEST_LOOP)
  return 0;
}