Add support for conditional shifts

This patch adds support for IFN_COND shifts left and shifts right.
This is mostly mechanical, but since we try to handle conditional
operations in the same way as unconditional operations in match.pd,
we need to support IFN_COND shifts by scalars as well as vectors.
E.g.:

   IFN_COND_SHL (cond, a, { 1, 1, ... }, fallback)

and:

   IFN_COND_SHL (cond, a, 1, fallback)

are the same operation, with:

   (for shiftrotate (lrotate rrotate lshift rshift)
    ...
    /* Prefer vector1 << scalar to vector1 << vector2
       if vector2 is uniform.  */
    (for vec (VECTOR_CST CONSTRUCTOR)
     (simplify
      (shiftrotate @0 vec@1)
      (with { tree tem = uniform_vector_p (@1); }
       (if (tem)
	(shiftrotate @0 { tem; }))))))

preferring the latter.  The patch copes with this by extending
create_convert_operand_from to handle scalar-to-vector conversions.

2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
	    Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

gcc/
	* internal-fn.def (IFN_COND_SHL, IFN_COND_SHR): New internal functions.
	* internal-fn.c (FOR_EACH_CODE_MAPPING): Handle shifts.
	* match.pd (UNCOND_BINARY, COND_BINARY): Likewise.
	* optabs.def (cond_ashl_optab, cond_ashr_optab, cond_lshr_optab): New
	optabs.
	* optabs.h (create_convert_operand_from): Expand comment.
	* optabs.c (maybe_legitimize_operand): Allow implicit broadcasts
	when mapping scalar rtxes to vector operands.
	* config/aarch64/iterators.md (SVE_INT_BINARY): Add ashift,
	ashiftrt and lshiftrt.
	(sve_int_op, sve_int_op_rev, sve_pred_int_rhs2_operand): Handle them.
	* config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_const)
	(*cond_<optab><mode>_any_const): New patterns.

gcc/testsuite/
	* gcc.target/aarch64/sve/cond_shift_1.c: New test.
	* gcc.target/aarch64/sve/cond_shift_1_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_2.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_2_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_3.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_3_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_4.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_4_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_5.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_5_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_6.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_6_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_7.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_7_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_8.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_8_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_9.c: Likewise.
	* gcc.target/aarch64/sve/cond_shift_9_run.c: Likewise.

Co-Authored-By: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>

From-SVN: r274505
This commit is contained in:
Richard Sandiford 2019-08-15 08:05:50 +00:00 committed by Richard Sandiford
parent cc8495056e
commit 20103c0ea9
28 changed files with 772 additions and 17 deletions

View file

@ -1,3 +1,20 @@
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
* internal-fn.def (IFN_COND_SHL, IFN_COND_SHR): New internal functions.
* internal-fn.c (FOR_EACH_CODE_MAPPING): Handle shifts.
* match.pd (UNCOND_BINARY, COND_BINARY): Likewise.
* optabs.def (cond_ashl_optab, cond_ashr_optab, cond_lshr_optab): New
optabs.
* optabs.h (create_convert_operand_from): Expand comment.
* optabs.c (maybe_legitimize_operand): Allow implicit broadcasts
when mapping scalar rtxes to vector operands.
* config/aarch64/iterators.md (SVE_INT_BINARY): Add ashift,
ashiftrt and lshiftrt.
(sve_int_op, sve_int_op_rev, sve_pred_int_rhs2_operand): Handle them.
* config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_const)
(*cond_<optab><mode>_any_const): New patterns.
2019-08-15 Martin Liska <mliska@suse.cz>
PR ipa/91438

View file

@ -1772,7 +1772,10 @@
;; Includes:
;; - ADD (merging form only)
;; - AND (merging form only)
;; - ASR (merging form only)
;; - EOR (merging form only)
;; - LSL (merging form only)
;; - LSR (merging form only)
;; - MUL
;; - ORR (merging form only)
;; - SMAX
@ -2405,6 +2408,49 @@
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
;; Predicated integer shift, merging with the first input.
;;
;; Operand 1 is the governing predicate, operand 2 is the value being
;; shifted and operand 3 is the shift amount, which must satisfy
;; aarch64_simd_<lr>shift_imm and is printed as an immediate (#%3).
;; The UNSPEC_SEL falls back to operand 2 itself (the match_dup).
;;
;; There are two alternatives:
;; - operand 2 is tied to the destination ("0"), so a single predicated
;;   shift is enough;
;; - operand 2 is in some other register, so it is first copied into the
;;   (earlyclobber) destination with MOVPRFX and then shifted in place.
(define_insn "*cond_<optab><mode>_2_const"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(ASHIFT:SVE_I
(match_operand:SVE_I 2 "register_operand" "0, w")
(match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE"
"@
<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
[(set_attr "movprfx" "*,yes")]
)
;; Predicated integer shift, merging with an independent value.
;;
;; Operand 1 is the governing predicate, operand 2 is the value being
;; shifted and operand 3 is the immediate shift amount.  Operand 4 is the
;; fallback value of the UNSPEC_SEL; the insn condition requires it to be
;; a different rtx from operand 2.
;;
;; There are three alternatives:
;; - operand 4 matches "Dz" (presumably constant zero, given the
;;   aarch64_simd_reg_or_zero predicate): use a zeroing (/z) MOVPRFX of
;;   operand 2 followed by the predicated shift;
;; - operand 4 is tied to the destination ("0"): use a merging (/m)
;;   MOVPRFX of operand 2 followed by the predicated shift;
;; - operand 4 is any other register: output "#" and rewrite the insn
;;   after reload.  The rewrite emits a vcond_mask that selects between
;;   operands 2 and 4 under operand 1 into operand 0, then makes operand 0
;;   both the shift input and the fallback value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
[(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(ASHIFT:SVE_I
(match_operand:SVE_I 2 "register_operand" "w, w, w")
(match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
(match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
#"
"&& reload_completed
&& register_operand (operands[4], <MODE>mode)
&& !rtx_equal_p (operands[0], operands[4])"
{
emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
operands[4], operands[1]));
operands[4] = operands[2] = operands[0];
}
[(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------

View file

@ -1280,6 +1280,7 @@
;; SVE integer binary operations.
(define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin
ashift ashiftrt lshiftrt
and ior xor])
;; SVE integer binary division operations.
@ -1475,6 +1476,9 @@
(smax "smax")
(umin "umin")
(umax "umax")
(ashift "lsl")
(ashiftrt "asr")
(lshiftrt "lsr")
(and "and")
(ior "orr")
(xor "eor")
@ -1484,17 +1488,20 @@
(popcount "cnt")])
(define_code_attr sve_int_op_rev [(plus "add")
(minus "subr")
(mult "mul")
(div "sdivr")
(udiv "udivr")
(smin "smin")
(smax "smax")
(umin "umin")
(umax "umax")
(and "and")
(ior "orr")
(xor "eor")])
(minus "subr")
(mult "mul")
(div "sdivr")
(udiv "udivr")
(smin "smin")
(smax "smax")
(umin "umin")
(umax "umax")
(ashift "lslr")
(ashiftrt "asrr")
(lshiftrt "lsrr")
(and "and")
(ior "orr")
(xor "eor")])
;; The floating-point SVE instruction that implements an rtx code.
(define_code_attr sve_fp_op [(plus "fadd")
@ -1535,6 +1542,9 @@
(umax "register_operand")
(smin "register_operand")
(umin "register_operand")
(ashift "aarch64_sve_lshift_operand")
(ashiftrt "aarch64_sve_rshift_operand")
(lshiftrt "aarch64_sve_rshift_operand")
(and "aarch64_sve_pred_and_operand")
(ior "register_operand")
(xor "register_operand")])

View file

@ -3286,7 +3286,9 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
T (MAX_EXPR, IFN_COND_MAX) \
T (BIT_AND_EXPR, IFN_COND_AND) \
T (BIT_IOR_EXPR, IFN_COND_IOR) \
T (BIT_XOR_EXPR, IFN_COND_XOR)
T (BIT_XOR_EXPR, IFN_COND_XOR) \
T (LSHIFT_EXPR, IFN_COND_SHL) \
T (RSHIFT_EXPR, IFN_COND_SHR)
/* Return a function that only performs CODE when a certain condition is met
and that uses a given fallback value otherwise. For example, if CODE is

View file

@ -167,6 +167,10 @@ DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CONST | ECF_NOTHROW,
cond_ior, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW,
cond_xor, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_SHL, ECF_CONST | ECF_NOTHROW,
cond_ashl, cond_binary)
DEF_INTERNAL_SIGNED_OPTAB_FN (COND_SHR, ECF_CONST | ECF_NOTHROW, first,
cond_ashr, cond_lshr, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_FMA, ECF_CONST, cond_fma, cond_ternary)
DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)

View file

@ -83,12 +83,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
plus minus
mult trunc_div trunc_mod rdiv
min max
bit_and bit_ior bit_xor)
bit_and bit_ior bit_xor
lshift rshift)
(define_operator_list COND_BINARY
IFN_COND_ADD IFN_COND_SUB
IFN_COND_MUL IFN_COND_DIV IFN_COND_MOD IFN_COND_RDIV
IFN_COND_MIN IFN_COND_MAX
IFN_COND_AND IFN_COND_IOR IFN_COND_XOR)
IFN_COND_AND IFN_COND_IOR IFN_COND_XOR
IFN_COND_SHL IFN_COND_SHR)
/* Same for ternary operations. */
(define_operator_list UNCOND_TERNARY

View file

@ -7212,7 +7212,7 @@ static bool
maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
class expand_operand *op)
{
machine_mode mode, imode;
machine_mode mode, imode, tmode;
mode = op->mode;
switch (op->type)
@ -7259,9 +7259,17 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
gcc_assert (mode != VOIDmode);
imode = insn_data[(int) icode].operand[opno].mode;
tmode = (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode)
? GET_MODE_INNER (imode) : imode);
if (tmode != VOIDmode && tmode != mode)
{
op->value = convert_modes (tmode, mode, op->value, op->unsigned_p);
mode = tmode;
}
if (imode != VOIDmode && imode != mode)
{
op->value = convert_modes (imode, mode, op->value, op->unsigned_p);
gcc_assert (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode));
op->value = expand_vector_broadcast (imode, op->value);
mode = imode;
}
goto input;

View file

@ -230,6 +230,9 @@ OPTAB_D (cond_umod_optab, "cond_umod$a")
OPTAB_D (cond_and_optab, "cond_and$a")
OPTAB_D (cond_ior_optab, "cond_ior$a")
OPTAB_D (cond_xor_optab, "cond_xor$a")
OPTAB_D (cond_ashl_optab, "cond_ashl$a")
OPTAB_D (cond_ashr_optab, "cond_ashr$a")
OPTAB_D (cond_lshr_optab, "cond_lshr$a")
OPTAB_D (cond_smin_optab, "cond_smin$a")
OPTAB_D (cond_smax_optab, "cond_smax$a")
OPTAB_D (cond_umin_optab, "cond_umin$a")

View file

@ -129,7 +129,11 @@ create_convert_operand_to (class expand_operand *op, rtx value,
/* Make OP describe an input operand that should have the same value
as VALUE, after any mode conversion that the backend might request.
If VALUE is a CONST_INT, it should be treated as having mode MODE.
UNSIGNED_P says whether VALUE is unsigned. */
UNSIGNED_P says whether VALUE is unsigned.
The conversion of VALUE can include a combination of numerical
conversion (as for convert_modes) and duplicating a scalar to fill
a vector (if VALUE is a scalar but the operand is a vector). */
static inline void
create_convert_operand_from (class expand_operand *op, rtx value,

View file

@ -1,3 +1,25 @@
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
* gcc.target/aarch64/sve/cond_shift_1.c: New test.
* gcc.target/aarch64/sve/cond_shift_1_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_2.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_2_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_3.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_3_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_4.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_4_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_5.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_5_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_6.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_6_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_7.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_7_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_8.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_8_run.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_9.c: Likewise.
* gcc.target/aarch64/sve/cond_shift_9_run.c: Likewise.
2019-08-14 Martin Sebor <msebor@redhat.com>
PR testsuite/91449

View file

@ -0,0 +1,48 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP 3 : b[i]; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \
TEST_TYPE (T, uint8_t) \
TEST_TYPE (T, int16_t) \
TEST_TYPE (T, uint16_t) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,27 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_1.c.  */
#include "cond_shift_1.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[] and b[] with a fixed pattern, call test_TYPE_NAME and abort
   if any r[i] differs from the same conditional expression evaluated
   directly and converted to TYPE.  The empty asm with a "memory" clobber
   presumably keeps the set-up loop from being optimized together with
   the code under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : b[i])) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,52 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP 3 : a[i]; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \
TEST_TYPE (T, uint8_t) \
TEST_TYPE (T, int16_t) \
TEST_TYPE (T, uint16_t) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,27 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_2.c.  */
#include "cond_shift_2.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[] and b[] with a fixed pattern, call test_TYPE_NAME and abort
   if any r[i] differs from the same conditional expression evaluated
   directly and converted to TYPE.  The empty asm with a "memory" clobber
   presumably keeps the set-up loop from being optimized together with
   the code under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : a[i])) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,48 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP 3 : 72; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \
TEST_TYPE (T, uint8_t) \
TEST_TYPE (T, int16_t) \
TEST_TYPE (T, uint16_t) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-times {\tsel\t} 16 } } */

View file

@ -0,0 +1,27 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_3.c.  */
#include "cond_shift_3.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[] and b[] with a fixed pattern, call test_TYPE_NAME and abort
   if any r[i] differs from the same conditional expression evaluated
   directly and converted to TYPE.  The empty asm with a "memory" clobber
   presumably keeps the set-up loop from being optimized together with
   the code under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72)) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,52 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP 3 : 0; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \
TEST_TYPE (T, uint8_t) \
TEST_TYPE (T, int16_t) \
TEST_TYPE (T, uint16_t) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,27 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_4.c.  */
#include "cond_shift_4.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[] and b[] with a fixed pattern, call test_TYPE_NAME and abort
   if any r[i] differs from the same conditional expression evaluated
   directly and converted to TYPE.  The empty asm with a "memory" clobber
   presumably keeps the set-up loop from being optimized together with
   the code under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0)) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : b[i]; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_5.c.  */
#include "cond_shift_5.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[], b[] and c[] with a fixed pattern (c[i], the shift amount,
   is in the range 0-7), call test_TYPE_NAME and abort if any r[i]
   differs from the same conditional expression evaluated directly and
   converted to TYPE.  The empty asm with a "memory" clobber presumably
   keeps the set-up loop from being optimized together with the code
   under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N], c[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
c[i] = ~i & 7; \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, c, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : b[i])) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,33 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : c[i]; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlslr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_6.c.  */
#include "cond_shift_6.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[], b[] and c[] with a fixed pattern (c[i], the shift amount,
   is in the range 0-7), call test_TYPE_NAME and abort if any r[i]
   differs from the same conditional expression evaluated directly and
   converted to TYPE.  The empty asm with a "memory" clobber presumably
   keeps the set-up loop from being optimized together with the code
   under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N], c[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
c[i] = ~i & 7; \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, c, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : c[i])) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,40 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : a[i]; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_7.c.  */
#include "cond_shift_7.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[], b[] and c[] with a fixed pattern (c[i], the shift amount,
   is in the range 0-7), call test_TYPE_NAME and abort if any r[i]
   differs from the same conditional expression evaluated directly and
   converted to TYPE.  The empty asm with a "memory" clobber presumably
   keeps the set-up loop from being optimized together with the code
   under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N], c[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
c[i] = ~i & 7; \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, c, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : a[i])) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : 91; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_8.c.  */
#include "cond_shift_8.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[], b[] and c[] with a fixed pattern (c[i], the shift amount,
   is in the range 0-7), call test_TYPE_NAME and abort if any r[i]
   differs from the same conditional expression evaluated directly and
   converted to TYPE.  The empty asm with a "memory" clobber presumably
   keeps the set-up loop from being optimized together with the code
   under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N], c[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
c[i] = ~i & 7; \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, c, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91)) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}

View file

@ -0,0 +1,40 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define DEF_LOOP(TYPE, NAME, OP) \
void __attribute__ ((noipa)) \
test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
TYPE *__restrict b, TYPE *__restrict c, int n) \
{ \
for (int i = 0; i < n; ++i) \
r[i] = a[i] > 20 ? b[i] OP c[i] : 0; \
}
#define TEST_TYPE(T, TYPE) \
T (TYPE, shl, <<) \
T (TYPE, shr, >>)
#define TEST_ALL(T) \
TEST_TYPE (T, int32_t) \
TEST_TYPE (T, uint32_t) \
TEST_TYPE (T, int64_t) \
TEST_TYPE (T, uint64_t)
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */

View file

@ -0,0 +1,28 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Execution test for the loops defined in cond_shift_9.c.  */
#include "cond_shift_9.c"
/* Number of elements in each test array.  */
#define N 99
/* Fill a[], b[] and c[] with a fixed pattern (c[i], the shift amount,
   is in the range 0-7), call test_TYPE_NAME and abort if any r[i]
   differs from the same conditional expression evaluated directly and
   converted to TYPE.  The empty asm with a "memory" clobber presumably
   keeps the set-up loop from being optimized together with the code
   under test (TODO confirm).  */
#define TEST_LOOP(TYPE, NAME, OP) \
{ \
TYPE r[N], a[N], b[N], c[N]; \
for (int i = 0; i < N; ++i) \
{ \
a[i] = (i & 1 ? i : 3 * i); \
b[i] = (i >> 4) << (i & 15); \
c[i] = ~i & 7; \
asm volatile ("" ::: "memory"); \
} \
test_##TYPE##_##NAME (r, a, b, c, N); \
for (int i = 0; i < N; ++i) \
if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0)) \
__builtin_abort (); \
}
/* Run TEST_LOOP for every type/operator pair listed by TEST_ALL.  */
int main ()
{
TEST_ALL (TEST_LOOP)
return 0;
}