diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cf85cc83882..748cf932f19 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2018-05-08 Richard Sandiford + Alan Hayward + David Sherwood + + * config/aarch64/aarch64-sve.md (*pred_cmp<cmp_op><mode>_combine) + (*pred_cmp<cmp_op><mode>, *fcm<cmp_op><mode>_and_combine) + (*fcmuo<mode>_and_combine, *fcm<cmp_op><mode>_and) + (*fcmuo<mode>_and): New patterns. + 2018-05-08 Richard Sandiford * config/aarch64/iterators.md (UNSPEC_COND_LO, UNSPEC_COND_LS) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 7a1f58a8091..5c1427d22b1 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1358,6 +1358,49 @@ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) +;; Predicated integer comparisons, formed by combining a PTRUE-predicated +;; comparison with an AND. Split the instruction into its preferred form +;; (below) at the earliest opportunity, in order to get rid of the +;; redundant operand 1. +(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") + (and:<VPRED> + (unspec:<VPRED> + [(match_operand:<VPRED> 1) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] + UNSPEC_MERGE_PTRUE) + (match_operand:<VPRED> 4 "register_operand" "Upl, Upl"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SVE" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (and:<VPRED> + (SVE_INT_CMP:<VPRED> + (match_dup 2) + (match_dup 3)) + (match_dup 4))) + (clobber (reg:CC CC_REGNUM))])] +) + +;; Predicated integer comparisons. +(define_insn "*pred_cmp<cmp_op><mode>" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") + (and:<VPRED> + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w")) + (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SVE" + "@ + cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 + cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" +) + ;; Floating-point comparisons predicated with a PTRUE.
(define_insn "*fcm<cmp_op><mode>" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") @@ -1385,6 +1428,83 @@ "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) +;; Floating-point comparisons predicated on a PTRUE, with the results ANDed +;; with another predicate P. This does not have the same trapping behavior +;; as predicating the comparison itself on P, but it's a legitimate fold, +;; since we can drop any potentially-trapping operations whose results +;; are not needed. +;; +;; Split the instruction into its preferred form (below) at the earliest +;; opportunity, in order to get rid of the redundant operand 1. +(define_insn_and_split "*fcm<cmp_op><mode>_and_combine" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") + (and:<VPRED> + (unspec:<VPRED> + [(match_operand:<VPRED> 1) + (SVE_FP_CMP + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] + UNSPEC_MERGE_PTRUE) + (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] + "TARGET_SVE" + "#" + "&& 1" + [(set (match_dup 0) + (and:<VPRED> + (SVE_FP_CMP:<VPRED> + (match_dup 2) + (match_dup 3)) + (match_dup 4)))] +) + +(define_insn_and_split "*fcmuo<mode>_and_combine" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") + (and:<VPRED> + (unspec:<VPRED> + [(match_operand:<VPRED> 1) + (unordered + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w"))] + UNSPEC_MERGE_PTRUE) + (match_operand:<VPRED> 4 "register_operand" "Upl")))] + "TARGET_SVE" + "#" + "&& 1" + [(set (match_dup 0) + (and:<VPRED> + (unordered:<VPRED> + (match_dup 2) + (match_dup 3)) + (match_dup 4)))] +) + +;; Unpredicated floating-point comparisons, with the results ANDed +;; with another predicate. This is a valid fold for the same reasons +;; as above.
+(define_insn "*fcm<cmp_op><mode>_and" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") + (and:<VPRED> + (SVE_FP_CMP:<VPRED> + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) + (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))] + "TARGET_SVE" + "@ + fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 + fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" +) + +(define_insn "*fcmuo<mode>_and" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") + (and:<VPRED> + (unordered:<VPRED> + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")) + (match_operand:<VPRED> 1 "register_operand" "Upl")))] + "TARGET_SVE" + "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" +) + ;; Predicated floating-point comparisons. We don't need a version ;; of this for unordered comparisons. (define_insn "*pred_fcm<cmp_op><mode>" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d5126b3eb6d..a292ea04a6d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2018-05-08 Richard Sandiford + Alan Hayward + David Sherwood + + * gcc.target/aarch64/sve/vcond_6.c: Do not expect any ANDs. + XFAIL the BIC test. + * gcc.target/aarch64/sve/vcond_7.c: New test. + * gcc.target/aarch64/sve/vcond_7_run.c: Likewise. + 2018-05-08 Paolo Carlini PR c++/70563 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c index 718afae74a8..a59f08d553a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c @@ -43,10 +43,16 @@ TEST_ALL (LOOP) -/* { dg-final { scan-assembler-times {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* ??? We predicate one of the comparisons on the result of the other, + but whether that's a win or a loss will depend on the schedule.
*/ +/* { dg-final { scan-assembler-not {\tand\t} } } */ /* { dg-final { scan-assembler-times {\torr\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ /* { dg-final { scan-assembler-times {\teor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ /* { dg-final { scan-assembler-times {\tnand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ /* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ -/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* Currently we predicate one of the comparisons on the result of the other + and then use NOT, but the original BIC sequence is better. It's a fairly + niche failure though. We'd handle most other types of comparison by + using the inverse operation instead of a separate NOT. */ +/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\torn\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_7.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_7.c new file mode 100644 index 00000000000..d2cdbdcff42 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_7.c @@ -0,0 +1,216 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define N 100 + +#define eq(A, B) ((A) == (B)) +#define ne(A, B) ((A) != (B)) +#define lt(A, B) ((A) < (B)) +#define le(A, B) ((A) <= (B)) +#define ge(A, B) ((A) >= (B)) +#define gt(A, B) ((A) > (B)) +#define unordered(A, B) (__builtin_isunordered (A, B)) + +#define DEF_CONST_LOOP(NAME, SUFFIX, TYPE, CONST) \ + void __attribute__ ((noipa)) \ + NAME##_##SUFFIX##_##TYPE (TYPE *restrict dst, TYPE *restrict src) \ + { \ + for (int i = 0; i < N; ++i) \ + if (NAME (src[i], CONST)) \ + dst[i] = 1; \ + } + +#define DEF_LOOP(NAME, TYPE, CONST1, CONST2) \ + void __attribute__ ((noipa)) \ + NAME##_var_##TYPE
(TYPE *restrict dst, TYPE *restrict src, TYPE x) \ + { \ + for (int i = 0; i < N; ++i) \ + if (NAME (src[i], x)) \ + dst[i] = x; \ + } \ + DEF_CONST_LOOP (NAME, const1, TYPE, CONST1) \ + DEF_CONST_LOOP (NAME, const2, TYPE, CONST2) + +#define FOR_EACH_INT_OPERATOR(T, TYPE, CONST1, CONST2) \ + T (eq, TYPE, CONST1, CONST2) \ + T (ne, TYPE, CONST1, CONST2) \ + T (le, TYPE, CONST1, CONST2) \ + T (lt, TYPE, CONST1, CONST2) \ + T (gt, TYPE, CONST1, CONST2) \ + T (ge, TYPE, CONST1, CONST2) + +#define FOR_EACH_FLOAT_OPERATOR(T, TYPE, CONST1, CONST2) \ + FOR_EACH_INT_OPERATOR(T, TYPE, CONST1, CONST2) \ + T (unordered, TYPE, CONST1, CONST2) + +#define FOR_EACH_TYPE(T) \ + FOR_EACH_INT_OPERATOR (T, int8_t, 2, 100) \ + FOR_EACH_INT_OPERATOR (T, int16_t, 3, 1000) \ + FOR_EACH_INT_OPERATOR (T, int32_t, 4, 2000) \ + FOR_EACH_INT_OPERATOR (T, int64_t, 5, 3000) \ + FOR_EACH_INT_OPERATOR (T, uint8_t, 2, 160) \ + FOR_EACH_INT_OPERATOR (T, uint16_t, 3, 500) \ + FOR_EACH_INT_OPERATOR (T, uint32_t, 4, 1500) \ + FOR_EACH_INT_OPERATOR (T, uint64_t, 5, 2500) \ + FOR_EACH_FLOAT_OPERATOR (T, _Float16, 0, 1) \ + FOR_EACH_FLOAT_OPERATOR (T, float, 0, 1) \ + FOR_EACH_FLOAT_OPERATOR (T, double, 0, 1) + +FOR_EACH_TYPE (DEF_LOOP) + +/* { dg-final { scan-assembler-not {\tand\t} } } */ + +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } 
*/ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } 
} */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, 
z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */ + + +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.h, 
p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { 
dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_7_run.c new file mode 100644 index 00000000000..e440f3a6794 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_7_run.c @@ -0,0 +1,40 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "vcond_7.c" + +#define TEST_CONST_LOOP(NAME, SUFFIX, TYPE, CONST) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + dst[i] = i * 3; \ + src[i] = i % (CONST + 3); \ + } \ + NAME##_##SUFFIX##_##TYPE (dst, src); \ + for (int i = 0; i < N; ++i) \ + if (dst[i] != (NAME (src[i], CONST) ? 
(TYPE) 1 : (TYPE) (i * 3))) \ + __builtin_abort (); \ + } + +#define TEST_LOOPS(NAME, TYPE, CONST1, CONST2) \ + { \ + TYPE dst[N], src[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + dst[i] = i * 2; \ + src[i] = i % 5; \ + } \ + NAME##_var_##TYPE (dst, src, 3); \ + for (int i = 0; i < N; ++i) \ + if (dst[i] != (NAME (src[i], 3) ? (TYPE) 3 : (TYPE) (i * 2))) \ + __builtin_abort (); \ + TEST_CONST_LOOP (NAME, const1, TYPE, CONST1) \ + TEST_CONST_LOOP (NAME, const2, TYPE, CONST2) \ + } + +int __attribute__ ((noipa)) +main (void) +{ + FOR_EACH_TYPE (TEST_LOOPS); + return 0; +}