[AArch64] Predicated SVE comparison folds
This patch adds SVE patterns that combine a PTRUE-predicated comparison with a separate AND. The main benefit is for optimising ANDs with the loop predicate, as in the testcase. However, one of the potential drawbacks is that it triggers even for cases in which two naturally-parallel comparisons are ANDed together. Whether that's a win or a loss will depend on the schedule, but it has the potential to be a win more often than a loss. The combine patterns are undeniably ugly. One way of getting around them would be to allow 1->1 "splits" when combining 2 instructions, as well as 1->2 splits when combining more than 2 instructions (although that wouldn't really be a split). Another would be to have a way of defining target-specific rtx simplifications. branches/ARM/sve-branch has a prototype implementation of that, but it would need some clean-up before being ready to submit. It would also be good to make it closer to the match.pd style. Until then, I think what the combine patterns are doing is the "correct" implementation given the current infrastructure. 2018-05-08 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * config/aarch64/aarch64-sve.md (*pred_cmp<cmp_op><mode>_combine) (*pred_cmp<cmp_op><mode>, *fcm<cmp_op><mode>_and_combine) (*fcmuo<mode>_and_combine, *fcm<cmp_op><mode>_and) (*fcmuo<mode>_and): New patterns. gcc/testsuite/ * gcc.target/aarch64/sve/vcond_6.c: Do not expect any ANDs. XFAIL the BIC test. * gcc.target/aarch64/sve/vcond_7.c: New test. * gcc.target/aarch64/sve/vcond_7_run.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r260031
This commit is contained in:
parent
4430130d28
commit
cee99fa01b
6 changed files with 402 additions and 2 deletions
|
@ -1,3 +1,12 @@
|
|||
2018-05-08 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
Alan Hayward <alan.hayward@arm.com>
|
||||
David Sherwood <david.sherwood@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-sve.md (*pred_cmp<cmp_op><mode>_combine)
|
||||
(*pred_cmp<cmp_op><mode>, *fcm<cmp_op><mode>_and_combine)
|
||||
(*fcmuo<mode>_and_combine, *fcm<cmp_op><mode>_and)
|
||||
(*fcmuo<mode>_and): New patterns.
|
||||
|
||||
2018-05-08 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* config/aarch64/iterators.md (UNSPEC_COND_LO, UNSPEC_COND_LS)
|
||||
|
|
|
@ -1358,6 +1358,49 @@
|
|||
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
|
||||
)
|
||||
|
||||
;; Predicated integer comparisons, formed by combining a PTRUE-predicated
|
||||
;; comparison with an AND. Split the instruction into its preferred form
|
||||
;; (below) at the earliest opportunity, in order to get rid of the
|
||||
;; redundant operand 1.
|
||||
(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
|
||||
(and:<VPRED>
|
||||
(unspec:<VPRED>
|
||||
[(match_operand:<VPRED> 1)
|
||||
(SVE_INT_CMP:<VPRED>
|
||||
(match_operand:SVE_I 2 "register_operand" "w, w")
|
||||
(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
|
||||
UNSPEC_MERGE_PTRUE)
|
||||
(match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
|
||||
(clobber (reg:CC CC_REGNUM))]
|
||||
"TARGET_SVE"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(and:<VPRED>
|
||||
(SVE_INT_CMP:<VPRED>
|
||||
(match_dup 2)
|
||||
(match_dup 3))
|
||||
(match_dup 4)))
|
||||
(clobber (reg:CC CC_REGNUM))])]
|
||||
)
|
||||
|
||||
;; Predicated integer comparisons.
|
||||
(define_insn "*pred_cmp<cmp_op><mode>"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
|
||||
(and:<VPRED>
|
||||
(SVE_INT_CMP:<VPRED>
|
||||
(match_operand:SVE_I 2 "register_operand" "w, w")
|
||||
(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
|
||||
(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
|
||||
(clobber (reg:CC CC_REGNUM))]
|
||||
"TARGET_SVE"
|
||||
"@
|
||||
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
|
||||
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
|
||||
)
|
||||
|
||||
;; Floating-point comparisons predicated with a PTRUE.
|
||||
(define_insn "*fcm<cmp_op><mode>"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
|
||||
|
@ -1385,6 +1428,83 @@
|
|||
"fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
|
||||
)
|
||||
|
||||
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
|
||||
;; with another predicate P. This does not have the same trapping behavior
|
||||
;; as predicating the comparison itself on P, but it's a legitimate fold,
|
||||
;; since we can drop any potentially-trapping operations whose results
|
||||
;; are not needed.
|
||||
;;
|
||||
;; Split the instruction into its preferred form (below) at the earliest
|
||||
;; opportunity, in order to get rid of the redundant operand 1.
|
||||
(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
|
||||
(and:<VPRED>
|
||||
(unspec:<VPRED>
|
||||
[(match_operand:<VPRED> 1)
|
||||
(SVE_FP_CMP
|
||||
(match_operand:SVE_F 2 "register_operand" "w, w")
|
||||
(match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
|
||||
UNSPEC_MERGE_PTRUE)
|
||||
(match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
|
||||
"TARGET_SVE"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(and:<VPRED>
|
||||
(SVE_FP_CMP:<VPRED>
|
||||
(match_dup 2)
|
||||
(match_dup 3))
|
||||
(match_dup 4)))]
|
||||
)
|
||||
|
||||
(define_insn_and_split "*fcmuo<mode>_and_combine"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
|
||||
(and:<VPRED>
|
||||
(unspec:<VPRED>
|
||||
[(match_operand:<VPRED> 1)
|
||||
(unordered
|
||||
(match_operand:SVE_F 2 "register_operand" "w")
|
||||
(match_operand:SVE_F 3 "register_operand" "w"))]
|
||||
UNSPEC_MERGE_PTRUE)
|
||||
(match_operand:<VPRED> 4 "register_operand" "Upl")))]
|
||||
"TARGET_SVE"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(and:<VPRED>
|
||||
(unordered:<VPRED>
|
||||
(match_dup 2)
|
||||
(match_dup 3))
|
||||
(match_dup 4)))]
|
||||
)
|
||||
|
||||
;; Unpredicated floating-point comparisons, with the results ANDed
|
||||
;; with another predicate. This is a valid fold for the same reasons
|
||||
;; as above.
|
||||
(define_insn "*fcm<cmp_op><mode>_and"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
|
||||
(and:<VPRED>
|
||||
(SVE_FP_CMP:<VPRED>
|
||||
(match_operand:SVE_F 2 "register_operand" "w, w")
|
||||
(match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
|
||||
(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
|
||||
"TARGET_SVE"
|
||||
"@
|
||||
fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
|
||||
fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
|
||||
)
|
||||
|
||||
(define_insn "*fcmuo<mode>_and"
|
||||
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
|
||||
(and:<VPRED>
|
||||
(unordered:<VPRED>
|
||||
(match_operand:SVE_F 2 "register_operand" "w")
|
||||
(match_operand:SVE_F 3 "register_operand" "w"))
|
||||
(match_operand:<VPRED> 1 "register_operand" "Upl")))]
|
||||
"TARGET_SVE"
|
||||
"fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
|
||||
)
|
||||
|
||||
;; Predicated floating-point comparisons. We don't need a version
|
||||
;; of this for unordered comparisons.
|
||||
(define_insn "*pred_fcm<cmp_op><mode>"
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
2018-05-08 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
Alan Hayward <alan.hayward@arm.com>
|
||||
David Sherwood <david.sherwood@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/vcond_6.c: Do not expect any ANDs.
|
||||
XFAIL the BIC test.
|
||||
* gcc.target/aarch64/sve/vcond_7.c: New test.
|
||||
* gcc.target/aarch64/sve/vcond_7_run.c: Likewise.
|
||||
|
||||
2018-05-08 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR c++/70563
|
||||
|
|
|
@ -43,10 +43,16 @@
|
|||
|
||||
TEST_ALL (LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
/* ??? We predicate one of the comparisons on the result of the other,
|
||||
but whether that's a win or a loss will depend on the schedule. */
|
||||
/* { dg-final { scan-assembler-not {\tand\t} } } */
|
||||
/* { dg-final { scan-assembler-times {\torr\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\teor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tnand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
/* Currently we predicate one of the comparisons on the result of the other
|
||||
and then use NOT, but the original BIC sequence is better. It's a fairly
|
||||
niche failure though. We'd handle most other types of comparison by
|
||||
using the inverse operation instead of a separate NOT. */
|
||||
/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-assembler-times {\torn\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
|
||||
|
|
216
gcc/testsuite/gcc.target/aarch64/sve/vcond_7.c
Normal file
216
gcc/testsuite/gcc.target/aarch64/sve/vcond_7.c
Normal file
|
@ -0,0 +1,216 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define N 100
|
||||
|
||||
#define eq(A, B) ((A) == (B))
|
||||
#define ne(A, B) ((A) != (B))
|
||||
#define lt(A, B) ((A) < (B))
|
||||
#define le(A, B) ((A) <= (B))
|
||||
#define ge(A, B) ((A) >= (B))
|
||||
#define gt(A, B) ((A) > (B))
|
||||
#define unordered(A, B) (__builtin_isunordered (A, B))
|
||||
|
||||
#define DEF_CONST_LOOP(NAME, SUFFIX, TYPE, CONST) \
|
||||
void __attribute__ ((noipa)) \
|
||||
NAME##_##SUFFIX##_##TYPE (TYPE *restrict dst, TYPE *restrict src) \
|
||||
{ \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
if (NAME (src[i], CONST)) \
|
||||
dst[i] = 1; \
|
||||
}
|
||||
|
||||
#define DEF_LOOP(NAME, TYPE, CONST1, CONST2) \
|
||||
void __attribute__ ((noipa)) \
|
||||
NAME##_var_##TYPE (TYPE *restrict dst, TYPE *restrict src, TYPE x) \
|
||||
{ \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
if (NAME (src[i], x)) \
|
||||
dst[i] = x; \
|
||||
} \
|
||||
DEF_CONST_LOOP (NAME, const1, TYPE, CONST1) \
|
||||
DEF_CONST_LOOP (NAME, const2, TYPE, CONST2)
|
||||
|
||||
#define FOR_EACH_INT_OPERATOR(T, TYPE, CONST1, CONST2) \
|
||||
T (eq, TYPE, CONST1, CONST2) \
|
||||
T (ne, TYPE, CONST1, CONST2) \
|
||||
T (le, TYPE, CONST1, CONST2) \
|
||||
T (lt, TYPE, CONST1, CONST2) \
|
||||
T (gt, TYPE, CONST1, CONST2) \
|
||||
T (ge, TYPE, CONST1, CONST2)
|
||||
|
||||
#define FOR_EACH_FLOAT_OPERATOR(T, TYPE, CONST1, CONST2) \
|
||||
FOR_EACH_INT_OPERATOR(T, TYPE, CONST1, CONST2) \
|
||||
T (unordered, TYPE, CONST1, CONST2)
|
||||
|
||||
#define FOR_EACH_TYPE(T) \
|
||||
FOR_EACH_INT_OPERATOR (T, int8_t, 2, 100) \
|
||||
FOR_EACH_INT_OPERATOR (T, int16_t, 3, 1000) \
|
||||
FOR_EACH_INT_OPERATOR (T, int32_t, 4, 2000) \
|
||||
FOR_EACH_INT_OPERATOR (T, int64_t, 5, 3000) \
|
||||
FOR_EACH_INT_OPERATOR (T, uint8_t, 2, 160) \
|
||||
FOR_EACH_INT_OPERATOR (T, uint16_t, 3, 500) \
|
||||
FOR_EACH_INT_OPERATOR (T, uint32_t, 4, 1500) \
|
||||
FOR_EACH_INT_OPERATOR (T, uint64_t, 5, 2500) \
|
||||
FOR_EACH_FLOAT_OPERATOR (T, _Float16, 0, 1) \
|
||||
FOR_EACH_FLOAT_OPERATOR (T, float, 0, 1) \
|
||||
FOR_EACH_FLOAT_OPERATOR (T, double, 0, 1)
|
||||
|
||||
FOR_EACH_TYPE (DEF_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tand\t} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #4\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #5\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #1\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #4\n} 1 } } */
|
||||
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
40
gcc/testsuite/gcc.target/aarch64/sve/vcond_7_run.c
Normal file
40
gcc/testsuite/gcc.target/aarch64/sve/vcond_7_run.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do run { target aarch64_sve_hw } } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include "vcond_7.c"
|
||||
|
||||
#define TEST_CONST_LOOP(NAME, SUFFIX, TYPE, CONST) \
|
||||
{ \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
dst[i] = i * 3; \
|
||||
src[i] = i % (CONST + 3); \
|
||||
} \
|
||||
NAME##_##SUFFIX##_##TYPE (dst, src); \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
if (dst[i] != (NAME (src[i], CONST) ? (TYPE) 1 : (TYPE) (i * 3))) \
|
||||
__builtin_abort (); \
|
||||
}
|
||||
|
||||
#define TEST_LOOPS(NAME, TYPE, CONST1, CONST2) \
|
||||
{ \
|
||||
TYPE dst[N], src[N]; \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
dst[i] = i * 2; \
|
||||
src[i] = i % 5; \
|
||||
} \
|
||||
NAME##_var_##TYPE (dst, src, 3); \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
if (dst[i] != (NAME (src[i], 3) ? (TYPE) 3 : (TYPE) (i * 2))) \
|
||||
__builtin_abort (); \
|
||||
TEST_CONST_LOOP (NAME, const1, TYPE, CONST1) \
|
||||
TEST_CONST_LOOP (NAME, const2, TYPE, CONST2) \
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_TYPE (TEST_LOOPS);
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue