[PATCH AArch64 1/2] Improve codegen of vector compares inc. tst instruction
gcc/: * config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers, TYPES_TST): Define. (aarch64_fold_builtin): Update pattern for cmtst. * config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p): Declare. * config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers. * config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>): Switch operands, separate out more cases, refactor. (aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1). * config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single argument; rename old version to... (aarch64_const_vec_all_same_in_range_p): ...this. (aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming. * config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define. gcc/testsuite/: * gcc.target/aarch64/simd/int_comparisons.x: New file. * gcc.target/aarch64/simd/int_comparisons_1.c: New test. * gcc.target/aarch64/simd/int_comparisons_2.c: Ditto. From-SVN: r214948
This commit is contained in:
parent
e625e71548
commit
ddeabd3e66
11 changed files with 389 additions and 60 deletions
|
@ -1,3 +1,26 @@
|
|||
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
|
||||
TYPES_TST): Define.
|
||||
(aarch64_fold_builtin): Update pattern for cmtst.
|
||||
|
||||
* config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p):
|
||||
Declare.
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers.
|
||||
|
||||
* config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>):
|
||||
Switch operands, separate out more cases, refactor.
|
||||
|
||||
(aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1).
|
||||
|
||||
* config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
|
||||
argument; rename old version to...
|
||||
(aarch64_const_vec_all_same_in_range_p): ...this.
|
||||
(aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.
|
||||
|
||||
* config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.
|
||||
|
||||
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
|
||||
|
|
|
@ -144,6 +144,11 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
|||
= { qualifier_none, qualifier_none, qualifier_maybe_immediate };
|
||||
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
|
||||
static enum aarch64_type_qualifiers
|
||||
aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_none,
|
||||
qualifier_internal, qualifier_internal };
|
||||
#define TYPES_TST (aarch64_types_cmtst_qualifiers)
|
||||
static enum aarch64_type_qualifiers
|
||||
aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_void, qualifier_none, qualifier_none };
|
||||
#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
|
||||
|
@ -1285,7 +1290,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
|
|||
BUILTIN_VALLDI (BINOP, cmeq, 0)
|
||||
return fold_build2 (EQ_EXPR, type, args[0], args[1]);
|
||||
break;
|
||||
BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
|
||||
BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
|
||||
{
|
||||
tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
|
||||
tree vec_zero_node = build_zero_cst (type);
|
||||
|
|
|
@ -180,6 +180,7 @@ bool aarch64_cannot_change_mode_class (enum machine_mode,
|
|||
enum reg_class);
|
||||
enum aarch64_symbol_type
|
||||
aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
|
||||
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
|
||||
bool aarch64_constant_address_p (rtx);
|
||||
bool aarch64_expand_movmem (rtx *);
|
||||
bool aarch64_float_const_zero_rtx_p (rtx);
|
||||
|
|
|
@ -246,7 +246,7 @@
|
|||
/* Implemented by aarch64_cm<cmp><mode>. */
|
||||
BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
|
||||
BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
|
||||
BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
|
||||
BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
|
||||
|
||||
/* Implemented by reduc_<sur>plus_<mode>. */
|
||||
BUILTIN_VALL (UNOP, reduc_splus_, 10)
|
||||
|
|
|
@ -1912,58 +1912,94 @@
|
|||
(match_operand:VDQ 2 "nonmemory_operand")))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
int inverse = 0, has_zero_imm_form = 0;
|
||||
rtx op1 = operands[1];
|
||||
rtx op2 = operands[2];
|
||||
rtx mask = gen_reg_rtx (<MODE>mode);
|
||||
enum rtx_code code = GET_CODE (operands[3]);
|
||||
|
||||
switch (GET_CODE (operands[3]))
|
||||
/* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
|
||||
and desirable for other comparisons if it results in FOO ? -1 : 0
|
||||
(this allows direct use of the comparison result without a bsl). */
|
||||
if (code == NE
|
||||
|| (code != EQ
|
||||
&& op1 == CONST0_RTX (<V_cmp_result>mode)
|
||||
&& op2 == CONSTM1_RTX (<V_cmp_result>mode)))
|
||||
{
|
||||
op1 = operands[2];
|
||||
op2 = operands[1];
|
||||
switch (code)
|
||||
{
|
||||
case LE: code = GT; break;
|
||||
case LT: code = GE; break;
|
||||
case GE: code = LT; break;
|
||||
case GT: code = LE; break;
|
||||
/* No case EQ. */
|
||||
case NE: code = EQ; break;
|
||||
case LTU: code = GEU; break;
|
||||
case LEU: code = GTU; break;
|
||||
case GTU: code = LEU; break;
|
||||
case GEU: code = LTU; break;
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure we can handle the last operand. */
|
||||
switch (code)
|
||||
{
|
||||
case NE:
|
||||
/* Normalized to EQ above. */
|
||||
gcc_unreachable ();
|
||||
|
||||
case LE:
|
||||
case LT:
|
||||
case NE:
|
||||
inverse = 1;
|
||||
/* Fall through. */
|
||||
case GE:
|
||||
case GT:
|
||||
case EQ:
|
||||
has_zero_imm_form = 1;
|
||||
break;
|
||||
case LEU:
|
||||
case LTU:
|
||||
inverse = 1;
|
||||
break;
|
||||
/* These instructions have a form taking an immediate zero. */
|
||||
if (operands[5] == CONST0_RTX (<MODE>mode))
|
||||
break;
|
||||
/* Fall through, as may need to load into register. */
|
||||
default:
|
||||
if (!REG_P (operands[5]))
|
||||
operands[5] = force_reg (<MODE>mode, operands[5]);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!REG_P (operands[5])
|
||||
&& (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
|
||||
operands[5] = force_reg (<MODE>mode, operands[5]);
|
||||
|
||||
switch (GET_CODE (operands[3]))
|
||||
switch (code)
|
||||
{
|
||||
case LT:
|
||||
emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
||||
case GE:
|
||||
emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
||||
case LE:
|
||||
emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
||||
case GT:
|
||||
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
||||
case LTU:
|
||||
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
|
||||
break;
|
||||
|
||||
case GEU:
|
||||
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
||||
case LEU:
|
||||
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
|
||||
break;
|
||||
|
||||
case GTU:
|
||||
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
||||
case NE:
|
||||
/* NE has been normalized to EQ above. */
|
||||
case EQ:
|
||||
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
|
||||
break;
|
||||
|
@ -1972,12 +2008,6 @@
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
if (inverse)
|
||||
{
|
||||
op1 = operands[2];
|
||||
op2 = operands[1];
|
||||
}
|
||||
|
||||
/* If we have (a = (b CMP c) ? -1 : 0);
|
||||
Then we can simply move the generated mask. */
|
||||
|
||||
|
@ -3932,14 +3962,22 @@
|
|||
|
||||
;; cmtst
|
||||
|
||||
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
|
||||
;; we don't have any insns using ne, and aarch64_vcond_internal outputs
|
||||
;; not (neg (eq (and x y) 0))
|
||||
;; which is rewritten by simplify_rtx as
|
||||
;; plus (eq (and x y) 0) -1.
|
||||
|
||||
(define_insn "aarch64_cmtst<mode>"
|
||||
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
|
||||
(neg:<V_cmp_result>
|
||||
(ne:<V_cmp_result>
|
||||
(plus:<V_cmp_result>
|
||||
(eq:<V_cmp_result>
|
||||
(and:VDQ
|
||||
(match_operand:VDQ 1 "register_operand" "w")
|
||||
(match_operand:VDQ 2 "register_operand" "w"))
|
||||
(vec_duplicate:<V_cmp_result> (const_int 0)))))]
|
||||
(match_operand:VDQ 3 "aarch64_simd_imm_zero"))
|
||||
(match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
|
||||
]
|
||||
"TARGET_SIMD"
|
||||
"cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||
[(set_attr "type" "neon_tst<q>")]
|
||||
|
|
|
@ -137,9 +137,6 @@ static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
|
|||
static void aarch64_override_options_after_change (void);
|
||||
static bool aarch64_vector_mode_supported_p (enum machine_mode);
|
||||
static unsigned bit_count (unsigned HOST_WIDE_INT);
|
||||
static bool aarch64_const_vec_all_same_int_p (rtx,
|
||||
HOST_WIDE_INT, HOST_WIDE_INT);
|
||||
|
||||
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||
const unsigned char *sel);
|
||||
static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
|
||||
|
@ -3576,6 +3573,36 @@ aarch64_get_condition_code (rtx x)
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
aarch64_const_vec_all_same_in_range_p (rtx x,
|
||||
HOST_WIDE_INT minval,
|
||||
HOST_WIDE_INT maxval)
|
||||
{
|
||||
HOST_WIDE_INT firstval;
|
||||
int count, i;
|
||||
|
||||
if (GET_CODE (x) != CONST_VECTOR
|
||||
|| GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
|
||||
return false;
|
||||
|
||||
firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
|
||||
if (firstval < minval || firstval > maxval)
|
||||
return false;
|
||||
|
||||
count = CONST_VECTOR_NUNITS (x);
|
||||
for (i = 1; i < count; i++)
|
||||
if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
|
||||
{
|
||||
return aarch64_const_vec_all_same_in_range_p (x, val, val);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bit_count (unsigned HOST_WIDE_INT value)
|
||||
{
|
||||
|
@ -3827,9 +3854,10 @@ aarch64_print_operand (FILE *f, rtx x, char code)
|
|||
case CONST_VECTOR:
|
||||
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
|
||||
{
|
||||
gcc_assert (aarch64_const_vec_all_same_int_p (x,
|
||||
HOST_WIDE_INT_MIN,
|
||||
HOST_WIDE_INT_MAX));
|
||||
gcc_assert (
|
||||
aarch64_const_vec_all_same_in_range_p (x,
|
||||
HOST_WIDE_INT_MIN,
|
||||
HOST_WIDE_INT_MAX));
|
||||
asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
|
||||
}
|
||||
else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
|
||||
|
@ -7732,39 +7760,15 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
|
|||
#undef CHECK
|
||||
}
|
||||
|
||||
static bool
|
||||
aarch64_const_vec_all_same_int_p (rtx x,
|
||||
HOST_WIDE_INT minval,
|
||||
HOST_WIDE_INT maxval)
|
||||
{
|
||||
HOST_WIDE_INT firstval;
|
||||
int count, i;
|
||||
|
||||
if (GET_CODE (x) != CONST_VECTOR
|
||||
|| GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
|
||||
return false;
|
||||
|
||||
firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
|
||||
if (firstval < minval || firstval > maxval)
|
||||
return false;
|
||||
|
||||
count = CONST_VECTOR_NUNITS (x);
|
||||
for (i = 1; i < count; i++)
|
||||
if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check of immediate shift constants are within range. */
|
||||
bool
|
||||
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
|
||||
{
|
||||
int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
|
||||
if (left)
|
||||
return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
|
||||
return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
|
||||
else
|
||||
return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
|
||||
return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
|
||||
}
|
||||
|
||||
/* Return true if X is a uniform vector where all elements
|
||||
|
|
|
@ -273,3 +273,9 @@
|
|||
{
|
||||
return aarch64_simd_imm_zero_p (op, mode);
|
||||
})
|
||||
|
||||
(define_special_predicate "aarch64_simd_imm_minus_one"
|
||||
(match_code "const_vector")
|
||||
{
|
||||
return aarch64_const_vec_all_same_int_p (op, -1);
|
||||
})
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* gcc.target/aarch64/simd/int_comparisons.x: New file.
|
||||
* gcc.target/aarch64/simd/int_comparisons_1.c: New test.
|
||||
* gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.
|
||||
|
||||
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* gcc.target/aarch64/simd/vrbit_1.c: New test.
|
||||
|
|
68
gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x
Normal file
68
gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* test_vcXXX wrappers for all the vcXXX (vector compare) and vtst intrinsics
|
||||
in arm_neon.h (excluding the 64x1 variants as these generally produce scalar
|
||||
not vector ops). */
|
||||
#include "arm_neon.h"
|
||||
|
||||
#define DONT_FORCE(X)
|
||||
|
||||
#define FORCE_SIMD(V1) asm volatile ("mov %d0, %1.d[0]" \
|
||||
: "=w"(V1) \
|
||||
: "w"(V1) \
|
||||
: /* No clobbers */);
|
||||
|
||||
#define OP1(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \
|
||||
test_v##OP##SUFFIX (BASETYPE##SIZE##_t a) \
|
||||
{ \
|
||||
uint##SIZE##_t res; \
|
||||
FORCE (a); \
|
||||
res = v##OP##SUFFIX (a); \
|
||||
FORCE (res); \
|
||||
return res; \
|
||||
}
|
||||
|
||||
#define OP2(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \
|
||||
test_v##OP##SUFFIX (BASETYPE##SIZE##_t a, BASETYPE##SIZE##_t b) \
|
||||
{ \
|
||||
uint##SIZE##_t res; \
|
||||
FORCE (a); \
|
||||
FORCE (b); \
|
||||
res = v##OP##SUFFIX (a, b); \
|
||||
FORCE (res); \
|
||||
return res; \
|
||||
}
|
||||
|
||||
#define UNSIGNED_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
|
||||
OP2 (SIZE, tst, BASETYPE, SUFFIX, FORCE) \
|
||||
OP1 (SIZE, ceqz, BASETYPE, SUFFIX, FORCE) \
|
||||
OP2 (SIZE, ceq, BASETYPE, SUFFIX, FORCE) \
|
||||
OP2 (SIZE, cge, BASETYPE, SUFFIX, FORCE) \
|
||||
OP2 (SIZE, cgt, BASETYPE, SUFFIX, FORCE) \
|
||||
OP2 (SIZE, cle, BASETYPE, SUFFIX, FORCE) \
|
||||
OP2 (SIZE, clt, BASETYPE, SUFFIX, FORCE)
|
||||
|
||||
#define ALL_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
|
||||
OP1 (SIZE, cgez, BASETYPE, SUFFIX, FORCE) \
|
||||
OP1 (SIZE, cgtz, BASETYPE, SUFFIX, FORCE) \
|
||||
OP1 (SIZE, clez, BASETYPE, SUFFIX, FORCE) \
|
||||
OP1 (SIZE, cltz, BASETYPE, SUFFIX, FORCE) \
|
||||
UNSIGNED_OPS (SIZE, BASETYPE, SUFFIX, FORCE)
|
||||
|
||||
ALL_OPS (8x8, int, _s8, DONT_FORCE)
|
||||
ALL_OPS (16x4, int, _s16, DONT_FORCE)
|
||||
ALL_OPS (32x2, int, _s32, DONT_FORCE)
|
||||
ALL_OPS (64x1, int, _s64, DONT_FORCE)
|
||||
ALL_OPS (64, int, d_s64, FORCE_SIMD)
|
||||
ALL_OPS (8x16, int, q_s8, DONT_FORCE)
|
||||
ALL_OPS (16x8, int, q_s16, DONT_FORCE)
|
||||
ALL_OPS (32x4, int, q_s32, DONT_FORCE)
|
||||
ALL_OPS (64x2, int, q_s64, DONT_FORCE)
|
||||
UNSIGNED_OPS (8x8, uint, _u8, DONT_FORCE)
|
||||
UNSIGNED_OPS (16x4, uint, _u16, DONT_FORCE)
|
||||
UNSIGNED_OPS (32x2, uint, _u32, DONT_FORCE)
|
||||
UNSIGNED_OPS (64x1, uint, _u64, DONT_FORCE)
|
||||
UNSIGNED_OPS (64, uint, d_u64, FORCE_SIMD)
|
||||
UNSIGNED_OPS (8x16, uint, q_u8, DONT_FORCE)
|
||||
UNSIGNED_OPS (16x8, uint, q_u16, DONT_FORCE)
|
||||
UNSIGNED_OPS (32x4, uint, q_u32, DONT_FORCE)
|
||||
UNSIGNED_OPS (64x2, uint, q_u64, DONT_FORCE)
|
||||
|
47
gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c
Normal file
47
gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fno-inline" } */
|
||||
|
||||
/* Scan-assembler test, so, incorporate as little other code as possible. */
|
||||
|
||||
#include "arm_neon.h"
|
||||
#include "int_comparisons.x"
|
||||
|
||||
/* Operations on all 18 integer types: (q?)_[su](8|16|32|64), d_[su]64.
|
||||
(d?)_[us]64 generate regs of form 'd0' rather than e.g. 'v0.2d'. */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
|
||||
|
||||
/* vcge + vcle both implemented with cmge (signed) or cmhs (unsigned). */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
|
||||
|
||||
/* vcgt + vclt both implemented with cmgt (signed) or cmhi (unsigned). */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
|
||||
|
||||
/* Comparisons against immediate zero, on the 8 signed integer types only. */
|
||||
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
|
||||
/* For int64_t and int64x1_t, combine_simplify_rtx failure of
|
||||
https://gcc.gnu.org/ml/gcc/2014-06/msg00253.html
|
||||
prevents generation of cmge....#0, instead producing mvn + sshr. */
|
||||
/* { #dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\]cmlt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
|
||||
/* For int64_t and int64x1_t, cmlt ... #0 and sshr ... #63 are equivalent,
|
||||
so allow either. cmgez issue above results in extra 2 * sshr....63. */
|
||||
/* { dg-final { scan-assembler-times "\[ \t\](?:cmlt|sshr)\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?(?:0|63)" 4 } } */
|
||||
|
||||
// All should have been compiled into single insns without inverting result:
|
||||
/* { dg-final { scan-assembler-not "not" } } */
|
131
gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c
Normal file
131
gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fno-inline" } */
|
||||
/* Stops the test_xxx methods being inlined into main, thus preventing constant
|
||||
propagation. */
|
||||
|
||||
#include "int_comparisons.x"
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
#define CHECK2(R0, R1) if (res[0] != R0 || res[1] != R1) abort ()
|
||||
|
||||
#define TEST2(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
|
||||
BASETYPE##_t _a[2] = {2, 3}; \
|
||||
BASETYPE##x2_t a = vld1##SUFFIX (_a); \
|
||||
BASETYPE##_t _b[2] = {1, 3}; \
|
||||
BASETYPE##x2_t b = vld1##SUFFIX (_b); \
|
||||
RESTYPE res[2]; \
|
||||
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); CHECK2 (0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (b, a)); CHECK2 (-1, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); CHECK2 (0, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (b, a)); CHECK2 (-1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); CHECK2 (0, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); CHECK2 (-1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (b, a)); CHECK2 (0, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); CHECK2 (-1, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (b, a)); CHECK2 (0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); CHECK2 (0, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a + 1, b)); CHECK2 (-1, 0); \
|
||||
}
|
||||
|
||||
#define CHECK4(T, R0, R1, R2, R3) \
|
||||
if (res[0] != (T)R0 || res[1] != (T)R1 \
|
||||
|| res[2] != (T)R2 || res[3] != (T)R3) abort ()
|
||||
|
||||
#define TEST4(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
|
||||
BASETYPE##_t _a[4] = {1, 2, 3, 4}; \
|
||||
BASETYPE##x4_t a = vld1##SUFFIX (_a); \
|
||||
BASETYPE##_t _b[4] = {4, 2, 1, 3}; \
|
||||
BASETYPE##x4_t b = vld1##SUFFIX (_b); \
|
||||
RESTYPE res[4]; \
|
||||
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
|
||||
CHECK4 (RESTYPE, -1, 0, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
|
||||
CHECK4 (RESTYPE, -1, -1, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
|
||||
CHECK4 (RESTYPE, 0, -1, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
|
||||
CHECK4 (RESTYPE, 0, -1, -1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
|
||||
CHECK4 (RESTYPE, 0, 0, -1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
|
||||
CHECK4 (RESTYPE, 0, -1, -1, 0); \
|
||||
}
|
||||
|
||||
#define CHECK8(T, R0, R1, R2, R3, R4, R5, R6, R7) \
|
||||
if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
|
||||
|| res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \
|
||||
|| res[7] != (T)R7) abort ()
|
||||
|
||||
#define TEST8(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
|
||||
BASETYPE##_t _a[8] = {1, 2, 3, 4, 5, 6, 7, 8}; \
|
||||
BASETYPE##x8_t a = vld1##SUFFIX (_a); \
|
||||
BASETYPE##_t _b[8] = {4, 2, 1, 3, 2, 6, 8, 9}; \
|
||||
BASETYPE##x8_t b = vld1##SUFFIX (_b); \
|
||||
RESTYPE res[8]; \
|
||||
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
|
||||
CHECK8 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
|
||||
CHECK8 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
|
||||
CHECK8 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
|
||||
CHECK8 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
|
||||
CHECK8 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
|
||||
CHECK8 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \
|
||||
}
|
||||
|
||||
/* 16-way tests use same 8 values twice. */
|
||||
#define CHECK16(T, R0, R1, R2, R3, R4, R5, R6, R7) \
|
||||
if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
|
||||
|| res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \
|
||||
|| res[7] != (T)R7 || res[8] != (T)R0 || res[9] != (T)R1 \
|
||||
|| res[10] != (T)R2 || res[11] != (T)R3 || res[12] != (T)R4 \
|
||||
|| res[13] != (T)R5 || res[14] != (T)R6 || res[15] != (T)R7) abort ()
|
||||
|
||||
#define TEST16(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
|
||||
BASETYPE##_t _a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}; \
|
||||
BASETYPE##x16_t a = vld1##SUFFIX (_a); \
|
||||
BASETYPE##_t _b[16] = {4, 2, 1, 3, 2, 6, 8, 9, 4, 2, 1, 3, 2, 6, 8, 9}; \
|
||||
BASETYPE##x16_t b = vld1##SUFFIX (_b); \
|
||||
RESTYPE res[16]; \
|
||||
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
|
||||
CHECK16 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
|
||||
CHECK16 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \
|
||||
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
|
||||
CHECK16 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
|
||||
CHECK16 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
|
||||
CHECK16 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \
|
||||
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
|
||||
CHECK16 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
TEST2 (int32, _s32, uint32_t, _u32);
|
||||
TEST2 (uint32, _u32, uint32_t, _u32);
|
||||
TEST2 (int64, q_s64, uint64_t, q_u64);
|
||||
TEST2 (uint64, q_u64, uint64_t, q_u64);
|
||||
|
||||
TEST4 (int16, _s16, uint16_t, _u16);
|
||||
TEST4 (uint16, _u16, uint16_t, _u16);
|
||||
TEST4 (int32, q_s32, uint32_t, q_u32);
|
||||
TEST4 (uint32, q_u32, uint32_t, q_u32);
|
||||
|
||||
TEST8 (int8, _s8, uint8_t, _u8);
|
||||
TEST8 (uint8, _u8, uint8_t, _u8);
|
||||
TEST8 (int16, q_s16, uint16_t, q_u16);
|
||||
TEST8 (uint16, q_u16, uint16_t, q_u16);
|
||||
|
||||
TEST16 (int8, q_s8, uint8_t, q_u8);
|
||||
TEST16 (uint8, q_u8, uint8_t, q_u8);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue