[Arm] Implement scalar Custom Datapath Extension intrinsics
This patch introduces the scalar CDE (Custom Datapath Extension) intrinsics for the arm backend. There is nothing beyond the standard in this patch. We simply build upon what has been done by Dennis for the vector intrinsics. We do add `+cdecp6` to the default arguments for `target-supports.exp`; this allows for using coprocessor 6 in tests. This patch uses an alternate coprocessor to ease assembler scanning by looking for a use of coprocessor 6. We also ensure that any DImode registers are put in an even-odd register pair when compiling for a target with CDE -- this avoids faulty code generation for -Os when producing the cx*d instructions. Testing done: Bootstrapped and regtested for arm-none-linux-gnueabihf. gcc/ChangeLog: 2020-03-03 Matthew Malcomson <matthew.malcomson@arm.com> * config/arm/arm.c (arm_hard_regno_mode_ok): DImode registers forced into even-odd register pairs for TARGET_CDE. * config/arm/arm.h (ARM_CCDE_CONST_1): New. (ARM_CCDE_CONST_2): New. (ARM_CCDE_CONST_3): New. * config/arm/arm.md (arm_cx1si, arm_cx1di, arm_cx1asi, arm_cx1adi, arm_cx2si, arm_cx2di, arm_cx2asi, arm_cx2adi, arm_cx3si, arm_cx3di, arm_cx3asi, arm_cx3adi): New patterns. * config/arm/arm_cde.h (__arm_cx1, __arm_cx1a, __arm_cx2, __arm_cx2a, __arm_cx3, __arm_cx3a, __arm_cx1d, __arm_cx1da, __arm_cx2d, __arm_cx2da, __arm_cx3d, __arm_cx3da): New ACLE function macros. * config/arm/arm_cde_builtins.def (cx1, cx1a, cx2, cx2a, cx3, cx3a): Define intrinsics. * config/arm/iterators.md (cde_suffix, cde_dest): New mode attributes. * config/arm/predicates.md (const_int_ccde1_operand, const_int_ccde2_operand, const_int_ccde3_operand): New. * config/arm/unspecs.md (UNSPEC_CDE, UNSPEC_CDEA): New. gcc/testsuite/ChangeLog: 2020-03-03 Matthew Malcomson <matthew.malcomson@arm.com> * gcc.target/arm/acle/cde-errors.c: New test. * gcc.target/arm/acle/cde.c: New test. * lib/target-supports.exp: Update CDE flags to enable coprocessor 6.
This commit is contained in:
parent
07b9bfd02b
commit
a5f3c89e1b
11 changed files with 479 additions and 5 deletions
|
@ -25057,10 +25057,11 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
|||
if (ARM_NUM_REGS (mode) > 4)
|
||||
return false;
|
||||
|
||||
if (TARGET_THUMB2 && !TARGET_HAVE_MVE)
|
||||
if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
|
||||
return true;
|
||||
|
||||
return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
|
||||
return !((TARGET_LDRD || TARGET_CDE)
|
||||
&& GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
|
||||
}
|
||||
|
||||
if (regno == FRAME_POINTER_REGNUM
|
||||
|
|
|
@ -576,6 +576,9 @@ extern int arm_arch_cde;
|
|||
extern int arm_arch_cde_coproc;
|
||||
extern const int arm_arch_cde_coproc_bits[];
|
||||
#define ARM_CDE_CONST_COPROC 7
|
||||
/* Largest immediate accepted by the scalar CDE instructions: a 13-bit
   value (0-8191) for cx1/cx1a, a 9-bit value (0-511) for cx2/cx2a and a
   6-bit value (0-63) for cx3/cx3a.  */
#define ARM_CCDE_CONST_1 ((1 << 13) - 1)
|
||||
#define ARM_CCDE_CONST_2 ((1 << 9 ) - 1)
|
||||
#define ARM_CCDE_CONST_3 ((1 << 6 ) - 1)
|
||||
#define ARM_VCDE_CONST_1 ((1 << 11) - 1)
|
||||
#define ARM_VCDE_CONST_2 ((1 << 6 ) - 1)
|
||||
#define ARM_VCDE_CONST_3 ((1 << 3 ) - 1)
|
||||
|
|
|
@ -4408,6 +4408,76 @@
|
|||
(set_attr "shift" "3")
|
||||
(set_attr "type" "logic_shift_reg")])
|
||||
|
||||
;; Custom Datapath Extension insns.
|
||||
;; CX1 / CX1D.  Operand 1 is the coprocessor number and operand 2 the
;; immediate; both must be constants.  The result goes to operand 0, which
;; is printed as one register for SImode and as two registers for DImode.
(define_insn "arm_cx1<mode>"
|
||||
[(set (match_operand:SIDI 0 "s_register_operand" "=r")
|
||||
(unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
|
||||
(match_operand:SI 2 "const_int_ccde1_operand" "i")]
|
||||
UNSPEC_CDE))]
|
||||
"TARGET_CDE"
|
||||
"cx1<cde_suffix>\\tp%c1, <cde_dest>, %2"
|
||||
[(set_attr "type" "coproc")]
|
||||
)
|
||||
|
||||
;; CX1A / CX1DA: accumulating form of CX1.  Operand 2 is the accumulator
;; input and is tied to the output register by the "0" constraint, so the
;; assembly only names the destination.  Operand 3 is the immediate.
(define_insn "arm_cx1a<mode>"
|
||||
[(set (match_operand:SIDI 0 "s_register_operand" "=r")
|
||||
(unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
|
||||
(match_operand:SIDI 2 "s_register_operand" "0")
|
||||
(match_operand:SI 3 "const_int_ccde1_operand" "i")]
|
||||
UNSPEC_CDEA))]
|
||||
"TARGET_CDE"
|
||||
"cx1<cde_suffix>a\\tp%c1, <cde_dest>, %3"
|
||||
[(set_attr "type" "coproc")]
|
||||
)
|
||||
|
||||
;; CX2 / CX2D.  Operand 1 is the coprocessor number, operand 2 a 32-bit
;; register input and operand 3 the constant immediate.  The result goes
;; to operand 0 (SImode or DImode).
(define_insn "arm_cx2<mode>"
|
||||
[(set (match_operand:SIDI 0 "s_register_operand" "=r")
|
||||
(unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
|
||||
(match_operand:SI 2 "s_register_operand" "r")
|
||||
(match_operand:SI 3 "const_int_ccde2_operand" "i")]
|
||||
UNSPEC_CDE))]
|
||||
"TARGET_CDE"
|
||||
"cx2<cde_suffix>\\tp%c1, <cde_dest>, %2, %3"
|
||||
[(set_attr "type" "coproc")]
|
||||
)
|
||||
|
||||
;; CX2A / CX2DA: accumulating form of CX2.  Operand 2 is the accumulator
;; input, tied to the output register by the "0" constraint.  Operand 3 is
;; the 32-bit register input and operand 4 the constant immediate.
(define_insn "arm_cx2a<mode>"
|
||||
[(set (match_operand:SIDI 0 "s_register_operand" "=r")
|
||||
(unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
|
||||
(match_operand:SIDI 2 "s_register_operand" "0")
|
||||
(match_operand:SI 3 "s_register_operand" "r")
|
||||
(match_operand:SI 4 "const_int_ccde2_operand" "i")]
|
||||
UNSPEC_CDEA))]
|
||||
"TARGET_CDE"
|
||||
"cx2<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4"
|
||||
[(set_attr "type" "coproc")]
|
||||
)
|
||||
|
||||
;; CX3 / CX3D.  Operand 1 is the coprocessor number, operands 2 and 3 are
;; 32-bit register inputs and operand 4 is the constant immediate.  The
;; result goes to operand 0 (SImode or DImode).
(define_insn "arm_cx3<mode>"
|
||||
[(set (match_operand:SIDI 0 "s_register_operand" "=r")
|
||||
(unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
|
||||
(match_operand:SI 2 "s_register_operand" "r")
|
||||
(match_operand:SI 3 "s_register_operand" "r")
|
||||
(match_operand:SI 4 "const_int_ccde3_operand" "i")]
|
||||
UNSPEC_CDE))]
|
||||
"TARGET_CDE"
|
||||
"cx3<cde_suffix>\\tp%c1, <cde_dest>, %2, %3, %4"
|
||||
[(set_attr "type" "coproc")]
|
||||
)
|
||||
|
||||
;; CX3A / CX3DA: accumulating form of CX3.  Operand 2 is the accumulator
;; input, tied to the output register by the "0" constraint.  Operands 3
;; and 4 are the 32-bit register inputs and operand 5 the constant immediate.
(define_insn "arm_cx3a<mode>"
|
||||
[(set (match_operand:SIDI 0 "s_register_operand" "=r")
|
||||
(unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
|
||||
(match_operand:SIDI 2 "s_register_operand" "0")
|
||||
(match_operand:SI 3 "s_register_operand" "r")
|
||||
(match_operand:SI 4 "s_register_operand" "r")
|
||||
(match_operand:SI 5 "const_int_ccde3_operand" "i")]
|
||||
UNSPEC_CDEA))]
|
||||
"TARGET_CDE"
|
||||
"cx3<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4, %5"
|
||||
[(set_attr "type" "coproc")]
|
||||
)
|
||||
|
||||
;; Shift and rotation insns
|
||||
|
||||
(define_expand "ashldi3"
|
||||
|
|
|
@ -35,6 +35,42 @@ extern "C" {
|
|||
|
||||
#if defined (__ARM_FEATURE_CDE)
|
||||
|
||||
/* Scalar CDE intrinsics with a 32-bit result.  Each macro forwards its
   arguments unchanged to the SImode builtin of the same name.  The
   variants ending in `a' take an extra accumulator argument `acc'.  */
#define __arm_cx1(coproc, imm) \
|
||||
__builtin_arm_cx1si(coproc, imm)
|
||||
|
||||
#define __arm_cx1a(coproc, acc, imm) \
|
||||
__builtin_arm_cx1asi(coproc, acc, imm)
|
||||
|
||||
/* cx2 forms take one register input `n'.  */
#define __arm_cx2(coproc, n, imm) \
|
||||
__builtin_arm_cx2si(coproc, n, imm)
|
||||
|
||||
#define __arm_cx2a(coproc, acc, n, imm) \
|
||||
__builtin_arm_cx2asi(coproc, acc, n, imm)
|
||||
|
||||
/* cx3 forms take two register inputs `n' and `m'.  */
#define __arm_cx3(coproc, n, m, imm) \
|
||||
__builtin_arm_cx3si(coproc, n, m, imm)
|
||||
|
||||
#define __arm_cx3a(coproc, acc, n, m, imm) \
|
||||
__builtin_arm_cx3asi(coproc, acc, n, m, imm)
|
||||
|
||||
/* Scalar CDE intrinsics with a 64-bit result.  Each macro forwards its
   arguments unchanged to the DImode builtin.  Note the naming: the macro
   suffix is `da' (e.g. __arm_cx1da) while the builtin is named `adi'
   (e.g. __builtin_arm_cx1adi).  */
#define __arm_cx1d(coproc, imm) \
|
||||
__builtin_arm_cx1di(coproc, imm)
|
||||
|
||||
#define __arm_cx1da(coproc, acc, imm) \
|
||||
__builtin_arm_cx1adi(coproc, acc, imm)
|
||||
|
||||
/* cx2d forms take one register input `n'.  */
#define __arm_cx2d(coproc, n, imm) \
|
||||
__builtin_arm_cx2di(coproc, n, imm)
|
||||
|
||||
#define __arm_cx2da(coproc, acc, n, imm) \
|
||||
__builtin_arm_cx2adi(coproc, acc, n, imm)
|
||||
|
||||
/* cx3d forms take two register inputs `n' and `m'.  */
#define __arm_cx3d(coproc, n, m, imm) \
|
||||
__builtin_arm_cx3di(coproc, n, m, imm)
|
||||
|
||||
#define __arm_cx3da(coproc, acc, n, m, imm) \
|
||||
__builtin_arm_cx3adi(coproc, acc, n, m, imm)
|
||||
|
||||
#if defined (__ARM_FP) || defined (__ARM_FEATURE_MVE)
|
||||
|
||||
/* CDE builtins using FPU/MVE registers. */
|
||||
|
|
|
@ -23,6 +23,13 @@
|
|||
VAR1 (T, N, A, IMM_MAX, ECF_FLAG) \
|
||||
VAR1 (T, N, B, IMM_MAX, ECF_FLAG)
|
||||
|
||||
/* Scalar CDE builtins.  Each entry expands to one VAR1 per listed mode
   (si and di) and records the largest immediate the builtin accepts.
   NOTE(review): the first argument presumably selects the argument
   qualifiers, i.e. how many non-immediate operands follow the coprocessor
   number -- confirm against the qualifier definitions.  */
CDE_VAR2 (CX_IMM, cx1, si, di, ARM_CCDE_CONST_1, ECF_CONST)
|
||||
CDE_VAR2 (CX_UNARY, cx1a, si, di, ARM_CCDE_CONST_1, ECF_CONST)
|
||||
CDE_VAR2 (CX_UNARY, cx2, si, di, ARM_CCDE_CONST_2, ECF_CONST)
|
||||
CDE_VAR2 (CX_BINARY, cx2a, si, di, ARM_CCDE_CONST_2, ECF_CONST)
|
||||
CDE_VAR2 (CX_BINARY, cx3, si, di, ARM_CCDE_CONST_3, ECF_CONST)
|
||||
CDE_VAR2 (CX_TERNARY, cx3a, si, di, ARM_CCDE_CONST_3, ECF_CONST)
|
||||
|
||||
CDE_VAR2 (CX_IMM, vcx1, si, di, ARM_VCDE_CONST_1, ECF_CONST)
|
||||
CDE_VAR2 (CX_UNARY, vcx1a, si, di, ARM_VCDE_CONST_1, ECF_CONST)
|
||||
CDE_VAR2 (CX_UNARY, vcx2, si, di, ARM_VCDE_CONST_2, ECF_CONST)
|
||||
|
|
|
@ -896,6 +896,9 @@
|
|||
|
||||
(define_mode_attr VSF2BF [(V2SF "V4BF") (V4SF "V8BF")])
|
||||
|
||||
;; Mnemonic suffix for the scalar CDE insns: none for SImode, "d" for the
;; DImode (dual-register) forms.
(define_mode_attr cde_suffix [(SI "") (DI "d")])
|
||||
;; How the destination is printed in the scalar CDE insns: one register for
;; SImode, two operands (%0 and %H0) for DImode.
(define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Code attributes
|
||||
;;----------------------------------------------------------------------------
|
||||
|
|
|
@ -231,6 +231,18 @@
|
|||
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_CDE_CONST_COPROC)")
|
||||
(match_test "arm_arch_cde_coproc_bits[UINTVAL (op)] & arm_arch_cde_coproc")))
|
||||
|
||||
;; True for a constant integer in the range [0, ARM_CCDE_CONST_1]; used for
;; the immediate of the cx1 and cx1a patterns.
(define_predicate "const_int_ccde1_operand"
|
||||
(and (match_operand 0 "const_int_operand")
|
||||
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_1)")))
|
||||
|
||||
;; True for a constant integer in the range [0, ARM_CCDE_CONST_2]; used for
;; the immediate of the cx2 and cx2a patterns.
(define_predicate "const_int_ccde2_operand"
|
||||
(and (match_operand 0 "const_int_operand")
|
||||
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_2)")))
|
||||
|
||||
;; True for a constant integer in the range [0, ARM_CCDE_CONST_3]; used for
;; the immediate of the cx3 and cx3a patterns.
(define_predicate "const_int_ccde3_operand"
|
||||
(and (match_operand 0 "const_int_operand")
|
||||
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_3)")))
|
||||
|
||||
(define_predicate "const_int_vcde1_operand"
|
||||
(and (match_operand 0 "const_int_operand")
|
||||
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_VCDE_CONST_1)")))
|
||||
|
|
|
@ -154,6 +154,8 @@
|
|||
UNSPEC_SMUADX ; Represent the SMUADX operation.
|
||||
UNSPEC_SSAT16 ; Represent the SSAT16 operation.
|
||||
UNSPEC_USAT16 ; Represent the USAT16 operation.
|
||||
UNSPEC_CDE ; Custom Datapath Extension instruction.
|
||||
UNSPEC_CDEA ; Custom Datapath Extension instruction.
|
||||
UNSPEC_VCDE ; Custom Datapath Extension instruction.
|
||||
UNSPEC_VCDEA ; Custom Datapath Extension instruction.
|
||||
])
|
||||
|
|
111
gcc/testsuite/gcc.target/arm/acle/cde-errors.c
Normal file
111
gcc/testsuite/gcc.target/arm/acle/cde-errors.c
Normal file
|
@ -0,0 +1,111 @@
|
|||
/* Test the Custom Datapath Extension ACLE intrinsic. */
|
||||
|
||||
/* This file is to check we catch incorrect uses of the ACLE. */
|
||||
|
||||
/* { dg-do assemble } */
|
||||
/* { dg-require-effective-target arm_v8m_main_cde_ok } */
|
||||
/* { dg-add-options arm_v8m_main_cde } */
|
||||
/* { dg-additional-options "-save-temps" } */
|
||||
|
||||
#include "arm_cde.h"
|
||||
|
||||
/*
|
||||
These are the scalar intrinsics.
|
||||
uint32_t __arm_cx1(int coproc, uint32_t imm);
|
||||
uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
|
||||
uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
|
||||
uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
|
||||
uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
|
||||
uint64_t __arm_cx1d(int coproc, uint32_t imm);
|
||||
uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
|
||||
uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
|
||||
uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
|
||||
uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
*/
|
||||
|
||||
/* Incorrect types as the constants. */
|
||||
/* Negative test: every intrinsic call below is expected to be rejected with
   the diagnostic named in its dg-error directive.  The groups cover, in
   order: a coprocessor that is not enabled, a coprocessor number out of
   range, an immediate out of range, a non-constant immediate and a
   non-constant coprocessor number.  Each group exercises all twelve scalar
   intrinsics.  */
uint64_t test_cde (uint32_t n, uint32_t m)
|
||||
{
|
||||
uint64_t accum = 0;
|
||||
|
||||
/* `coproc` not enabled. */
|
||||
accum += __arm_cx1 (7, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx1a (7, (uint32_t)accum, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx2 (7, n, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx2a (7, (uint32_t)accum, n, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx3 (7, n, m, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx3a (7, (uint32_t)accum, n, m, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
|
||||
accum += __arm_cx1d (7, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx1da (7, accum, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx2d (7, n, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx2da (7, accum, n, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx3d (7, n, m, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
accum += __arm_cx3da (7, accum, n, m, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
|
||||
|
||||
/* `coproc` out of range. */
|
||||
accum += __arm_cx1 (8, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx1a (8, (uint32_t)accum, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2 (8, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2a (8, (uint32_t)accum, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3 (8, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3a (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
|
||||
accum += __arm_cx1d (8, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx1da (8, accum, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2d (8, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2da (8, accum, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3d (8, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3da (8, accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
|
||||
/* `imm` out of range. */
|
||||
accum += __arm_cx1 (0, 8192); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx1a (0, (uint32_t)accum, 8192); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx2 (0, n, 512); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx2a (0, (uint32_t)accum, n, 512); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx3 (0, n, m, 64); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
|
||||
accum += __arm_cx3a (0, (uint32_t)accum, n, m, 64); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
|
||||
|
||||
accum += __arm_cx1d (0, 8192); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx1da (0, accum, 8192); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx2d (0, n, 512); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx2da (0, accum, n, 512); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx3d (0, n, m, 64); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
|
||||
accum += __arm_cx3da (0, accum, n, m, 64); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
|
||||
|
||||
/* `imm` is not an immediate. */
|
||||
accum += __arm_cx1 (0, n); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx1a (0, (uint32_t)accum, n); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx2 (0, n, n); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx2a (0, (uint32_t)accum, n, n); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx3 (0, n, m, n); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
|
||||
accum += __arm_cx3a (0, (uint32_t)accum, n, m, n); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
|
||||
|
||||
accum += __arm_cx1d (0, n); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx1da (0, accum, n); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
|
||||
accum += __arm_cx2d (0, n, n); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx2da (0, accum, n, n); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
|
||||
accum += __arm_cx3d (0, n, m, n); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
|
||||
accum += __arm_cx3da (0, accum, n, m, n); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
|
||||
|
||||
/* `coproc` is not an immediate. */
|
||||
accum += __arm_cx1 ((int)m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx1a ((int)m, (uint32_t)accum, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2 ((int)m, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2a ((int)m, (uint32_t)accum, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3 ((int)m, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3a ((int)m, (uint32_t)accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
|
||||
accum += __arm_cx1d ((int)m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx1da ((int)m, accum, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2d ((int)m, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx2da ((int)m, accum, n, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3d ((int)m, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
accum += __arm_cx3da ((int)m, accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
|
||||
|
||||
return accum;
|
||||
}
|
||||
|
229
gcc/testsuite/gcc.target/arm/acle/cde.c
Normal file
229
gcc/testsuite/gcc.target/arm/acle/cde.c
Normal file
|
@ -0,0 +1,229 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" } { "" } } */
|
||||
/* { dg-require-effective-target arm_v8m_main_cde_ok } */
|
||||
/* { dg-add-options arm_v8m_main_cde } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
/* These are the scalar intrinsics.
|
||||
uint32_t __arm_cx1(int coproc, uint32_t imm);
|
||||
uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
|
||||
uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
|
||||
uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
|
||||
uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
|
||||
uint64_t __arm_cx1d(int coproc, uint32_t imm);
|
||||
uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
|
||||
uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
|
||||
uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
|
||||
uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
|
||||
uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm); */
|
||||
|
||||
#include "arm_cde.h"
|
||||
|
||||
/* Define a function test_cde_<name> (uint32_t n, uint32_t m) that makes a
   single call to __arm_<name> with the parenthesised ARGUMENTS list, adds
   the result to a zero-initialised accumulator of type ACCUM_TYPE and
   returns it.  ARGUMENTS may refer to `n', `m' and `accum'.  The parameters
   are marked unused because not every intrinsic takes register inputs.  */
#define TEST_CDE_SCALAR_INTRINSIC(name, accum_type, arguments) \
|
||||
accum_type test_cde_##name (__attribute__ ((unused)) uint32_t n, \
|
||||
__attribute__ ((unused)) uint32_t m) \
|
||||
{ \
|
||||
accum_type accum = 0; \
|
||||
accum += __arm_##name arguments; \
|
||||
return accum; \
|
||||
}
|
||||
|
||||
/* Basic test that we produce the assembly as expected. */
|
||||
/*
|
||||
** test_cde_cx1:
|
||||
** cx1 p0, r0, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx1, uint32_t, (0, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx1a:
|
||||
** movs r0, #0
|
||||
** cx1a p0, r0, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx1a, uint32_t, (0, accum, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx2:
|
||||
** cx2 p0, r0, r0, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx2, uint32_t, (0, n, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx2a:
|
||||
** movs (r[0-9]+), #0
|
||||
** cx2a p0, \1, r0, #33
|
||||
** mov r0, \1
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx2a, uint32_t, (0, accum, n, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx3:
|
||||
** cx3 p0, r0, r0, r1, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx3, uint32_t, (0, n, m, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx3a:
|
||||
** movs (r[0-9]+), #0
|
||||
** cx3a p0, \1, r0, r1, #33
|
||||
** mov r0, \1
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx3a, uint32_t, (0, accum, n, m, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx1d:
|
||||
** cx1d p0, r0, r1, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx1d, uint64_t, (0, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx1da:
|
||||
** movs r0, #0
|
||||
** movs r1, #0
|
||||
** cx1da p0, r0, r1, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx1da, uint64_t, (0, accum, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx2d:
|
||||
** cx2d p0, r0, r1, r0, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx2d, uint64_t, (0, n, 33))
|
||||
|
||||
/* This particular function gets optimised by the compiler in two different
|
||||
ways depending on the optimisation level. So does test_cde_cx3da. That's
|
||||
why we have two different regexes in each of these function body checks. */
|
||||
/*
|
||||
** test_cde_cx2da:
|
||||
** (
|
||||
** mov (r[0-9]+), r0
|
||||
** movs r0, #0
|
||||
** movs r1, #0
|
||||
** cx2da p0, r0, r1, \1, #33
|
||||
** |
|
||||
** movs (r[0-9]+), #0
|
||||
** movs (r[0-9]+), #0
|
||||
** cx2da p0, \2, \3, r0, #33
|
||||
** mov r0, \2
|
||||
** mov r1, \3
|
||||
** )
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx2da, uint64_t, (0, accum, n, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx3d:
|
||||
** cx3d p0, r0, r1, r0, r1, #33
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx3d, uint64_t, (0, n, m, 33))
|
||||
|
||||
/*
|
||||
** test_cde_cx3da:
|
||||
** ...
|
||||
** (
|
||||
** movs (r[0-9]+), #0
|
||||
** movs (r[0-9]+), #0
|
||||
** cx3da p0, \1, \2, r0, r1, #33
|
||||
** mov r0, \1
|
||||
** mov r1, \2
|
||||
** |
|
||||
** movs r0, #0
|
||||
** movs r1, #0
|
||||
** cx3da p0, r0, r1, r[0-9]+, r[0-9]+, #33
|
||||
** )
|
||||
** ...
|
||||
** bx lr
|
||||
*/
|
||||
TEST_CDE_SCALAR_INTRINSIC (cx3da, uint64_t, (0, accum, n, m, 33))
|
||||
|
||||
|
||||
|
||||
/* Ensure this function gets DCE'd out after optimisation.
|
||||
Should be such since the ACLE specification mentions these functions are
|
||||
stateless and pure. */
|
||||
/*
|
||||
** test_cde_dce:
|
||||
** bx lr
|
||||
*/
|
||||
/* Calls all twelve scalar intrinsics and discards every result.  The
   expected function body is a bare `bx lr', i.e. all calls are removed.  */
void test_cde_dce (uint32_t n, uint32_t m)
|
||||
{
|
||||
/* One 64-bit accumulator is passed to both the 32-bit and the 64-bit
   accumulating forms.  */
uint64_t accum = 0;
|
||||
__arm_cx1 (0, 33);
|
||||
__arm_cx1a (0, accum, 33);
|
||||
__arm_cx2 (0, n, 33);
|
||||
__arm_cx2a (0, accum, n, 33);
|
||||
__arm_cx3 (0, n, m, 33);
|
||||
__arm_cx3a (0, accum, n, m, 33);
|
||||
__arm_cx1d (0, 33);
|
||||
__arm_cx1da (0, accum, 33);
|
||||
__arm_cx2d (0, n, 33);
|
||||
__arm_cx2da (0, accum, n, 33);
|
||||
__arm_cx3d (0, n, m, 33);
|
||||
__arm_cx3da (0, accum, n, m, 33);
|
||||
}
|
||||
|
||||
/* Checking this function allows constants with symbolic names.
|
||||
This test must be run under some level of optimisation.
|
||||
The actual check we perform is that the function is provided something that,
|
||||
at the point of expansion, is an immediate. That check is not as strict as
|
||||
having something that is an immediate directly.
|
||||
|
||||
Since we've already checked these intrinsics generate code in the manner we
|
||||
expect (above), here we just check that all the instructions we expect are
|
||||
there. To ensure the instructions are from these functions we use different
|
||||
constants and search for those specifically with `scan-assembler-times`. */
|
||||
|
||||
/* Checking this function allows constants with symbolic names. */
|
||||
/* 32-bit variants: the coprocessor number and the immediate are passed as
   local variables rather than literals.  Coprocessor 6 is used so that the
   scan-assembler-times directives at the end of this file can identify the
   instructions generated here.  */
uint32_t test_cde2 (uint32_t n, uint32_t m)
|
||||
{
|
||||
int coproc = 6;
|
||||
uint32_t imm = 30;
|
||||
uint32_t accum = 0;
|
||||
accum += __arm_cx1 (coproc, imm);
|
||||
accum += __arm_cx1a (coproc, accum, imm);
|
||||
accum += __arm_cx2 (coproc, n, imm);
|
||||
accum += __arm_cx2a (coproc, accum, n, imm);
|
||||
accum += __arm_cx3 (coproc, n, m, imm);
|
||||
accum += __arm_cx3a (coproc, accum, n, m, imm);
|
||||
return accum;
|
||||
}
|
||||
|
||||
/* Checking this function allows constants with symbolic names. */
|
||||
/* 64-bit variants: the coprocessor number and the immediate are passed as
   local variables rather than literals.  Coprocessor 6 is used so that the
   scan-assembler-times directives at the end of this file can identify the
   instructions generated here.  */
uint64_t test_cdedi2 (uint32_t n, uint32_t m)
|
||||
{
|
||||
int coproc = 6;
|
||||
uint32_t imm = 30;
|
||||
uint64_t accum = 0;
|
||||
accum += __arm_cx1d (coproc, imm);
|
||||
accum += __arm_cx1da (coproc, accum, imm);
|
||||
accum += __arm_cx2d (coproc, n, imm);
|
||||
accum += __arm_cx2da (coproc, accum, n, imm);
|
||||
accum += __arm_cx3d (coproc, n, m, imm);
|
||||
accum += __arm_cx3da (coproc, accum, n, m, imm);
|
||||
return accum;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "cx1\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx2\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx3\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx1a\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx2a\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx3a\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx1d\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx2d\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx3d\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx1da\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx2da\\tp6" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cx3da\\tp6" 1 } } */
|
|
@ -5117,13 +5117,13 @@ proc add_options_for_arm_v8_2a_bf16_neon { flags } {
|
|||
|
||||
foreach { armfunc armflag armdef } {
|
||||
arm_v8m_main_cde
|
||||
"-march=armv8-m.main+cdecp0 -mthumb"
|
||||
"-march=armv8-m.main+cdecp0+cdecp6 -mthumb"
|
||||
"defined (__ARM_FEATURE_CDE)"
|
||||
arm_v8m_main_cde_fp
|
||||
"-march=armv8-m.main+fp+cdecp0 -mthumb"
|
||||
"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb"
|
||||
"defined (__ARM_FEATURE_CDE) && defined (__ARM_FP)"
|
||||
arm_v8_1m_main_cde_mve
|
||||
"-march=armv8.1-m.main+mve+cdecp0 -mthumb"
|
||||
"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb"
|
||||
"defined (__ARM_FEATURE_CDE) && defined (__ARM_FEATURE_MVE)"
|
||||
} {
|
||||
eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
|
||||
|
|
Loading…
Add table
Reference in a new issue