diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d1a40254232..95c1a4a8029 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2018-12-07 Richard Sandiford + + * config/aarch64/aarch64-sve.md (*mul<mode>3, *v<optab><mode>3): + Split the patterns after reload if we don't need the predicate + operand. + (*post_ra_mul<mode>3, *post_ra_v<optab><mode>3): New patterns. + 2018-12-07 Richard Sandiford * config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index edc6cff8fbd..8569a8e1ea7 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -936,7 +936,7 @@ ;; predicate for the first alternative, but using Upa or X isn't likely ;; to gain much and would make the instruction seem less uniform to the ;; register allocator. -(define_insn "*mul<mode>3" +(define_insn_and_split "*mul<mode>3" [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") @@ -946,12 +946,30 @@ UNSPEC_MERGE_PTRUE))] "TARGET_SVE" "@ - mul\t%0.<Vetype>, %0.<Vetype>, #%3 + # mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" + ; Split the unpredicated form after reload, so that we don't have + ; the unnecessary PTRUE. + "&& reload_completed + && !register_operand (operands[3], <MODE>mode)" + [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))] + "" [(set_attr "movprfx" "*,*,yes")] ) +;; Unpredicated multiplications by a constant (post-RA only). +;; These are generated by splitting a predicated instruction whose +;; predicate is unused. 
+(define_insn "*post_ra_mul<mode>3" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand" "0") + (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))] + "TARGET_SVE && reload_completed" + "mul\t%0.<Vetype>, %0.<Vetype>, #%2" +) + (define_insn "*madd<mode>" [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") (plus:SVE_I @@ -1232,7 +1250,7 @@ ;; actually need the predicate for the first alternative, but using Upa ;; or X isn't likely to gain much and would make the instruction seem ;; less uniform to the register allocator. -(define_insn "*v<optab><mode>3" +(define_insn_and_split "*v<optab><mode>3" [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") @@ -1242,12 +1260,28 @@ UNSPEC_MERGE_PTRUE))] "TARGET_SVE" "@ - <shift>\t%0.<Vetype>, %2.<Vetype>, #%3 + # <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" + "&& reload_completed + && !register_operand (operands[3], <MODE>mode)" + [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] + "" [(set_attr "movprfx" "*,*,yes")] ) +;; Unpredicated shift operations by a constant (post-RA only). +;; These are generated by splitting a predicated instruction whose +;; predicate is unused. +(define_insn "*post_ra_v<optab><mode>3" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand" "w") + (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] + "TARGET_SVE && reload_completed" + "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" +) + ;; LSL, LSR and ASR by a scalar, which expands into one of the vector ;; shifts above. (define_expand "<ASHIFT:optab><mode>3" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 996cacda1cb..231275454bd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2018-12-07 Richard Sandiford + + * gcc.target/aarch64/sve/pred_elim_2.c: New test. + 2018-12-07 Richard Sandiford * gcc.target/aarch64/sve/pred_elim_1.c: New test. 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c new file mode 100644 index 00000000000..ed9c7007d2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_elim_2.c @@ -0,0 +1,31 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_OP(NAME, TYPE, OP) \ + void \ + NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i] = b[i] OP; \ + } + +#define TEST_TYPE(TYPE) \ + TEST_OP (shl, TYPE, << 6) \ + TEST_OP (shr, TYPE, >> 6) \ + TEST_OP (mult, TYPE, * 0x2b) + +TEST_TYPE (int8_t) +TEST_TYPE (int16_t) +TEST_TYPE (int32_t) +TEST_TYPE (int64_t) +TEST_TYPE (uint8_t) +TEST_TYPE (uint16_t) +TEST_TYPE (uint32_t) +TEST_TYPE (uint64_t) + +/* { dg-final { scan-assembler-times {\tlsl\t} 8 } } */ +/* { dg-final { scan-assembler-times {\tlsr\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tasr\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tmul\t} 8 } } */ +/* { dg-final { scan-assembler-not {\tptrue\t} } } */