diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dbd6fc68914..b23067c6754 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7014,17 +7014,73 @@
 
 ;; addp
 
-(define_insn "aarch64_addp<mode>"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-	(unspec:VDQ_I
-	  [(match_operand:VDQ_I 1 "register_operand" "w")
-	   (match_operand:VDQ_I 2 "register_operand" "w")]
-	  UNSPEC_ADDP))]
-  "TARGET_SIMD"
+;; ADDP with two registers semantically concatenates them and performs
+;; a pairwise addition on the result. For 128-bit input modes represent this
+;; as a concatenation of the pairwise addition results of the two input
+;; registers. This allows us to avoid using intermediate 256-bit modes.
+(define_insn "aarch64_addp<mode>_insn"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+	(vec_concat:VQ_I
+	  (plus:<VHALF>
+	    (vec_select:<VHALF>
+	      (match_operand:VQ_I 1 "register_operand" "w")
+	      (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
+	    (vec_select:<VHALF>
+	      (match_dup 1)
+	      (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
+	  (plus:<VHALF>
+	    (vec_select:<VHALF>
+	      (match_operand:VQ_I 2 "register_operand" "w")
+	      (match_dup 3))
+	    (vec_select:<VHALF>
+	      (match_dup 2)
+	      (match_dup 4)))))]
+  "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
   "addp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
   [(set_attr "type" "neon_reduc_add<q>")]
 )
 
+;; For 64-bit input modes an ADDP is represented as a concatenation
+;; of the input registers into a 128-bit register which is then fed
+;; into a pairwise add. That way we avoid having to create intermediate
+;; 32-bit vector modes.
+(define_insn "aarch64_addp<mode>_insn"
+  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
+	(plus:VD_BHSI
+	  (vec_select:VD_BHSI
+	    (vec_concat:<VDBL>
+	      (match_operand:VD_BHSI 1 "register_operand" "w")
+	      (match_operand:VD_BHSI 2 "register_operand" "w"))
+	    (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
+	  (vec_select:VD_BHSI
+	    (vec_concat:<VDBL>
+	      (match_dup 1)
+	      (match_dup 2))
+	    (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
+  "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
+  "addp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_reduc_add<q>")]
+)
+
+(define_expand "aarch64_addp<mode>"
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:VDQ_I 2 "register_operand")]
+  "TARGET_SIMD"
+  {
+    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
+    if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
+      nunits /= 2;
+    rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
+    rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
+    if (BYTES_BIG_ENDIAN)
+      std::swap (operands[1], operands[2]);
+    emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
+					    operands[2], par_even, par_odd));
+    DONE;
+  }
+)
+
 ;; sqrt
 
 (define_expand "sqrt<mode>2"