diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 070986ec3f7..307f5360ea0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,34 @@
+2019-08-13  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* tree.h (build_vector_a_then_b): Declare.
+	* tree.c (build_vector_a_then_b): New function.
+	* fold-const-call.c (fold_while_ult): Likewise.
+	(fold_const_call): Use it to handle IFN_WHILE_ULT.
+	* config/aarch64/aarch64-protos.h (AARCH64_FOR_SVPATTERN): New macro.
+	(aarch64_svpattern): New enum.
+	* config/aarch64/aarch64-sve.md (mov<PRED_ALL:mode>): Pass
+	constants through aarch64_expand_mov_immediate.
+	(*aarch64_sve_mov<PRED_ALL:mode>): Use aarch64_mov_operand rather
+	than general_operand as the predicate for operand 1.
+	(while_ult<GPI:mode><PRED_ALL:mode>): Add a '@' marker.
+	* config/aarch64/aarch64.c (simd_immediate_info::PTRUE): New
+	insn_type.
+	(simd_immediate_info::simd_immediate_info): New overload that
+	takes a scalar_int_mode and an svpattern.
+	(simd_immediate_info::u): Add a "pattern" field.
+	(svpattern_token): New function.
+	(aarch64_get_sve_pred_bits, aarch64_widest_sve_pred_elt_size)
+	(aarch64_partial_ptrue_length, aarch64_svpattern_for_vl)
+	(aarch64_sve_move_pred_via_while): New functions.
+	(aarch64_expand_mov_immediate): Try using
+	aarch64_sve_move_pred_via_while for predicates that contain N ones
+	followed by M zeros but that do not correspond to a VLnnn pattern.
+	(aarch64_sve_pred_valid_immediate): New function.
+	(aarch64_simd_valid_immediate): Use it instead of dealing directly
+	with PTRUE and PFALSE.
+	(aarch64_output_sve_mov_immediate): Handle new simd_immediate_info
+	forms.
+
 2019-08-13  Iain Sandoe  <iain@sandoe.co.uk>
 
 	* config/darwin.c (machopic_indirect_call_target): Rename symbol stub
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ad818a4ec7f..86d53c5ce1e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -406,6 +406,33 @@ extern enum aarch64_key_type aarch64_ra_sign_key;
 
 extern struct tune_params aarch64_tune_params;
 
+/* The available SVE predicate patterns, known in the ACLE as "svpattern".  */
+#define AARCH64_FOR_SVPATTERN(T) \
+  T (POW2, pow2, 0) \
+  T (VL1, vl1, 1) \
+  T (VL2, vl2, 2) \
+  T (VL3, vl3, 3) \
+  T (VL4, vl4, 4) \
+  T (VL5, vl5, 5) \
+  T (VL6, vl6, 6) \
+  T (VL7, vl7, 7) \
+  T (VL8, vl8, 8) \
+  T (VL16, vl16, 9) \
+  T (VL32, vl32, 10) \
+  T (VL64, vl64, 11) \
+  T (VL128, vl128, 12) \
+  T (VL256, vl256, 13) \
+  T (MUL4, mul4, 29) \
+  T (MUL3, mul3, 30) \
+  T (ALL, all, 31)
+
+#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE,
+enum aarch64_svpattern {
+  AARCH64_FOR_SVPATTERN (AARCH64_SVENUM)
+  AARCH64_NUM_SVPATTERNS
+};
+#undef AARCH64_SVENUM
+
 void aarch64_post_cfi_startproc (void);
 poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
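To make the X-macro machinery above easier to audit: each T (UPPER, LOWER, VALUE)
row supplies an enumerator name, the assembly mnemonic later used by
svpattern_token, and the architectural encoding of the pattern field.  A sketch
of the preprocessed result (abbreviated; illustration only, not part of the
patch):

    enum aarch64_svpattern {
      AARCH64_SV_POW2 = 0,
      AARCH64_SV_VL1 = 1,
      /* ... VL2 through VL256 with the encodings tabulated above ... */
      AARCH64_SV_MUL4 = 29,
      AARCH64_SV_MUL3 = 30,
      AARCH64_SV_ALL = 31,
      AARCH64_NUM_SVPATTERNS   /* 32; used below as a "no pattern" sentinel.  */
    };

Note the deliberate gap between VL256 (13) and MUL4 (29): the values follow the
SVE instruction encoding of the pattern operand rather than a dense range.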
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 950f39781af..53d93a367db 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -481,12 +481,18 @@
   {
     if (GET_CODE (operands[0]) == MEM)
       operands[1] = force_reg (<MODE>mode, operands[1]);
+
+    if (CONSTANT_P (operands[1]))
+      {
+	aarch64_expand_mov_immediate (operands[0], operands[1]);
+	DONE;
+      }
   }
 )
 
 (define_insn "*aarch64_sve_mov<mode>"
   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
-	(match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dn"))]
+	(match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
   "TARGET_SVE
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
@@ -2923,7 +2929,7 @@
 
 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
 ;; with the comparison being unsigned.
-(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
+(define_insn "@while_ult<GPI:mode><PRED_ALL:mode>"
   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
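The '@' prefix on while_ult opts the pattern into GCC's "parameterized names"
mechanism: genemit additionally produces a gen_while_ult overload whose leading
arguments select the GPI and PRED_ALL modes at run time, so callers need no
per-mode switch over generator names.  A minimal sketch of the resulting call
shape (mirroring aarch64_sve_move_pred_via_while later in this patch; dest and
limit are stand-ins):

    /* Emit a WHILELO that sets the first "limit" elements of DEST,
       for whichever predicate mode DEST happens to have.  */
    emit_insn (gen_while_ult (DImode, GET_MODE (dest),
                              dest, const0_rtx, limit));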
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fe968459241..2b3ea9f164c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -83,7 +83,7 @@
 /* Information about a legitimate vector immediate operand.  */
 struct simd_immediate_info
 {
-  enum insn_type { MOV, MVN, INDEX };
+  enum insn_type { MOV, MVN, INDEX, PTRUE };
   enum modifier_type { LSL, MSL };
 
   simd_immediate_info () {}
@@ -92,6 +92,7 @@ struct simd_immediate_info
 		       insn_type = MOV, modifier_type = LSL,
 		       unsigned int = 0);
   simd_immediate_info (scalar_mode, rtx, rtx);
+  simd_immediate_info (scalar_int_mode, aarch64_svpattern);
 
   /* The mode of the elements.  */
   scalar_mode elt_mode;
@@ -120,6 +121,9 @@ struct simd_immediate_info
 	 subsequent element.  */
       rtx base, step;
     } index;
+
+    /* For PTRUE.  */
+    aarch64_svpattern pattern;
   } u;
 };
 
@@ -159,6 +163,16 @@ inline simd_immediate_info
   u.index.step = step_in;
 }
 
+/* Construct a predicate that controls elements of mode ELT_MODE_IN
+   and has PTRUE pattern PATTERN_IN.  */
+inline simd_immediate_info
+::simd_immediate_info (scalar_int_mode elt_mode_in,
+		       aarch64_svpattern pattern_in)
+  : elt_mode (elt_mode_in), insn (PTRUE)
+{
+  u.pattern = pattern_in;
+}
+
 /* The current code model.  */
 enum aarch64_code_model aarch64_cmodel;
 
@@ -1334,6 +1348,22 @@
   "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv"
 };
 
+/* Return the assembly token for svpattern value VALUE.  */
+
+static const char *
+svpattern_token (enum aarch64_svpattern pattern)
+{
+  switch (pattern)
+    {
+#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
+    AARCH64_FOR_SVPATTERN (CASE)
+#undef CASE
+    case AARCH64_NUM_SVPATTERNS:
+      break;
+    }
+  gcc_unreachable ();
+}
+
 /* Generate code to enable conditional branches in functions over 1 MiB.  */
 const char *
 aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
@@ -2529,6 +2559,146 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
     }
 }
 
+/* Return true if predicate value X is a constant in which every element
+   is a CONST_INT.  When returning true, describe X in BUILDER as a VNx16BI
+   value, i.e. as a predicate in which all bits are significant.  */
+
+static bool
+aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x)
+{
+  if (GET_CODE (x) != CONST_VECTOR)
+    return false;
+
+  unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode),
+					     GET_MODE_NUNITS (GET_MODE (x)));
+  unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor;
+  unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+  builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern);
+
+  unsigned int nelts = const_vector_encoded_nelts (x);
+  for (unsigned int i = 0; i < nelts; ++i)
+    {
+      rtx elt = CONST_VECTOR_ENCODED_ELT (x, i);
+      if (!CONST_INT_P (elt))
+	return false;
+
+      builder.quick_push (elt);
+      for (unsigned int j = 1; j < factor; ++j)
+	builder.quick_push (const0_rtx);
+    }
+  builder.finalize ();
+  return true;
+}
+
+/* BUILDER contains a predicate constant of mode VNx16BI.  Return the
+   widest predicate element size it can have (that is, the largest size
+   for which each element would still be 0 or 1).  */
+
+unsigned int
+aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder)
+{
+  /* Start with the most optimistic assumption: that we only need
+     one bit per pattern.  This is what we will use if only the first
+     bit in each pattern is ever set.  */
+  unsigned int mask = GET_MODE_SIZE (DImode);
+  mask |= builder.npatterns ();
+
+  /* Look for set bits.  */
+  unsigned int nelts = builder.encoded_nelts ();
+  for (unsigned int i = 1; i < nelts; ++i)
+    if (INTVAL (builder.elt (i)) != 0)
+      {
+	if (i & 1)
+	  return 1;
+	mask |= i;
+      }
+  return mask & -mask;
+}
+
+/* BUILDER is a predicate constant of mode VNx16BI.  Consider the value
+   that the constant would have with predicate element size ELT_SIZE
+   (ignoring the upper bits in each element) and return:
+
+   * -1 if all bits are set
+   * N if the predicate has N leading set bits followed by all clear bits
+   * 0 if the predicate does not have any of these forms.  */
+
+int
+aarch64_partial_ptrue_length (rtx_vector_builder &builder,
+			      unsigned int elt_size)
+{
+  /* If nelts_per_pattern is 3, we have set bits followed by clear bits
+     followed by set bits.  */
+  if (builder.nelts_per_pattern () == 3)
+    return 0;
+
+  /* Skip over leading set bits.  */
+  unsigned int nelts = builder.encoded_nelts ();
+  unsigned int i = 0;
+  for (; i < nelts; i += elt_size)
+    if (INTVAL (builder.elt (i)) == 0)
+      break;
+  unsigned int vl = i / elt_size;
+
+  /* Check for the all-true case.  */
+  if (i == nelts)
+    return -1;
+
+  /* If nelts_per_pattern is 1, then either VL is zero, or we have a
+     repeating pattern of set bits followed by clear bits.  */
+  if (builder.nelts_per_pattern () != 2)
+    return 0;
+
+  /* We have a "foreground" value and a duplicated "background" value.
+     If the background might repeat and the last set bit belongs to it,
+     we might have set bits followed by clear bits followed by set bits.  */
+  if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ()))
+    return 0;
+
+  /* Make sure that the rest are all clear.  */
+  for (; i < nelts; i += elt_size)
+    if (INTVAL (builder.elt (i)) != 0)
+      return 0;
+
+  return vl;
+}
+
+/* See if there is an svpattern that encodes an SVE predicate of mode
+   PRED_MODE in which the first VL bits are set and the rest are clear.
+   Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
+   A VL of -1 indicates an all-true vector.  */
+
+aarch64_svpattern
+aarch64_svpattern_for_vl (machine_mode pred_mode, int vl)
+{
+  if (vl < 0)
+    return AARCH64_SV_ALL;
+
+  if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode)))
+    return AARCH64_NUM_SVPATTERNS;
+
+  if (vl >= 1 && vl <= 8)
+    return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1));
+
+  if (vl >= 16 && vl <= 256 && pow2p_hwi (vl))
+    return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4));
+
+  int max_vl;
+  if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl))
+    {
+      if (vl == (max_vl / 3) * 3)
+	return AARCH64_SV_MUL3;
+      /* These would only trigger for non-power-of-2 lengths.  */
+      if (vl == (max_vl & -4))
+	return AARCH64_SV_MUL4;
+      if (vl == (1 << floor_log2 (max_vl)))
+	return AARCH64_SV_POW2;
+      if (vl == max_vl)
+	return AARCH64_SV_ALL;
+    }
+  return AARCH64_NUM_SVPATTERNS;
+}
+
 /* Return an all-true predicate register of mode MODE.  */
 
 rtx
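To see how the three helpers above compose, here is a hand-worked example
(illustrative, not from the patch): take a VNx8BI constant whose first five
.h elements are true.  aarch64_get_sve_pred_bits widens it to VNx16BI bits
{1,0, 1,0, 1,0, 1,0, 1,0, 0,0, ...}, one bit pair per 2-byte element.
aarch64_widest_sve_pred_elt_size then only ever sees set bits at even indices,
so the lowest set bit of its mask is 2: the value is representable with 2-byte
predicate elements.  aarch64_partial_ptrue_length (builder, 2) skips five
leading set elements, verifies the remainder is clear, and returns 5; with
elt_size 1 the same bits would fail the trailing-zeros check at bit index 2
and return 0, which is why callers always ask for the widest element size
first.  Finally, aarch64_svpattern_for_vl (VNx8BImode, 5) returns
AARCH64_SV_VL5, i.e. the constant is exactly "ptrue pN.h, vl5".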
@@ -3447,6 +3617,17 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
   return target;
 }
 
+/* Use WHILE to set predicate register DEST so that the first VL bits
+   are set and the rest are clear.  */
+
+static void
+aarch64_sve_move_pred_via_while (rtx dest, unsigned int vl)
+{
+  rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
+  emit_insn (gen_while_ult (DImode, GET_MODE (dest),
+			    dest, const0_rtx, limit));
+}
+
 /* Set DEST to immediate IMM.  */
 
 void
@@ -3580,6 +3761,19 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	  return;
 	}
 
+      rtx_vector_builder builder;
+      if (GET_MODE_CLASS (GET_MODE (imm)) == MODE_VECTOR_BOOL
+	  && aarch64_get_sve_pred_bits (builder, imm))
+	{
+	  unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
+	  int vl = aarch64_partial_ptrue_length (builder, elt_size);
+	  if (vl > 0)
+	    {
+	      aarch64_sve_move_pred_via_while (dest, vl);
+	      return;
+	    }
+	}
+
       if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
 	if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
 	  {
@@ -14776,6 +14970,44 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
   return false;
 }
 
+/* Return true if X is a valid SVE predicate.  If INFO is nonnull, use
+   it to describe valid immediates.  */
+
+static bool
+aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
+{
+  if (x == CONST0_RTX (GET_MODE (x)))
+    {
+      if (info)
+	*info = simd_immediate_info (DImode, 0);
+      return true;
+    }
+
+  /* Analyze the value as a VNx16BImode.  This should be relatively
+     efficient, since rtx_vector_builder has enough built-in capacity
+     to store all VLA predicate constants without needing the heap.  */
+  rtx_vector_builder builder;
+  if (!aarch64_get_sve_pred_bits (builder, x))
+    return false;
+
+  unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
+  if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
+    {
+      machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
+      aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl);
+      if (pattern != AARCH64_NUM_SVPATTERNS)
+	{
+	  if (info)
+	    {
+	      scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
+	      *info = simd_immediate_info (int_mode, pattern);
+	    }
+	  return true;
+	}
+    }
+  return false;
+}
+
 /* Return true if OP is a valid SIMD immediate for the operation
    described by WHICH.  If INFO is nonnull, use it to describe valid
    immediates.  */
@@ -14788,6 +15020,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
     return false;
 
+  if (vec_flags & VEC_SVE_PRED)
+    return aarch64_sve_pred_valid_immediate (op, info);
+
   scalar_mode elt_mode = GET_MODE_INNER (mode);
   rtx base, step;
   unsigned int n_elts;
@@ -14812,21 +15047,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   else
     return false;
 
-  /* Handle PFALSE and PTRUE.  */
-  if (vec_flags & VEC_SVE_PRED)
-    {
-      if (op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode))
-	{
-	  if (info)
-	    {
-	      scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
-	      *info = simd_immediate_info (int_mode, op == CONSTM1_RTX (mode));
-	    }
-	  return true;
-	}
-      return false;
-    }
-
   scalar_float_mode elt_float_mode;
   if (n_elts == 1
       && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
@@ -16570,14 +16790,23 @@ aarch64_output_sve_mov_immediate (rtx const_vector)
   if (aarch64_sve_pred_mode_p (vec_mode))
     {
       static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
-      unsigned int total_bytes;
-      if (info.u.mov.value == const0_rtx)
-	snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
-      else if (BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
-	snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
-		  total_bytes / GET_MODE_SIZE (info.elt_mode));
+      if (info.insn == simd_immediate_info::MOV)
+	{
+	  gcc_assert (info.u.mov.value == const0_rtx);
+	  snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
+	}
       else
-	snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", element_char);
+	{
+	  gcc_assert (info.insn == simd_immediate_info::PTRUE);
+	  unsigned int total_bytes;
+	  if (info.u.pattern == AARCH64_SV_ALL
+	      && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
+	    snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
+		      total_bytes / GET_MODE_SIZE (info.elt_mode));
+	  else
+	    snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char,
+		      svpattern_token (info.u.pattern));
+	}
       return buf;
     }
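Putting the aarch64.c pieces together, predicate constants now fall into three
cases (hand-checked examples; the register numbers are illustrative).  An
all-false predicate still prints as "pfalse p0.b".  A leading-VL constant whose
length matches an svpattern is a valid immediate and becomes a single
instruction, such as "ptrue p0.h, vl5", or "ptrue p0.h, mul3" for a
30-of-32-elements case at -msve-vector-bits=512.  A leading-VL constant with no
matching pattern (for example VL 9 on a byte predicate) falls through
aarch64_simd_valid_immediate and is expanded by aarch64_sve_move_pred_via_while
into a move of the constant into a scratch register followed by
"whilelo p0.b, xzr, x0".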
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index 702c8b4057a..e21d8e11072 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -689,6 +689,36 @@ fold_const_vec_convert (tree ret_type, tree arg)
   return elts.build ();
 }
 
+/* Try to evaluate:
+
+      IFN_WHILE_ULT (ARG0, ARG1, (TYPE) { ... })
+
+   Return the value on success and null on failure.  */
+
+static tree
+fold_while_ult (tree type, poly_uint64 arg0, poly_uint64 arg1)
+{
+  if (known_ge (arg0, arg1))
+    return build_zero_cst (type);
+
+  if (maybe_ge (arg0, arg1))
+    return NULL_TREE;
+
+  poly_uint64 diff = arg1 - arg0;
+  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+  if (known_ge (diff, nelts))
+    return build_all_ones_cst (type);
+
+  unsigned HOST_WIDE_INT const_diff;
+  if (known_le (diff, nelts) && diff.is_constant (&const_diff))
+    {
+      tree minus_one = build_minus_one_cst (TREE_TYPE (type));
+      tree zero = build_zero_cst (TREE_TYPE (type));
+      return build_vector_a_then_b (type, const_diff, minus_one, zero);
+    }
+  return NULL_TREE;
+}
+
 /* Try to evaluate:
 
       *RESULT = FN (*ARG)
@@ -1782,6 +1812,14 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2)
 	  }
 	return NULL_TREE;
 
+    case CFN_WHILE_ULT:
+      {
+	poly_uint64 parg0, parg1;
+	if (poly_int_tree_p (arg0, &parg0) && poly_int_tree_p (arg1, &parg1))
+	  return fold_while_ult (type, parg0, parg1);
+	return NULL_TREE;
+      }
+
     default:
       return fold_const_call_1 (fn, type, arg0, arg1, arg2);
     }
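A hand-worked pass through the new folding (values chosen for illustration):
for a 4-lane mask type M, IFN_WHILE_ULT (2, 5, M) gives diff = 3 and nelts = 4,
so known_le holds, const_diff is 3, and the result is
build_vector_a_then_b (M, 3, -1, 0), i.e. { -1, -1, -1, 0 }.
IFN_WHILE_ULT (5, 5, M) hits the known_ge (arg0, arg1) test and folds to the
all-false mask, and IFN_WHILE_ULT (0, 8, M) has diff >= nelts and folds to
build_all_ones_cst.  When nelts is a runtime length such as 4 + 4x, a diff of
3 still folds (3 <= 4 + 4x for all x), but a diff of 6 satisfies neither
known_ge nor known_le against nelts, so the call is left alone.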
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c0a7d2dc236..1147dc86a0e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@
+2019-08-13  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gcc.target/aarch64/sve/spill_2.c: Increase iteration counts
+	beyond the range of a PTRUE.
+	* gcc.target/aarch64/sve/while_6.c: New test.
+	* gcc.target/aarch64/sve/while_7.c: Likewise.
+	* gcc.target/aarch64/sve/while_8.c: Likewise.
+	* gcc.target/aarch64/sve/while_9.c: Likewise.
+	* gcc.target/aarch64/sve/while_10.c: Likewise.
+
 2019-08-13  Steven G. Kargl  <kargl@gcc.gnu.org>
 
 	PR fortran/88072
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c
index 28fcc442975..fcd481611ec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c
@@ -9,29 +9,30 @@ void consumer (void *);
   void						\
   multi_loop_##TYPE (TYPE *x, TYPE val)		\
   {						\
-    for (int i = 0; i < 7; ++i)			\
+    for (int i = 0; i < 9; ++i)			\
       x[i] += val;				\
     consumer (x);				\
-    for (int i = 0; i < 7; ++i)			\
+    for (int i = 0; i < 9; ++i)			\
       x[i] += val;				\
     consumer (x);				\
-    for (int i = 0; i < 7; ++i)			\
+    for (int i = 0; i < 9; ++i)			\
       x[i] += val;				\
     consumer (x);				\
   }
 
 /* One iteration is enough.  */
 TEST_LOOP (uint8_t);
+/* Two iterations are enough.  We specialize the second two loops based
+   on whether the first executes once or twice.  */
 TEST_LOOP (uint16_t);
-/* Two iterations are enough.  Complete unrolling makes sense
-   even at -O2.  */
+/* Three iterations are needed; ought to stay a loop.  */
 TEST_LOOP (uint32_t);
-/* Four iterations are needed; ought to stay a loop.  */
+/* Five iterations are needed; ought to stay a loop.  */
 TEST_LOOP (uint64_t);
 
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.b} 3 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 3 } } */
-/* { dg-final { scan-assembler {\twhilelo\tp[0-9]\.s} } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 8 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.d} 6 } } */
 /* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */
 /* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_10.c b/gcc/testsuite/gcc.target/aarch64/sve/while_10.c
new file mode 100644
index 00000000000..eaed326f999
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_10.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+#define ADD_LOOP(TYPE, COUNT)			\
+  TYPE __attribute__ ((noinline, noclone))	\
+  vec_while_##TYPE (TYPE *restrict a)		\
+  {						\
+    for (int i = 0; i < COUNT; ++i)		\
+      a[i] += 1;				\
+  }
+
+#define TEST_ALL(T)				\
+  T (int8_t, 63)				\
+  T (int16_t, 30)				\
+  T (int32_t, 15)				\
+  T (int64_t, 6)
+
+TEST_ALL (ADD_LOOP)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, mul3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, mul3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, mul3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl6\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_6.c b/gcc/testsuite/gcc.target/aarch64/sve/while_6.c
new file mode 100644
index 00000000000..b4cc596efe7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_6.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define ADD_LOOP(TYPE)				\
+  TYPE __attribute__ ((noinline, noclone))	\
+  vec_while_##TYPE (TYPE *restrict a)		\
+  {						\
+    for (int i = 0; i < 7; ++i)			\
+      a[i] += 1;				\
+  }
+
+#define TEST_ALL(T)				\
+  T (int8_t)					\
+  T (int16_t)					\
+  T (int32_t)					\
+  T (int64_t)
+
+TEST_ALL (ADD_LOOP)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
new file mode 100644
index 00000000000..d5ffb66a142
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define ADD_LOOP(TYPE)				\
+  TYPE __attribute__ ((noinline, noclone))	\
+  vec_while_##TYPE (TYPE *restrict a)		\
+  {						\
+    for (int i = 0; i < 8; ++i)			\
+      a[i] += 1;				\
+  }
+
+#define TEST_ALL(T)				\
+  T (int8_t)					\
+  T (int16_t)					\
+  T (int32_t)					\
+  T (int64_t)
+
+TEST_ALL (ADD_LOOP)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_8.c b/gcc/testsuite/gcc.target/aarch64/sve/while_8.c
new file mode 100644
index 00000000000..1c11aa849a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_8.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define ADD_LOOP(TYPE)				\
+  TYPE __attribute__ ((noinline, noclone))	\
+  vec_while_##TYPE (TYPE *restrict a)		\
+  {						\
+    for (int i = 0; i < 9; ++i)			\
+      a[i] += 1;				\
+  }
+
+#define TEST_ALL(T)				\
+  T (int8_t)					\
+  T (int16_t)					\
+  T (int32_t)					\
+  T (int64_t)
+
+TEST_ALL (ADD_LOOP)
+
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
new file mode 100644
index 00000000000..9a8e5fe12fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define ADD_LOOP(TYPE)				\
+  TYPE __attribute__ ((noinline, noclone))	\
+  vec_while_##TYPE (TYPE *restrict a)		\
+  {						\
+    for (int i = 0; i < 16; ++i)		\
+      a[i] += 1;				\
+  }
+
+#define TEST_ALL(T)				\
+  T (int8_t)					\
+  T (int16_t)					\
+  T (int32_t)					\
+  T (int64_t)
+
+TEST_ALL (ADD_LOOP)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/tree.c b/gcc/tree.c
index 8f80012c6e8..ae292281b1f 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -1981,6 +1981,23 @@ build_index_vector (tree vec_type, poly_uint64 base, poly_uint64 step)
   return v.build ();
 }
 
+/* Return a VECTOR_CST of type VEC_TYPE in which the first NUM_A
+   elements are A and the rest are B.  */
+
+tree
+build_vector_a_then_b (tree vec_type, unsigned int num_a, tree a, tree b)
+{
+  gcc_assert (known_le (num_a, TYPE_VECTOR_SUBPARTS (vec_type)));
+  unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_type));
+  /* Optimize the constant case.  */
+  if ((count & 1) == 0 && TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
+    count /= 2;
+  tree_vector_builder builder (vec_type, count, 2);
+  for (unsigned int i = 0; i < count * 2; ++i)
+    builder.quick_push (i < num_a ? a : b);
+  return builder.build ();
+}
+
 /* Something has messed with the elements of CONSTRUCTOR C after it was
    built; calculate TREE_CONSTANT and TREE_SIDE_EFFECTS.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 94dbb95a78a..dd54f4d2af5 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4314,6 +4314,7 @@ extern tree build_vector_from_val (tree, tree);
 extern tree build_uniform_cst (tree, tree);
 extern tree build_vec_series (tree, tree, tree);
 extern tree build_index_vector (tree, poly_uint64, poly_uint64);
+extern tree build_vector_a_then_b (tree, unsigned int, tree, tree);
 extern void recompute_constructor_flags (tree);
 extern void verify_constructor_flags (tree);
 extern tree build_constructor (tree, vec<constructor_elt, va_gc> * CXX_MEM_STAT_INFO);
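A note on the encoding chosen by build_vector_a_then_b (reasoning inferred from
the code, not stated in the patch): a tree_vector_builder with NPATTERNS = count
and NELTS_PER_PATTERN = 2 lists count "foreground" elements followed by count
"background" elements, and every later element repeats its pattern's background.
Pushing A for the first NUM_A positions therefore yields "NUM_A copies of A,
then B forever", which is exactly the shape a folded WHILE_ULT mask needs even
when TYPE_VECTOR_SUBPARTS is a runtime poly_uint64.  The halving in the
fixed-length case just keeps count * 2 equal to the number of elements the
vector actually has.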