diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0780b31a42e..ab690afa222 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,69 @@
+2019-11-08  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* optabs.def (gather_load_optab, mask_gather_load_optab)
+	(scatter_store_optab, mask_scatter_store_optab): Turn into
+	conversion optabs, with the offset mode given explicitly.
+	* doc/md.texi: Update accordingly.
+	* config/aarch64/aarch64-sve-builtins-base.cc
+	(svld1_gather_impl::expand): Likewise.
+	(svst1_scatter_impl::expand): Likewise.
+	* internal-fn.c (gather_load_direct, scatter_store_direct): Likewise.
+	(expand_scatter_store_optab_fn): Likewise.
+	(direct_gather_load_optab_supported_p): Likewise.
+	(direct_scatter_store_optab_supported_p): Likewise.
+	(expand_gather_load_optab_fn): Likewise.  Expect the mask argument
+	to be argument 4.
+	(internal_fn_mask_index): Return 4 for IFN_MASK_GATHER_LOAD.
+	(internal_gather_scatter_fn_supported_p): Replace the offset sign
+	argument with the offset vector type.  Require the two vector
+	types to have the same number of elements but allow their element
+	sizes to be different.  Treat the optabs as conversion optabs.
+	* internal-fn.h (internal_gather_scatter_fn_supported_p): Update
+	prototype accordingly.
+	* optabs-query.c (supports_at_least_one_mode_p): Replace with...
+	(supports_vec_convert_optab_p): ...this new function.
+	(supports_vec_gather_load_p): Update accordingly.
+	(supports_vec_scatter_store_p): Likewise.
+	* tree-vectorizer.h (vect_gather_scatter_fn_p): Take a vec_info.
+	Replace the offset sign and bits parameters with a scalar type tree.
+	* tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
+	Pass back the offset vector type instead of the scalar element type.
+	Allow the offset to be wider than the memory elements.  Search for
+	an offset type that the target supports, stopping once we've
+	reached the maximum of the element size and pointer size.
+	Update call to internal_gather_scatter_fn_supported_p.
+	(vect_check_gather_scatter): Update calls accordingly.
+	When testing a new scale before knowing the final offset type,
+	check whether the scale is supported for any signed or unsigned
+	offset type.  Check whether the target supports the source and
+	target types of a conversion before deciding whether to look
+	through the conversion.  Record the chosen offset_vectype.
+	* tree-vect-patterns.c (vect_get_gather_scatter_offset_type): Delete.
+	(vect_recog_gather_scatter_pattern): Get the scalar offset type
+	directly from the gs_info's offset_vectype instead.  Pass a zero
+	of the result type to IFN_GATHER_LOAD and IFN_MASK_GATHER_LOAD.
+	* tree-vect-stmts.c (check_load_store_masking): Update call to
+	internal_gather_scatter_fn_supported_p, passing the offset vector
+	type recorded in the gs_info.
+	(vect_truncate_gather_scatter_offset): Update call to
+	vect_check_gather_scatter, leaving it to search for a valid
+	offset vector type.
+	(vect_use_strided_gather_scatters_p): Convert the offset to the
+	element type of the gs_info's offset_vectype.
+	(vect_get_gather_scatter_ops): Get the offset vector type directly
+	from the gs_info.
+	(vect_get_strided_load_store_ops): Likewise.
+	(vectorizable_load): Pass a zero of the result type to IFN_GATHER_LOAD
+	and IFN_MASK_GATHER_LOAD.
+	* config/aarch64/aarch64-sve.md (gather_load<mode>): Rename to...
+	(gather_load<mode><v_int_equiv>): ...this.
+	(mask_gather_load<mode>): Rename to...
+	(mask_gather_load<mode><v_int_equiv>): ...this.
+	(scatter_store<mode>): Rename to...
+	(scatter_store<mode><v_int_equiv>): ...this.
+	(mask_scatter_store<mode>): Rename to...
+	(mask_scatter_store<mode><v_int_equiv>): ...this.
+
 2019-11-08  Kewen Lin  <linkw@gcc.gnu.org>
 
 	PR target/92132
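For context: a direct optab is keyed by a single machine mode, whereas a
conversion optab is keyed by a pair of modes, so this change lets a target
provide a separate gather/scatter pattern for each (data vector, offset
vector) combination.  A minimal sketch of the new lookup, using the real
optab query API but with an invented helper name:

/* Sketch only: how a gather handler is located after this patch.
   Before it, the offset mode was implicit in the data mode.  */
static insn_code
lookup_gather_icode (machine_mode data_mode, machine_mode offset_mode)
{
  /* Old form: direct_optab_handler (gather_load_optab, data_mode);  */
  return convert_optab_handler (gather_load_optab, data_mode, offset_mode);
}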
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index ce70f80e98f..e12882ff399 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1076,7 +1076,9 @@ public:
     /* Put the predicate last, as required by mask_gather_load_optab.  */
     e.rotate_inputs_left (0, 5);
     machine_mode mem_mode = e.memory_vector_mode ();
-    insn_code icode = direct_optab_handler (mask_gather_load_optab, mem_mode);
+    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
+    insn_code icode = convert_optab_handler (mask_gather_load_optab,
+					     mem_mode, int_mode);
     return e.use_exact_insn (icode);
   }
 };
@@ -2043,8 +2045,10 @@ public:
     e.prepare_gather_address_operands (1);
     /* Put the predicate last, as required by mask_scatter_store_optab.  */
     e.rotate_inputs_left (0, 6);
-    insn_code icode = direct_optab_handler (mask_scatter_store_optab,
-					    e.memory_vector_mode ());
+    machine_mode mem_mode = e.memory_vector_mode ();
+    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
+    insn_code icode = convert_optab_handler (mask_scatter_store_optab,
+					     mem_mode, int_mode);
     return e.use_exact_insn (icode);
   }
 };
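Since SVE gathers and scatters take their offsets in an integer vector of
the same shape as the data, the builtin expanders can derive the offset
mode from the memory vector mode.  An illustrative pairing (the concrete
modes are assumptions for the example, not taken from the patch):

/* Illustration: for a gather loading VNx4SF, aarch64_sve_int_mode
   yields the same-shape integer vector mode VNx4SI, so the handler
   found is the one for mask_gather_loadvnx4sfvnx4si.  */
machine_mode mem_mode = VNx4SFmode;
machine_mode int_mode = aarch64_sve_int_mode (mem_mode); /* VNx4SImode */
insn_code icode = convert_optab_handler (mask_gather_load_optab,
					 mem_mode, int_mode);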
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 0cda88287b0..51e876aaa74 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1336,7 +1336,7 @@
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated gather loads.
-(define_expand "gather_load<mode>"
+(define_expand "gather_load<mode><v_int_equiv>"
   [(set (match_operand:SVE_SD 0 "register_operand")
	(unspec:SVE_SD
	  [(match_dup 5)
@@ -1354,7 +1354,7 @@
 
 ;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
 ;; unsigned extension and false for signed extension.
-(define_insn "mask_gather_load<mode>"
+(define_insn "mask_gather_load<mode><v_int_equiv>"
   [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_S
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
@@ -1376,7 +1376,7 @@
 
 ;; Predicated gather loads for 64-bit elements.  The value of operand 3
 ;; doesn't matter in this case.
-(define_insn "mask_gather_load<mode>"
+(define_insn "mask_gather_load<mode><v_int_equiv>"
   [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
@@ -1395,7 +1395,7 @@
 )
 
 ;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn "*mask_gather_load<mode>_sxtw"
+(define_insn "*mask_gather_load<mode><v_int_equiv>_sxtw"
   [(set (match_operand:SVE_D 0 "register_operand" "=w, w")
	(unspec:SVE_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -1417,7 +1417,7 @@
 )
 
 ;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_gather_load<mode>_uxtw"
+(define_insn "*mask_gather_load<mode><v_int_equiv>_uxtw"
   [(set (match_operand:SVE_D 0 "register_operand" "=w, w")
	(unspec:SVE_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2054,7 +2054,7 @@
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated scatter stores.
-(define_expand "scatter_store<mode>"
+(define_expand "scatter_store<mode><v_int_equiv>"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 5)
@@ -2072,7 +2072,7 @@
 
 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
 ;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode>"
+(define_insn "mask_scatter_store<mode><v_int_equiv>"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
@@ -2094,7 +2094,7 @@
 
 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
 ;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode>"
+(define_insn "mask_scatter_store<mode><v_int_equiv>"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
@@ -2113,7 +2113,7 @@
 )
 
 ;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*mask_scatter_store<mode>_sxtw"
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2139,7 +2139,7 @@
 )
 
 ;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_scatter_store<mode>_uxtw"
+(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
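As a naming cross-check (illustrative, not part of the patch): taking
<mode> = VNx2DF gives <v_int_equiv> = VNx2DI, so the first expander above
instantiates as

  (define_expand "gather_loadvnx2dfvnx2di" ...)

which is exactly the name that the new "gather_load$a$b" entry in
optabs.def constructs for that (data, offset) mode pair.  Any other
SVE_SD mode works the same way.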
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 19d6893882b..87bbeb4bfc9 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4959,12 +4959,12 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
 
 This pattern is not allowed to @code{FAIL}.
 
-@cindex @code{gather_load@var{m}} instruction pattern
-@item @samp{gather_load@var{m}}
+@cindex @code{gather_load@var{m}@var{n}} instruction pattern
+@item @samp{gather_load@var{m}@var{n}}
 Load several separate memory locations into a vector of mode @var{m}.
-Operand 1 is a scalar base address and operand 2 is a vector of
-offsets from that base.  Operand 0 is a destination vector with the
-same number of elements as the offset.  For each element index @var{i}:
+Operand 1 is a scalar base address and operand 2 is a vector of mode @var{n}
+containing offsets from that base.  Operand 0 is a destination vector with
+the same number of elements as @var{n}.  For each element index @var{i}:
 
 @itemize @bullet
 @item
@@ -4981,20 +4981,20 @@ load the value at that address into element @var{i} of operand 0.
 The value of operand 3 does not matter if the offsets are already
 address width.
 
-@cindex @code{mask_gather_load@var{m}} instruction pattern
-@item @samp{mask_gather_load@var{m}}
-Like @samp{gather_load@var{m}}, but takes an extra mask operand as
+@cindex @code{mask_gather_load@var{m}@var{n}} instruction pattern
+@item @samp{mask_gather_load@var{m}@var{n}}
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand as
 operand 5.  Bit @var{i} of the mask is set if element @var{i}
 of the result should be loaded from memory and clear if element @var{i}
 of the result should be set to zero.
 
-@cindex @code{scatter_store@var{m}} instruction pattern
-@item @samp{scatter_store@var{m}}
+@cindex @code{scatter_store@var{m}@var{n}} instruction pattern
+@item @samp{scatter_store@var{m}@var{n}}
 Store a vector of mode @var{m} into several distinct memory locations.
-Operand 0 is a scalar base address and operand 1 is a vector of offsets
-from that base.  Operand 4 is the vector of values that should be stored,
-which has the same number of elements as the offset.  For each element
-index @var{i}:
+Operand 0 is a scalar base address and operand 1 is a vector of mode
+@var{n} containing offsets from that base.  Operand 4 is the vector of
+values that should be stored, which has the same number of elements as
+@var{n}.  For each element index @var{i}:
 
 @itemize @bullet
 @item
@@ -5011,9 +5011,9 @@ store element @var{i} of operand 4 to that address.
 The value of operand 2 does not matter if the offsets are already
 address width.
 
-@cindex @code{mask_scatter_store@var{m}} instruction pattern
-@item @samp{mask_scatter_store@var{m}}
-Like @samp{scatter_store@var{m}}, but takes an extra mask operand as
+@cindex @code{mask_scatter_store@var{m}@var{n}} instruction pattern
+@item @samp{mask_scatter_store@var{m}@var{n}}
+Like @samp{scatter_store@var{m}@var{n}}, but takes an extra mask operand as
 operand 5.  Bit @var{i} of the mask is set if element @var{i}
 of the result should be stored to memory.
 
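A scalar model of the documented semantics may help; this is an
illustrative C rendering of gather_load@var{m}@var{n} for int elements and
int offsets (all names invented, extension to address width folded into
the cast, bounds concerns elided):

#include <stddef.h>

/* Models the documented operands: operand 0 = result, operand 1 = base,
   operand 2 = offsets, operand 4 = scale.  Operand 3 (offset signedness)
   only matters when offsets are narrower than an address.  */
static void
gather_load_model (int *result, const char *base, const int *offsets,
		   int scale, int nelts)
{
  for (int i = 0; i < nelts; ++i)
    {
      /* Extend the offset, multiply by the scale, add to the base,
	 and load from the resulting address.  */
      const char *addr = base + (ptrdiff_t) offsets[i] * scale;
      result[i] = *(const int *) addr;
    }
}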
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 549d6f1153b..6a878bde24d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -103,11 +103,11 @@ init_internal_fns ()
 #define mask_load_direct { -1, 2, false }
 #define load_lanes_direct { -1, -1, false }
 #define mask_load_lanes_direct { -1, -1, false }
-#define gather_load_direct { -1, -1, false }
+#define gather_load_direct { 3, 1, false }
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
-#define scatter_store_direct { 3, 3, false }
+#define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
 #define ternary_direct { 0, 0, true }
@@ -2785,7 +2785,8 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
       create_input_operand (&ops[i++], mask_rtx,
			    TYPE_MODE (TREE_TYPE (mask)));
     }
 
-  insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)));
+  insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
+					   TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
 }
@@ -2813,11 +2814,12 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   create_integer_operand (&ops[i++], scale_int);
   if (optab == mask_gather_load_optab)
     {
-      tree mask = gimple_call_arg (stmt, 3);
+      tree mask = gimple_call_arg (stmt, 4);
       rtx mask_rtx = expand_normal (mask);
       create_input_operand (&ops[i++], mask_rtx,
			    TYPE_MODE (TREE_TYPE (mask)));
     }
-  insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)));
+  insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
+					   TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
 }
@@ -3084,11 +3086,11 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_load_optab_supported_p direct_optab_supported_p
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
-#define direct_gather_load_optab_supported_p direct_optab_supported_p
+#define direct_gather_load_optab_supported_p convert_optab_supported_p
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
-#define direct_scatter_store_optab_supported_p direct_optab_supported_p
+#define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
 #define direct_fold_left_optab_supported_p direct_optab_supported_p
@@ -3513,8 +3515,6 @@ internal_fn_mask_index (internal_fn fn)
       return 2;
 
     case IFN_MASK_GATHER_LOAD:
-      return 3;
-
     case IFN_MASK_SCATTER_STORE:
       return 4;
 
@@ -3546,27 +3546,30 @@ internal_fn_stored_value_index (internal_fn fn)
    IFN.  For loads, VECTOR_TYPE is the vector type of the load result,
    while for stores it is the vector type of the stored data argument.
    MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded
-   or stored.  OFFSET_SIGN is the sign of the offset argument, which is
-   only relevant when the offset is narrower than an address.  SCALE is
-   the amount by which the offset should be multiplied *after* it has
-   been extended to address width.  */
+   or stored.  OFFSET_VECTOR_TYPE is the vector type that holds the
+   offset from the shared base address of each loaded or stored element.
+   SCALE is the amount by which these offsets should be multiplied
+   *after* they have been extended to address width.  */
 
 bool
 internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
					tree memory_element_type,
-					signop offset_sign, int scale)
+					tree offset_vector_type, int scale)
 {
   if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
			   TYPE_SIZE (memory_element_type)))
     return false;
+  if (maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type),
+		TYPE_VECTOR_SUBPARTS (offset_vector_type)))
+    return false;
   optab optab = direct_internal_fn_optab (ifn);
-  insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type));
+  insn_code icode = convert_optab_handler (optab, TYPE_MODE (vector_type),
+					   TYPE_MODE (offset_vector_type));
   int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
+  bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type));
   return (icode != CODE_FOR_nothing
-	  && insn_operand_matches (icode, 2 + output_ops,
-				   GEN_INT (offset_sign == UNSIGNED))
-	  && insn_operand_matches (icode, 3 + output_ops,
-				   GEN_INT (scale)));
+	  && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
+	  && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
 }
 
 /* Expand STMT as though it were a call to internal function FN.  */
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 7164ee5cf3c..389241a8a06 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -220,7 +220,7 @@ extern bool internal_gather_scatter_fn_p (internal_fn);
 extern int internal_fn_mask_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
-						    tree, signop, int);
+						    tree, tree, int);
 
 extern void expand_internal_call (gcall *);
 extern void expand_internal_call (internal_fn, gcall *);
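The new { 3, 1, false } entries say where the two vector types of the
conversion now come from: argument 3 of IFN_GATHER_LOAD (the zero of the
result type that the vectorizer now passes) determines the data vector
type, and argument 1 determines the offset vector type.  A sketch of the
calls as built after this patch (SSA names invented):

  /* IFN_GATHER_LOAD (base, offsets, scale, type-carrying zero).  */
  gcall *call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
					    base, offsets, scale, zero);

  /* IFN_MASK_GATHER_LOAD adds the mask as argument 4, matching the
     new internal_fn_mask_index result.  */
  gcall *masked = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
					      base, offsets, scale, zero,
					      mask);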
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 2a066960e22..6465b5cf6c5 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -698,14 +698,18 @@ lshift_cheap_p (bool speed_p)
   return cheap[speed_p];
 }
 
-/* Return true if optab OP supports at least one mode.  */
+/* Return true if vector conversion optab OP supports at least one mode,
+   given that the second mode is always an integer vector.  */
 
 static bool
-supports_at_least_one_mode_p (optab op)
+supports_vec_convert_optab_p (optab op)
 {
   for (int i = 0; i < NUM_MACHINE_MODES; ++i)
-    if (direct_optab_handler (op, (machine_mode) i) != CODE_FOR_nothing)
-      return true;
+    if (VECTOR_MODE_P ((machine_mode) i))
+      for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
+	if (convert_optab_handler (op, (machine_mode) i,
+				   (machine_mode) j) != CODE_FOR_nothing)
+	  return true;
 
   return false;
 }
@@ -722,7 +726,7 @@ supports_vec_gather_load_p ()
   this_fn_optabs->supports_vec_gather_load_cached = true;
 
   this_fn_optabs->supports_vec_gather_load
-    = supports_at_least_one_mode_p (gather_load_optab);
+    = supports_vec_convert_optab_p (gather_load_optab);
 
   return this_fn_optabs->supports_vec_gather_load;
 }
@@ -739,7 +743,7 @@ supports_vec_scatter_store_p ()
   this_fn_optabs->supports_vec_scatter_store_cached = true;
 
   this_fn_optabs->supports_vec_scatter_store
-    = supports_at_least_one_mode_p (scatter_store_optab);
+    = supports_vec_convert_optab_p (scatter_store_optab);
 
   return this_fn_optabs->supports_vec_scatter_store;
 }
diff --git a/gcc/optabs.def b/gcc/optabs.def
index e9373158fc0..90e177a5cc0 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -91,6 +91,10 @@ OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
 OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b")
 OPTAB_CD(maskload_optab, "maskload$a$b")
 OPTAB_CD(maskstore_optab, "maskstore$a$b")
+OPTAB_CD(gather_load_optab, "gather_load$a$b")
+OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
+OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
+OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store$a$b")
 OPTAB_CD(vec_extract_optab, "vec_extract$a$b")
 OPTAB_CD(vec_init_optab, "vec_init$a$b")
 
@@ -425,11 +429,6 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
 
 OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
 OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
-OPTAB_D (gather_load_optab, "gather_load$a")
-OPTAB_D (mask_gather_load_optab, "mask_gather_load$a")
-OPTAB_D (scatter_store_optab, "scatter_store$a")
-OPTAB_D (mask_scatter_store_optab, "mask_scatter_store$a")
-
 OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
 OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
 OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
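Callers are unaffected by this rewrite: the cached, function-local
predicate is still how a pass asks whether any gather support exists at
all.  Typical guard, unchanged by the patch:

  /* Early out before any per-mode analysis; the scan over
     (vector mode, integer vector mode) pairs happens at most once per
     function and is cached in this_fn_optabs.  */
  if (!supports_vec_gather_load_p ())
    return false;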
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 9dd18d26536..36639b697f1 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3660,28 +3660,22 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
 /* Check whether we can use an internal function for a gather load
    or scatter store.  READ_P is true for loads and false for stores.
    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
-   the type of the memory elements being loaded or stored.  OFFSET_BITS
-   is the number of bits in each scalar offset and OFFSET_SIGN is the
-   sign of the offset.  SCALE is the amount by which the offset should
+   the type of the memory elements being loaded or stored.  OFFSET_TYPE
+   is the type of the offset that is being applied to the invariant
+   base address.  SCALE is the amount by which the offset should
    be multiplied *after* it has been converted to address width.
 
-   Return true if the function is supported, storing the function
-   id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT.  */
+   Return true if the function is supported, storing the function id in
+   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
 
 bool
-vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
-			  tree memory_type, unsigned int offset_bits,
-			  signop offset_sign, int scale,
-			  internal_fn *ifn_out, tree *element_type_out)
+vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
+			  tree vectype, tree memory_type, tree offset_type,
+			  int scale, internal_fn *ifn_out,
+			  tree *offset_vectype_out)
 {
   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
-  if (offset_bits > element_bits)
-    /* Internal functions require the offset to be the same width as
-       the vector elements.  We can extend narrower offsets, but it isn't
-       safe to truncate wider offsets.  */
-    return false;
-
   if (element_bits != memory_bits)
     /* For now the vector elements must be the same width as the
        memory elements.  */
@@ -3694,14 +3688,28 @@ vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
   else
     ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
 
-  /* Test whether the target supports this combination.  */
-  if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
-					       offset_sign, scale))
-    return false;
+  for (;;)
+    {
+      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+      if (!offset_vectype)
+	return false;
 
-  *ifn_out = ifn;
-  *element_type_out = TREE_TYPE (vectype);
-  return true;
+      /* Test whether the target supports this combination.  */
+      if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+						  offset_vectype, scale))
+	{
+	  *ifn_out = ifn;
+	  *offset_vectype_out = offset_vectype;
+	  return true;
+	}
+
+      if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
+	  && TYPE_PRECISION (offset_type) >= element_bits)
+	return false;
+
+      offset_type = build_nonstandard_integer_type
+	(TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
+    }
 }
 
 /* STMT_INFO is a call to an internal gather load or scatter store function.
@@ -3744,7 +3752,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
   machine_mode pmode;
   int punsignedp, reversep, pvolatilep = 0;
   internal_fn ifn;
-  tree element_type;
+  tree offset_vectype;
   bool masked_p = false;
 
   /* See whether this is already a call to a gather/scatter internal function.
@@ -3905,13 +3913,18 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
	{
	  int new_scale = tree_to_shwi (op1);
	  /* Only treat this as a scaling operation if the target
-	     supports it.  */
+	     supports it for at least some offset type.  */
	  if (use_ifn_p
-	      && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
-					    vectype, memory_type, 1,
-					    TYPE_SIGN (TREE_TYPE (op0)),
+	      && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+					    masked_p, vectype, memory_type,
+					    signed_char_type_node,
					    new_scale, &ifn,
-					    &element_type))
+					    &offset_vectype)
+	      && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+					    masked_p, vectype, memory_type,
+					    unsigned_char_type_node,
+					    new_scale, &ifn,
+					    &offset_vectype))
	    break;
	  scale = new_scale;
	  off = op0;
@@ -3925,6 +3938,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
	    break;
+
+	  /* Don't include the conversion if the target is happy with
+	     the current offset type.  */
+	  if (use_ifn_p
+	      && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+					   masked_p, vectype, memory_type,
+					   TREE_TYPE (off), scale, &ifn,
+					   &offset_vectype))
+	    break;
+
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      == TYPE_PRECISION (TREE_TYPE (off)))
	    {
@@ -3932,14 +3955,6 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
	      off = op0;
	      continue;
	    }
-	  /* The internal functions need the offset to be the same width
-	     as the elements of VECTYPE.  Don't include operations that
-	     cast the offset from that width to a different width.  */
-	  if (use_ifn_p
-	      && (int_size_in_bytes (TREE_TYPE (vectype))
-		  == int_size_in_bytes (TREE_TYPE (off))))
-	    break;
-
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      < TYPE_PRECISION (TREE_TYPE (off)))
	    {
@@ -3966,10 +3981,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
 
   if (use_ifn_p)
     {
-      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
-				     memory_type, TYPE_PRECISION (offtype),
-				     TYPE_SIGN (offtype), scale, &ifn,
-				     &element_type))
+      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+				     vectype, memory_type, offtype, scale,
+				     &ifn, &offset_vectype))
	return false;
     }
   else
@@ -3989,7 +4003,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
	return false;
 
       ifn = IFN_LAST;
-      element_type = TREE_TYPE (vectype);
+      /* The offset vector type will be read from DECL when needed.  */
+      offset_vectype = NULL_TREE;
     }
 
   info->ifn = ifn;
@@ -3997,9 +4012,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
   info->base = base;
   info->offset = off;
   info->offset_dt = vect_unknown_def_type;
-  info->offset_vectype = NULL_TREE;
+  info->offset_vectype = offset_vectype;
   info->scale = scale;
-  info->element_type = element_type;
+  info->element_type = TREE_TYPE (vectype);
   info->memory_type = memory_type;
   return true;
 }
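A worked example of the new widening search, assuming an SVE target with
64-bit pointers and a gather of double indexed by int (the exact stopping
point depends on the patterns the target provides):

/* vect_gather_scatter_fn_p, starting from offset_type = int:
     int (32 bits):  the resulting offset vectype does not give a
		     supported (VNx2DF, offset) pair, and 32 is less
		     than max (64, 64), so double the precision;
     64-bit type:    offset vectype VNx2DI, for which the target's
		     mask_gather_loadvnx2dfvnx2di pattern exists, so
		     stop and record VNx2DI as gs_info.offset_vectype.  */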
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index c0fdde68697..8ebbcd76b64 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4498,28 +4498,6 @@ vect_get_load_store_mask (stmt_vec_info stmt_info)
   gcc_unreachable ();
 }
 
-/* Return the scalar offset type that an internal gather/scatter function
-   should use.  GS_INFO describes the gather/scatter operation.  */
-
-static tree
-vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
-{
-  tree offset_type = TREE_TYPE (gs_info->offset);
-  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
-
-  /* Enforced by vect_check_gather_scatter.  */
-  unsigned int offset_bits = TYPE_PRECISION (offset_type);
-  gcc_assert (element_bits >= offset_bits);
-
-  /* If the offset is narrower than the elements, extend it according
-     to its sign.  */
-  if (element_bits > offset_bits)
-    return build_nonstandard_integer_type (element_bits,
-					   TYPE_UNSIGNED (offset_type));
-
-  return offset_type;
-}
-
 /* Return MASK if MASK is suitable for masking an operation on vectors
    of type VECTYPE, otherwise convert it into such a form and return
    the result.  Associate any conversion statements with STMT_INFO's
@@ -4604,7 +4582,7 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
   /* Get the invariant base and non-invariant offset, converting the
      latter to the same width as the vector elements.  */
   tree base = gs_info.base;
-  tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
+  tree offset_type = TREE_TYPE (gs_info.offset_vectype);
   tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
						stmt_info);
 
@@ -4613,12 +4591,13 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
   gcall *pattern_stmt;
   if (DR_IS_READ (dr))
     {
+      tree zero = build_zero_cst (gs_info.element_type);
       if (mask != NULL)
-	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
-						   offset, scale, mask);
+	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
+						   offset, scale, zero, mask);
       else
-	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
-						   offset, scale);
+	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
+						   offset, scale, zero);
       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
       gimple_call_set_lhs (pattern_stmt, load_lhs);
     }
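Schematically, the pattern replacement now looks like this for a masked
gather of int with scale 4 (gimple shown informally; SSA names and
constants are invented):

  /* Scalar statement:     x_1 = base_2[off_3];
     Replacement pattern:  patt_4 = .MASK_GATHER_LOAD (base_2, off_3,
							4, 0, mask_5);
     where off_3 has been converted to the element type of the chosen
     offset_vectype and the literal 0 is the zero of the result type.  */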
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3cda8888521..2bbc783ffe0 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1910,10 +1910,9 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
       internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
-      tree offset_type = TREE_TYPE (gs_info->offset);
       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
-						   TYPE_SIGN (offset_type),
+						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
@@ -2046,35 +2045,33 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;
 
-      /* See whether we can calculate (COUNT - 1) * STEP / SCALE
-	 in OFFSET_BITS bits.  */
+      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
       if (overflow)
	continue;
       signop sign = range >= 0 ? UNSIGNED : SIGNED;
-      if (wi::min_precision (range, sign) > element_bits)
-	{
-	  overflow = wi::OVF_UNKNOWN;
-	  continue;
-	}
+      unsigned int min_offset_bits = wi::min_precision (range, sign);
 
-      /* See whether the target supports the operation.  */
-      tree memory_type = TREE_TYPE (DR_REF (dr));
-      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
-				     memory_type, element_bits, sign, scale,
-				     &gs_info->ifn, &gs_info->element_type))
-	continue;
-
-      tree offset_type = build_nonstandard_integer_type (element_bits,
+      /* Find the narrowest viable offset type.  */
+      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
+      tree offset_type = build_nonstandard_integer_type (offset_bits,
							 sign == UNSIGNED);
 
+      /* See whether the target supports the operation with an offset
+	 no narrower than OFFSET_TYPE.  */
+      tree memory_type = TREE_TYPE (DR_REF (dr));
+      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+				     vectype, memory_type, offset_type, scale,
+				     &gs_info->ifn, &gs_info->offset_vectype))
+	continue;
+
       gs_info->decl = NULL_TREE;
       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
       gs_info->base = NULL_TREE;
+      gs_info->element_type = TREE_TYPE (vectype);
       gs_info->offset = fold_convert (offset_type, step);
       gs_info->offset_dt = vect_constant_def;
-      gs_info->offset_vectype = NULL_TREE;
       gs_info->scale = scale;
       gs_info->memory_type = memory_type;
       return true;
@@ -2104,22 +2101,12 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);
 
-  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
-  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
-  tree offset_type = TREE_TYPE (gs_info->offset);
-  unsigned int offset_bits = TYPE_PRECISION (offset_type);
+  tree old_offset_type = TREE_TYPE (gs_info->offset);
+  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
 
-  /* Enforced by vect_check_gather_scatter.  */
-  gcc_assert (element_bits >= offset_bits);
-
-  /* If the elements are wider than the offset, convert the offset to the
-     same width, without changing its sign.  */
-  if (element_bits > offset_bits)
-    {
-      bool unsigned_p = TYPE_UNSIGNED (offset_type);
-      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
-      gs_info->offset = fold_convert (offset_type, gs_info->offset);
-    }
+  gcc_assert (TYPE_PRECISION (new_offset_type)
+	      >= TYPE_PRECISION (old_offset_type));
+  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -2963,7 +2950,6 @@ vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
			     gather_scatter_info *gs_info, tree *dataref_ptr,
			     tree *vec_offset)
 {
-  vec_info *vinfo = stmt_info->vinfo;
   gimple_seq stmts = NULL;
   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
   if (stmts != NULL)
@@ -2973,10 +2959,8 @@ vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
       gcc_assert (!new_bb);
     }
-  tree offset_type = TREE_TYPE (gs_info->offset);
-  tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
-					      offset_vectype);
+					      gs_info->offset_vectype);
 }
 
 /* Prepare to implement a grouped or strided load or store using
@@ -3009,8 +2993,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   /* The offset given in GS_INFO can have pointer type, so use the element
      type of the vector instead.  */
   tree offset_type = TREE_TYPE (gs_info->offset);
-  tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type);
-  offset_type = TREE_TYPE (offset_vectype);
+  offset_type = TREE_TYPE (gs_info->offset_vectype);
 
   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
@@ -3019,7 +3002,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
 
   /* Create {0, X, X*2, X*3, ...}.  */
-  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
+  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
			      build_zero_cst (offset_type), step);
   if (stmts)
     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
@@ -9442,16 +9425,17 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
	  if (memory_access_type == VMAT_GATHER_SCATTER)
	    {
+	      tree zero = build_zero_cst (vectype);
	      tree scale = size_int (gs_info.scale);
	      gcall *call;
	      if (loop_masks)
		call = gimple_build_call_internal
-		  (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
-		   vec_offset, scale, final_mask);
+		  (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
+		   vec_offset, scale, zero, final_mask);
	      else
		call = gimple_build_call_internal
-		  (IFN_GATHER_LOAD, 3, dataref_ptr,
-		   vec_offset, scale);
+		  (IFN_GATHER_LOAD, 4, dataref_ptr,
+		   vec_offset, scale, zero);
	      gimple_call_set_nothrow (call, true);
	      new_stmt = call;
	      data_ref = NULL_TREE;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6b4e92e78af..96eb1f52927 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1678,8 +1678,8 @@ extern opt_result vect_verify_datarefs_alignment (loop_vec_info);
 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance);
 extern opt_result vect_analyze_data_ref_accesses (vec_info *);
 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
-extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int,
-				      signop, int, internal_fn *, tree *);
+extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
+				      tree, int, internal_fn *, tree *);
 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
				       gather_scatter_info *);
 extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
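Finally, the rounding step in vect_truncate_gather_scatter_offset can be
checked in isolation.  This stand-alone snippet models it, with ceil_log2
(GCC's helper) reimplemented so the example runs outside GCC:

#include <stdio.h>

/* Smallest r such that 2**r >= x; mirrors GCC's ceil_log2 for the
   small values used here.  */
static unsigned int
ceil_log2_model (unsigned int x)
{
  unsigned int r = 0;
  while ((1u << r) < x)
    r++;
  return r;
}

int
main (void)
{
  /* If (COUNT - 1) * STEP / SCALE needs 13 bits, the candidate offset
     type is rounded up to the next power-of-two width.  */
  unsigned int min_offset_bits = 13;
  unsigned int offset_bits = 1u << ceil_log2_model (min_offset_bits);
  printf ("%u\n", offset_bits);	/* prints 16 */
  return 0;
}

vect_gather_scatter_fn_p is then free to widen that 16-bit candidate
further if the target only supports wider offsets.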