VECT: Apply LEN_MASK_GATHER_LOAD/SCATTER_STORE into vectorizer
Hi, Richard and Richi.  This version addresses Richi's comments and sets
gs_info.ifn = IFN_LEN_MASK_GATHER_LOAD/IFN_LEN_MASK_SCATTER_STORE.

I have fully tested these 4 formats, where length = vf is a dummy length
and mask = {-1, -1, ...} is a dummy mask:

1. no length, no mask:
   LEN_MASK_GATHER_LOAD (..., length = vf, mask = {-1, -1, ...})
2. length exists, no mask:
   LEN_MASK_GATHER_LOAD (..., len, mask = {-1, -1, ...})
3. mask exists, no length:
   LEN_MASK_GATHER_LOAD (..., length = vf, mask)
4. both mask and length exist:
   LEN_MASK_GATHER_LOAD (..., length, mask)

All of these work fine with this patch.  Here is an example:

void f (int *restrict a, int *restrict b, int n, int base, int step,
        int *restrict cond)
{
  for (int i = 0; i < n; ++i)
    {
      if (cond[i])
        a[i * 4] = b[i];
    }
}

Gimple IR:

<bb 3> [local count: 105119324]:
_58 = (unsigned long) n_13(D);

<bb 4> [local count: 630715945]:
# vectp_cond.7_45 = PHI <vectp_cond.7_46(4), cond_14(D)(3)>
# vectp_b.11_51 = PHI <vectp_b.11_52(4), b_15(D)(3)>
# vectp_a.14_55 = PHI <vectp_a.14_56(4), a_16(D)(3)>
# ivtmp_59 = PHI <ivtmp_60(4), _58(3)>
_61 = .SELECT_VL (ivtmp_59, POLY_INT_CST [2, 2]);
ivtmp_44 = _61 * 4;
vect__4.9_47 = .LEN_MASK_LOAD (vectp_cond.7_45, 32B, _61, 0, { -1, ... });
mask__24.10_49 = vect__4.9_47 != { 0, ... };
vect__8.13_53 = .LEN_MASK_LOAD (vectp_b.11_51, 32B, _61, 0, mask__24.10_49);
ivtmp_54 = _61 * 16;
.LEN_MASK_SCATTER_STORE (vectp_a.14_55, { 0, 16, 32, ... }, 1, vect__8.13_53,
                         mask__24.10_49, _61, 0);
vectp_cond.7_46 = vectp_cond.7_45 + ivtmp_44;
vectp_b.11_52 = vectp_b.11_51 + ivtmp_44;
vectp_a.14_56 = vectp_a.14_55 + ivtmp_54;
ivtmp_60 = ivtmp_59 - _61;
if (ivtmp_60 != 0)
  goto <bb 4>; [83.33%]
else
  goto <bb 5>; [16.67%]

Ok for trunk?

gcc/ChangeLog:

	* internal-fn.cc (internal_fn_len_index): Apply
	LEN_MASK_GATHER_LOAD/SCATTER_STORE into vectorizer.
	(internal_fn_mask_index): Ditto.
	* optabs-query.cc (supports_vec_gather_load_p): Ditto.
	(supports_vec_scatter_store_p): Ditto.
	* tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Ditto.
	* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Ditto.
	* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
	(vect_get_strided_load_store_ops): Ditto.
	(vectorizable_store): Ditto.
	(vectorizable_load): Ditto.
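To illustrate format 2 (length active, dummy mask): dropping the condition
from f above gives an unconditional strided store.  The expected loop body is
then the following; this is a hand-written sketch reusing the SSA names from
the dump above, not verified compiler output:

void g (int *restrict a, int *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    a[i * 4] = b[i];
}

Expected loop body (sketch):

_61 = .SELECT_VL (ivtmp_59, POLY_INT_CST [2, 2]);
vect__8.13_53 = .LEN_MASK_LOAD (vectp_b.11_51, 32B, _61, 0, { -1, ... });
.LEN_MASK_SCATTER_STORE (vectp_a.14_55, { 0, 16, 32, ... }, 1, vect__8.13_53,
                         { -1, ... }, _61, 0);

Here the all-ones constant is the dummy mask operand; only the .SELECT_VL
length limits the access.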
parent f4a2ae2338
commit 34c614b7e9
5 changed files with 129 additions and 23 deletions
gcc/internal-fn.cc

@@ -4472,7 +4472,7 @@ internal_fn_len_index (internal_fn fn)
     case IFN_LEN_MASK_GATHER_LOAD:
     case IFN_LEN_MASK_SCATTER_STORE:
-      return 4;
+      return 5;
 
     default:
       return -1;
@@ -4497,11 +4497,9 @@ internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_SCATTER_STORE:
     case IFN_LEN_MASK_LOAD:
     case IFN_LEN_MASK_STORE:
-      return 4;
-
     case IFN_LEN_MASK_GATHER_LOAD:
     case IFN_LEN_MASK_SCATTER_STORE:
-      return 6;
+      return 4;
 
     default:
       return (conditional_internal_fn_code (fn) != ERROR_MARK
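The two indices follow the argument order of the calls built in
tree-vect-stmts.cc further down in this patch; here is the scatter-store
builder call again, with the argument positions spelled out in comments for
reference:

  call = gimple_build_call_internal (IFN_LEN_MASK_SCATTER_STORE, 7,
				     dataref_ptr, /* 0: base pointer     */
				     vec_offset,  /* 1: offset vector    */
				     scale,	  /* 2: scale	         */
				     vec_oprnd,   /* 3: stored data      */
				     final_mask,  /* 4: mask index       */
				     final_len,   /* 5: len index        */
				     bias);	  /* 6: bias	         */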
gcc/optabs-query.cc

@@ -676,6 +676,7 @@ supports_vec_gather_load_p (machine_mode mode)
     this_fn_optabs->supports_vec_gather_load[mode]
       = (supports_vec_convert_optab_p (gather_load_optab, mode)
 	 || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
+	 || supports_vec_convert_optab_p (len_mask_gather_load_optab, mode)
 	 ? 1 : -1);
 
   return this_fn_optabs->supports_vec_gather_load[mode] > 0;
@@ -692,6 +693,7 @@ supports_vec_scatter_store_p (machine_mode mode)
     this_fn_optabs->supports_vec_scatter_store[mode]
       = (supports_vec_convert_optab_p (scatter_store_optab, mode)
 	 || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
+	 || supports_vec_convert_optab_p (len_mask_scatter_store_optab, mode)
 	 ? 1 : -1);
 
   return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
gcc/tree-vect-data-refs.cc

@@ -3873,16 +3873,24 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
     return false;
 
   /* Work out which function we need.  */
-  internal_fn ifn, alt_ifn;
+  internal_fn ifn, alt_ifn, alt_ifn2;
   if (read_p)
     {
       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
       alt_ifn = IFN_MASK_GATHER_LOAD;
+      /* When target supports LEN_MASK_GATHER_LOAD, we always
+	 use LEN_MASK_GATHER_LOAD regardless whether len and
+	 mask are valid or not.  */
+      alt_ifn2 = IFN_LEN_MASK_GATHER_LOAD;
     }
   else
     {
       ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
       alt_ifn = IFN_MASK_SCATTER_STORE;
+      /* When target supports LEN_MASK_SCATTER_STORE, we always
+	 use LEN_MASK_SCATTER_STORE regardless whether len and
+	 mask are valid or not.  */
+      alt_ifn2 = IFN_LEN_MASK_SCATTER_STORE;
     }
 
   for (;;)
@@ -3909,6 +3917,14 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
 	  *offset_vectype_out = offset_vectype;
 	  return true;
 	}
+      else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
+						       memory_type,
+						       offset_vectype, scale))
+	{
+	  *ifn_out = alt_ifn2;
+	  *offset_vectype_out = offset_vectype;
+	  return true;
+	}
 
       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
 	  && TYPE_PRECISION (offset_type) >= element_bits)
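In short, each candidate offset type is now checked against three IFNs in
order.  A condensed model of that order follows; this is a hypothetical
standalone helper written for illustration, not the real vectorizer API, with
target_supports_p standing in for internal_gather_scatter_fn_supported_p:

#include <stdbool.h>

/* Return the first internal function the target supports, trying the
   exact variant, then the masked variant (unmasked ops pass an all-true
   mask), then the len+mask variant (dummy len and/or mask as needed).  */
static int
pick_candidate (bool (*target_supports_p) (int), int ifn, int alt_ifn,
		int alt_ifn2)
{
  if (target_supports_p (ifn))
    return ifn;
  if (target_supports_p (alt_ifn))
    return alt_ifn;
  if (target_supports_p (alt_ifn2))
    return alt_ifn2;
  return -1;  /* No match; the caller retries with a wider offset type.  */
}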
gcc/tree-vect-patterns.cc

@@ -6075,7 +6075,9 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
 					  loop_vinfo);
   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
-	   || gs_info.ifn == IFN_MASK_GATHER_LOAD)
+	   || gs_info.ifn == IFN_MASK_GATHER_LOAD
+	   || gs_info.ifn == IFN_LEN_MASK_SCATTER_STORE
+	   || gs_info.ifn == IFN_LEN_MASK_GATHER_LOAD)
     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
 
   /* Get the invariant base and non-invariant offset, converting the
gcc/tree-vect-stmts.cc

@@ -1786,6 +1786,18 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
+	  ifn = (is_load
+		 ? IFN_LEN_MASK_GATHER_LOAD
+		 : IFN_LEN_MASK_SCATTER_STORE);
+	  if (internal_gather_scatter_fn_supported_p (ifn, vectype,
+						      gs_info->memory_type,
+						      gs_info->offset_vectype,
+						      gs_info->scale))
+	    {
+	      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+	      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+	      return;
+	    }
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
@@ -3144,16 +3156,39 @@ vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
 static void
 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
 				 loop_vec_info loop_vinfo,
+				 gimple_stmt_iterator *gsi,
 				 gather_scatter_info *gs_info,
-				 tree *dataref_bump, tree *vec_offset)
+				 tree *dataref_bump, tree *vec_offset,
+				 vec_loop_lens *loop_lens)
 {
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 
-  tree bump = size_binop (MULT_EXPR,
-			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
-			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
-  *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
+  if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+    {
+      /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
+	 ivtmp_8 = _31 * 16 (step in bytes);
+	 .LEN_MASK_SCATTER_STORE (vectp_a.9_7, ... );
+	 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
+      tree loop_len
+	= vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
+      tree tmp
+	= fold_build2 (MULT_EXPR, sizetype,
+		       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
+		       loop_len);
+      tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
+      gassign *assign = gimple_build_assign (bump, tmp);
+      gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+      *dataref_bump = bump;
+    }
+  else
+    {
+      tree bump
+	= size_binop (MULT_EXPR,
+		      fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
+		      size_int (TYPE_VECTOR_SUBPARTS (vectype)));
+      *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
+    }
 
   /* The offset given in GS_INFO can have pointer type, so use the element
      type of the vector instead.  */
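Relating this to the dump in the commit message: with int elements stored at
stride 4, DR_STEP is 16 bytes, so the SELECT_VL path materializes the bump as
a runtime multiply of the length, which is the ivtmp_54 sequence shown there:

_61 = .SELECT_VL (ivtmp_59, POLY_INT_CST [2, 2]);
ivtmp_54 = _61 * 16;   (bump = len * DR_STEP)
.LEN_MASK_SCATTER_STORE (vectp_a.14_55, ...);
vectp_a.14_56 = vectp_a.14_55 + ivtmp_54;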
@@ -8700,8 +8735,8 @@ vectorizable_store (vec_info *vinfo,
   else if (memory_access_type == VMAT_GATHER_SCATTER)
     {
       aggr_type = elem_type;
-      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
-				       &bump, &vec_offset);
+      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
+				       &bump, &vec_offset, loop_lens);
     }
   else
     {
@@ -8930,6 +8965,8 @@ vectorizable_store (vec_info *vinfo,
	      unsigned HOST_WIDE_INT align;
 
	      tree final_mask = NULL_TREE;
+	      tree final_len = NULL_TREE;
+	      tree bias = NULL_TREE;
	      if (loop_masks)
		final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
						 vec_num * ncopies,
@@ -8944,8 +8981,36 @@ vectorizable_store (vec_info *vinfo,
	      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
		vec_offset = vec_offsets[vec_num * j + i];
	      tree scale = size_int (gs_info.scale);
+
+	      if (gs_info.ifn == IFN_LEN_MASK_SCATTER_STORE)
+		{
+		  if (loop_lens)
+		    final_len
+		      = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+					   vec_num * ncopies, vectype,
+					   vec_num * j + i, 1);
+		  else
+		    final_len
+		      = build_int_cst (sizetype,
+				       TYPE_VECTOR_SUBPARTS (vectype));
+		  signed char biasval
+		    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+		  bias = build_int_cst (intQI_type_node, biasval);
+		  if (!final_mask)
+		    {
+		      mask_vectype = truth_type_for (vectype);
+		      final_mask = build_minus_one_cst (mask_vectype);
+		    }
+		}
+
	      gcall *call;
-	      if (final_mask)
+	      if (final_len && final_mask)
+		call
+		  = gimple_build_call_internal (IFN_LEN_MASK_SCATTER_STORE,
+						7, dataref_ptr, vec_offset,
+						scale, vec_oprnd, final_mask,
+						final_len, bias);
+	      else if (final_mask)
		call = gimple_build_call_internal
		  (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
		   scale, vec_oprnd, final_mask);
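In effect the store path now chooses between these call shapes; this is a
sketch with placeholder operand names, and the plain unmasked SCATTER_STORE
branch further down is unchanged:

.LEN_MASK_SCATTER_STORE (ptr, offsets, scale, data, mask, len, bias)
    when final_len && final_mask
.MASK_SCATTER_STORE (ptr, offsets, scale, data, mask)
    when final_mask only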
@@ -9062,9 +9127,6 @@ vectorizable_store (vec_info *vinfo,
		machine_mode vmode = TYPE_MODE (vectype);
		machine_mode new_vmode = vmode;
		internal_fn partial_ifn = IFN_LAST;
-		/* Produce 'len' and 'bias' argument.  */
-		tree final_len = NULL_TREE;
-		tree bias = NULL_TREE;
		if (loop_lens)
		  {
		    opt_machine_mode new_ovmode
@@ -10192,8 +10254,8 @@ vectorizable_load (vec_info *vinfo,
   else if (memory_access_type == VMAT_GATHER_SCATTER)
     {
       aggr_type = elem_type;
-      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
-				       &bump, &vec_offset);
+      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
+				       &bump, &vec_offset, loop_lens);
     }
   else
     {
@@ -10354,6 +10416,8 @@ vectorizable_load (vec_info *vinfo,
	  for (i = 0; i < vec_num; i++)
	    {
	      tree final_mask = NULL_TREE;
+	      tree final_len = NULL_TREE;
+	      tree bias = NULL_TREE;
	      if (loop_masks
		  && memory_access_type != VMAT_INVARIANT)
		final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
@@ -10383,8 +10447,35 @@ vectorizable_load (vec_info *vinfo,
		    vec_offset = vec_offsets[vec_num * j + i];
		  tree zero = build_zero_cst (vectype);
		  tree scale = size_int (gs_info.scale);
+
+		  if (gs_info.ifn == IFN_LEN_MASK_GATHER_LOAD)
+		    {
+		      if (loop_lens)
+			final_len
+			  = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+					       vec_num * ncopies, vectype,
+					       vec_num * j + i, 1);
+		      else
+			final_len
+			  = build_int_cst (sizetype,
+					   TYPE_VECTOR_SUBPARTS (vectype));
+		      signed char biasval
+			= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+		      bias = build_int_cst (intQI_type_node, biasval);
+		      if (!final_mask)
+			{
+			  mask_vectype = truth_type_for (vectype);
+			  final_mask = build_minus_one_cst (mask_vectype);
+			}
+		    }
+
		  gcall *call;
-		  if (final_mask)
+		  if (final_len && final_mask)
+		    call = gimple_build_call_internal
+		      (IFN_LEN_MASK_GATHER_LOAD, 7, dataref_ptr,
+		       vec_offset, scale, zero, final_mask, final_len,
+		       bias);
+		  else if (final_mask)
		    call = gimple_build_call_internal
		      (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
		       vec_offset, scale, zero, final_mask);
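When the loop is length-controlled but the load itself is unmasked (format 2
again), final_mask is the all-ones constant built just above, so the emitted
gather would look like this; placeholder names, illustrative only:

vect_x = .LEN_MASK_GATHER_LOAD (ptr, offsets, scale, { 0, ... },
				{ -1, ... }, len, bias);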
@@ -10477,9 +10568,6 @@ vectorizable_load (vec_info *vinfo,
		machine_mode vmode = TYPE_MODE (vectype);
		machine_mode new_vmode = vmode;
		internal_fn partial_ifn = IFN_LAST;
-		/* Produce 'len' and 'bias' argument.  */
-		tree final_len = NULL_TREE;
-		tree bias = NULL_TREE;
		if (loop_lens)
		  {
		    opt_machine_mode new_ovmode