omp-low.c (struct omp_context): Add scan_inclusive field.
* omp-low.c (struct omp_context): Add scan_inclusive field. (scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive if inclusive scan. (struct omplow_simd_context): Add lastlane member. (lower_rec_simd_input_clauses): Add rvar argument, handle inscan reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than 1 or 2 argument. (lower_rec_input_clauses): Handle inscan reductions in simd contexts. (lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF. (lower_omp_scan): New function. (lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan. * tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE check 3rd argument if present rather than 2nd. * tree-vectorizer.h (struct _loop_vec_info): Add scan_map member. (struct _stmt_vec_info): Change simd_lane_access_p from bool into 2-bit bitfield. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather than 2nd. (_loop_vec_info::~_loop_vec_info): Delete scan_map. * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same init. (vect_find_stmt_data_reference): Encode in ->aux the 2nd IFN_GOMP_SIMD_LANE argument. (vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the encoded ->aux value. * tree-vect-stmts.c: Include attribs.h. (vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE. (scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New functions. (vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0. (vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1. cp/ * semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN set need_copy_assignment. testsuite/ * gcc.dg/vect/vect-simd-8.c: New test. * gcc.dg/vect/vect-simd-9.c: New test. * g++.dg/vect/simd-2.cc: New test. * g++.dg/gomp/scan-1.C: New test. From-SVN: r272399
This commit is contained in:
parent
a064fd4c73
commit
0356aab806
14 changed files with 1348 additions and 34 deletions
|
@ -1,3 +1,39 @@
|
|||
2019-06-17 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* omp-low.c (struct omp_context): Add scan_inclusive field.
|
||||
(scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
|
||||
if inclusive scan.
|
||||
(struct omplow_simd_context): Add lastlane member.
|
||||
(lower_rec_simd_input_clauses): Add rvar argument, handle inscan
|
||||
reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
|
||||
1 or 2 argument.
|
||||
(lower_rec_input_clauses): Handle inscan reductions in simd contexts.
|
||||
(lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
|
||||
(lower_omp_scan): New function.
|
||||
(lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
|
||||
* tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
|
||||
check 3rd argument if present rather than 2nd.
|
||||
* tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
|
||||
(struct _stmt_vec_info): Change simd_lane_access_p from bool into
|
||||
2-bit bitfield.
|
||||
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
|
||||
scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
|
||||
than 2nd.
|
||||
(_loop_vec_info::~_loop_vec_info): Delete scan_map.
|
||||
* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
|
||||
different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
|
||||
init.
|
||||
(vect_find_stmt_data_reference): Encode in ->aux the 2nd
|
||||
IFN_GOMP_SIMD_LANE argument.
|
||||
(vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
|
||||
encoded ->aux value.
|
||||
* tree-vect-stmts.c: Include attribs.h.
|
||||
(vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
|
||||
(scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
|
||||
functions.
|
||||
(vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
|
||||
(vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
|
||||
|
||||
2019-06-17 Uroš Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/62055
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2019-06-17 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
|
||||
set need_copy_assignment.
|
||||
|
||||
2019-06-17 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
PR c++/83820 - excessive attribute arguments not detected.
|
||||
|
|
|
@ -7688,6 +7688,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
|
|||
case OMP_CLAUSE_REDUCTION:
|
||||
if (reduction_seen == -2)
|
||||
OMP_CLAUSE_REDUCTION_INSCAN (c) = 0;
|
||||
if (OMP_CLAUSE_REDUCTION_INSCAN (c))
|
||||
need_copy_assignment = true;
|
||||
need_implicitly_determined = true;
|
||||
break;
|
||||
case OMP_CLAUSE_IN_REDUCTION:
|
||||
|
|
377
gcc/omp-low.c
377
gcc/omp-low.c
|
@ -141,6 +141,9 @@ struct omp_context
|
|||
/* True if lower_omp_1 should look up lastprivate conditional in parent
|
||||
context. */
|
||||
bool combined_into_simd_safelen0;
|
||||
|
||||
/* True if there is nested scan context with inclusive clause. */
|
||||
bool scan_inclusive;
|
||||
};
|
||||
|
||||
static splay_tree all_contexts;
|
||||
|
@ -3329,11 +3332,15 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
|
|||
scan_omp_single (as_a <gomp_single *> (stmt), ctx);
|
||||
break;
|
||||
|
||||
case GIMPLE_OMP_SCAN:
|
||||
if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
|
||||
if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
|
||||
ctx->scan_inclusive = true;
|
||||
/* FALLTHRU */
|
||||
case GIMPLE_OMP_SECTION:
|
||||
case GIMPLE_OMP_MASTER:
|
||||
case GIMPLE_OMP_ORDERED:
|
||||
case GIMPLE_OMP_CRITICAL:
|
||||
case GIMPLE_OMP_SCAN:
|
||||
case GIMPLE_OMP_GRID_BODY:
|
||||
ctx = new_omp_context (stmt, ctx);
|
||||
scan_omp (gimple_omp_body_ptr (stmt), ctx);
|
||||
|
@ -3671,6 +3678,7 @@ struct omplow_simd_context {
|
|||
omplow_simd_context () { memset (this, 0, sizeof (*this)); }
|
||||
tree idx;
|
||||
tree lane;
|
||||
tree lastlane;
|
||||
vec<tree, va_heap> simt_eargs;
|
||||
gimple_seq simt_dlist;
|
||||
poly_uint64_pod max_vf;
|
||||
|
@ -3682,7 +3690,8 @@ struct omplow_simd_context {
|
|||
|
||||
static bool
|
||||
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
||||
omplow_simd_context *sctx, tree &ivar, tree &lvar)
|
||||
omplow_simd_context *sctx, tree &ivar,
|
||||
tree &lvar, tree *rvar = NULL)
|
||||
{
|
||||
if (known_eq (sctx->max_vf, 0U))
|
||||
{
|
||||
|
@ -3738,7 +3747,27 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
|||
= tree_cons (get_identifier ("omp simd array"), NULL,
|
||||
DECL_ATTRIBUTES (avar));
|
||||
gimple_add_tmp_var (avar);
|
||||
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
|
||||
tree iavar = avar;
|
||||
if (rvar)
|
||||
{
|
||||
/* For inscan reductions, create another array temporary,
|
||||
which will hold the reduced value. */
|
||||
iavar = create_tmp_var_raw (atype);
|
||||
if (TREE_ADDRESSABLE (new_var))
|
||||
TREE_ADDRESSABLE (iavar) = 1;
|
||||
DECL_ATTRIBUTES (iavar)
|
||||
= tree_cons (get_identifier ("omp simd array"), NULL,
|
||||
tree_cons (get_identifier ("omp simd inscan"), NULL,
|
||||
DECL_ATTRIBUTES (iavar)));
|
||||
gimple_add_tmp_var (iavar);
|
||||
ctx->cb.decl_map->put (avar, iavar);
|
||||
if (sctx->lastlane == NULL_TREE)
|
||||
sctx->lastlane = create_tmp_var (unsigned_type_node);
|
||||
*rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
|
||||
sctx->lastlane, NULL_TREE, NULL_TREE);
|
||||
TREE_THIS_NOTRAP (*rvar) = 1;
|
||||
}
|
||||
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
|
||||
NULL_TREE, NULL_TREE);
|
||||
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
|
||||
NULL_TREE, NULL_TREE);
|
||||
|
@ -3814,7 +3843,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
omplow_simd_context sctx = omplow_simd_context ();
|
||||
tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
|
||||
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
|
||||
gimple_seq llist[3] = { };
|
||||
gimple_seq llist[4] = { };
|
||||
tree nonconst_simd_if = NULL_TREE;
|
||||
|
||||
copyin_seq = NULL;
|
||||
|
@ -5155,9 +5184,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
new_vard = TREE_OPERAND (new_var, 0);
|
||||
gcc_assert (DECL_P (new_vard));
|
||||
}
|
||||
tree rvar = NULL_TREE, *rvarp = NULL;
|
||||
if (is_simd
|
||||
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
|
||||
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
|
||||
rvarp = &rvar;
|
||||
if (is_simd
|
||||
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
|
||||
ivar, lvar))
|
||||
ivar, lvar, rvarp))
|
||||
{
|
||||
if (new_vard == new_var)
|
||||
{
|
||||
|
@ -5173,6 +5207,93 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, unshare_expr (ivar),
|
||||
build_outer_var_ref (var, ctx));
|
||||
if (rvarp)
|
||||
{
|
||||
if (x)
|
||||
{
|
||||
gimplify_and_add (x, &llist[0]);
|
||||
|
||||
tree ivar2 = unshare_expr (lvar);
|
||||
TREE_OPERAND (ivar2, 1) = sctx.idx;
|
||||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, ivar2, build_outer_var_ref (var, ctx));
|
||||
gimplify_and_add (x, &llist[0]);
|
||||
|
||||
/* For types that need construction, add another
|
||||
private var which will be default constructed
|
||||
and optionally initialized with
|
||||
OMP_CLAUSE_REDUCTION_GIMPLE_INIT, as in the
|
||||
loop we want to assign this value instead of
|
||||
constructing and destructing it in each
|
||||
iteration. */
|
||||
tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
|
||||
gimple_add_tmp_var (nv);
|
||||
ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
|
||||
nv);
|
||||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, nv, build_outer_var_ref (var, ctx));
|
||||
gimplify_and_add (x, ilist);
|
||||
|
||||
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
|
||||
{
|
||||
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
|
||||
x = DECL_VALUE_EXPR (new_var);
|
||||
SET_DECL_VALUE_EXPR (new_var, nv);
|
||||
lower_omp (&tseq, ctx);
|
||||
SET_DECL_VALUE_EXPR (new_var, x);
|
||||
gimple_seq_add_seq (ilist, tseq);
|
||||
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
|
||||
}
|
||||
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, nv);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (dlist, tseq);
|
||||
}
|
||||
}
|
||||
|
||||
tree ref = build_outer_var_ref (var, ctx);
|
||||
x = unshare_expr (ivar);
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, x,
|
||||
ref);
|
||||
gimplify_and_add (x, &llist[0]);
|
||||
|
||||
ref = build_outer_var_ref (var, ctx);
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, ref,
|
||||
rvar);
|
||||
gimplify_and_add (x, &llist[3]);
|
||||
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
|
||||
if (new_vard == new_var)
|
||||
SET_DECL_VALUE_EXPR (new_var, lvar);
|
||||
else
|
||||
SET_DECL_VALUE_EXPR (new_vard,
|
||||
build_fold_addr_expr (lvar));
|
||||
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, ivar);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (&llist[1], tseq);
|
||||
}
|
||||
|
||||
tree ivar2 = unshare_expr (lvar);
|
||||
TREE_OPERAND (ivar2, 1) = sctx.idx;
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (&llist[1], tseq);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (x)
|
||||
gimplify_and_add (x, &llist[0]);
|
||||
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
|
||||
|
@ -5240,6 +5361,41 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
: build_outer_var_ref (var, ctx));
|
||||
if (x)
|
||||
gimplify_and_add (x, ilist);
|
||||
|
||||
if (rvarp)
|
||||
{
|
||||
if (x)
|
||||
{
|
||||
tree nv = create_tmp_var_raw (TREE_TYPE (new_vard));
|
||||
gimple_add_tmp_var (nv);
|
||||
ctx->cb.decl_map->put (new_var, nv);
|
||||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, nv, build_outer_var_ref (var, ctx));
|
||||
gimplify_and_add (x, ilist);
|
||||
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
|
||||
{
|
||||
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
|
||||
SET_DECL_VALUE_EXPR (new_var, nv);
|
||||
DECL_HAS_VALUE_EXPR_P (new_var) = 1;
|
||||
lower_omp (&tseq, ctx);
|
||||
SET_DECL_VALUE_EXPR (new_var, NULL_TREE);
|
||||
DECL_HAS_VALUE_EXPR_P (new_var) = 0;
|
||||
gimple_seq_add_seq (ilist, tseq);
|
||||
}
|
||||
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, nv);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (dlist, tseq);
|
||||
}
|
||||
}
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
|
||||
goto do_dtor;
|
||||
}
|
||||
|
||||
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
|
||||
{
|
||||
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
|
||||
|
@ -5324,12 +5480,32 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
new_vard = TREE_OPERAND (new_var, 0);
|
||||
gcc_assert (DECL_P (new_vard));
|
||||
}
|
||||
tree rvar = NULL_TREE, *rvarp = NULL;
|
||||
if (is_simd
|
||||
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
|
||||
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
|
||||
rvarp = &rvar;
|
||||
if (is_simd
|
||||
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
|
||||
ivar, lvar))
|
||||
ivar, lvar, rvarp))
|
||||
{
|
||||
if (new_vard != new_var)
|
||||
{
|
||||
SET_DECL_VALUE_EXPR (new_vard,
|
||||
build_fold_addr_expr (lvar));
|
||||
DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
|
||||
}
|
||||
|
||||
tree ref = build_outer_var_ref (var, ctx);
|
||||
|
||||
if (rvarp)
|
||||
{
|
||||
gimplify_assign (ivar, ref, &llist[0]);
|
||||
ref = build_outer_var_ref (var, ctx);
|
||||
gimplify_assign (ref, rvar, &llist[3]);
|
||||
break;
|
||||
}
|
||||
|
||||
gimplify_assign (unshare_expr (ivar), x, &llist[0]);
|
||||
|
||||
if (sctx.is_simt)
|
||||
|
@ -5346,14 +5522,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
ref = build_outer_var_ref (var, ctx);
|
||||
gimplify_assign (ref, x, &llist[1]);
|
||||
|
||||
if (new_vard != new_var)
|
||||
{
|
||||
SET_DECL_VALUE_EXPR (new_vard,
|
||||
build_fold_addr_expr (lvar));
|
||||
DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (rvarp == NULL)
|
||||
{
|
||||
if (omp_is_reference (var) && is_simd)
|
||||
handle_simd_reference (clause_loc, new_vard, ilist);
|
||||
|
@ -5456,14 +5626,23 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
if (sctx.lane)
|
||||
{
|
||||
gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE,
|
||||
1 + (nonconst_simd_if != NULL),
|
||||
uid, nonconst_simd_if);
|
||||
2 + (nonconst_simd_if != NULL),
|
||||
uid, integer_zero_node,
|
||||
nonconst_simd_if);
|
||||
gimple_call_set_lhs (g, sctx.lane);
|
||||
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
|
||||
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
|
||||
g = gimple_build_assign (sctx.lane, INTEGER_CST,
|
||||
build_int_cst (unsigned_type_node, 0));
|
||||
gimple_seq_add_stmt (ilist, g);
|
||||
if (sctx.lastlane)
|
||||
{
|
||||
g = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
|
||||
2, uid, sctx.lane);
|
||||
gimple_call_set_lhs (g, sctx.lastlane);
|
||||
gimple_seq_add_stmt (dlist, g);
|
||||
gimple_seq_add_seq (dlist, llist[3]);
|
||||
}
|
||||
/* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */
|
||||
if (llist[2])
|
||||
{
|
||||
|
@ -5865,6 +6044,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
|
|||
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
|
||||
TREE_OPERAND (val, 0), lastlane,
|
||||
NULL_TREE, NULL_TREE);
|
||||
TREE_THIS_NOTRAP (new_var) = 1;
|
||||
}
|
||||
}
|
||||
else if (maybe_simt)
|
||||
|
@ -8371,6 +8551,167 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
}
|
||||
|
||||
|
||||
/* Expand code for an OpenMP scan directive and the structured block
|
||||
before the scan directive. */
|
||||
|
||||
static void
|
||||
lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||
{
|
||||
gimple *stmt = gsi_stmt (*gsi_p);
|
||||
bool has_clauses
|
||||
= gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)) != NULL;
|
||||
tree lane = NULL_TREE;
|
||||
gimple_seq before = NULL;
|
||||
omp_context *octx = ctx->outer;
|
||||
gcc_assert (octx);
|
||||
bool input_phase = has_clauses ^ octx->scan_inclusive;
|
||||
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
|
||||
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
|
||||
&& !gimple_omp_for_combined_into_p (octx->stmt)
|
||||
&& octx->scan_inclusive)
|
||||
{
|
||||
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
|
||||
OMP_CLAUSE__SIMDUID_))
|
||||
{
|
||||
tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
|
||||
lane = create_tmp_var (unsigned_type_node);
|
||||
tree t = build_int_cst (integer_type_node, 1 + !input_phase);
|
||||
gimple *g
|
||||
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
|
||||
gimple_call_set_lhs (g, lane);
|
||||
gimple_seq_add_stmt (&before, g);
|
||||
}
|
||||
for (tree c = gimple_omp_for_clauses (octx->stmt);
|
||||
c; c = OMP_CLAUSE_CHAIN (c))
|
||||
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
|
||||
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
|
||||
{
|
||||
tree var = OMP_CLAUSE_DECL (c);
|
||||
tree new_var = lookup_decl (var, octx);
|
||||
tree val = new_var;
|
||||
tree var2 = NULL_TREE;
|
||||
tree var3 = NULL_TREE;
|
||||
if (DECL_HAS_VALUE_EXPR_P (new_var))
|
||||
{
|
||||
val = DECL_VALUE_EXPR (new_var);
|
||||
if (TREE_CODE (val) == ARRAY_REF
|
||||
&& VAR_P (TREE_OPERAND (val, 0)))
|
||||
{
|
||||
tree v = TREE_OPERAND (val, 0);
|
||||
if (lookup_attribute ("omp simd array",
|
||||
DECL_ATTRIBUTES (v)))
|
||||
{
|
||||
val = unshare_expr (val);
|
||||
TREE_OPERAND (val, 1) = lane;
|
||||
var2 = lookup_decl (v, octx);
|
||||
if (input_phase
|
||||
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
var3 = maybe_lookup_decl (var2, octx);
|
||||
if (!input_phase)
|
||||
{
|
||||
var2 = build4 (ARRAY_REF, TREE_TYPE (val),
|
||||
var2, lane, NULL_TREE, NULL_TREE);
|
||||
TREE_THIS_NOTRAP (var2) = 1;
|
||||
}
|
||||
else
|
||||
var2 = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
var2 = build_outer_var_ref (var, octx);
|
||||
if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
{
|
||||
var3 = maybe_lookup_decl (new_var, octx);
|
||||
if (var3 == new_var)
|
||||
var3 = NULL_TREE;
|
||||
}
|
||||
}
|
||||
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
{
|
||||
tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
|
||||
if (input_phase)
|
||||
{
|
||||
if (var3)
|
||||
{
|
||||
/* If we've added a separate identity element
|
||||
variable, copy it over into val. */
|
||||
tree x = lang_hooks.decls.omp_clause_assign_op (c, val,
|
||||
var3);
|
||||
gimplify_and_add (x, &before);
|
||||
}
|
||||
else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
|
||||
{
|
||||
/* Otherwise, assign to it the identity element. */
|
||||
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
|
||||
tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
|
||||
? DECL_VALUE_EXPR (new_var) : NULL_TREE);
|
||||
tree ref = build_outer_var_ref (var, octx);
|
||||
SET_DECL_VALUE_EXPR (new_var, val);
|
||||
SET_DECL_VALUE_EXPR (placeholder, ref);
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
|
||||
lower_omp (&tseq, octx);
|
||||
SET_DECL_VALUE_EXPR (new_var, x);
|
||||
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
|
||||
if (x == NULL_TREE)
|
||||
DECL_HAS_VALUE_EXPR_P (new_var) = 0;
|
||||
gimple_seq_add_seq (&before, tseq);
|
||||
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
|
||||
tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
|
||||
? DECL_VALUE_EXPR (new_var) : NULL_TREE);
|
||||
SET_DECL_VALUE_EXPR (new_var, val);
|
||||
SET_DECL_VALUE_EXPR (placeholder, var2);
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
|
||||
lower_omp (&tseq, octx);
|
||||
gimple_seq_add_seq (&before, tseq);
|
||||
OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
|
||||
SET_DECL_VALUE_EXPR (new_var, x);
|
||||
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
|
||||
gimplify_and_add (x, &before);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (input_phase)
|
||||
{
|
||||
/* input phase. Set val to initializer before
|
||||
the body. */
|
||||
tree x = omp_reduction_init (c, TREE_TYPE (new_var));
|
||||
gimplify_assign (val, x, &before);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* scan phase. */
|
||||
enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
|
||||
if (code == MINUS_EXPR)
|
||||
code = PLUS_EXPR;
|
||||
|
||||
tree x = build2 (code, TREE_TYPE (var2),
|
||||
unshare_expr (var2), unshare_expr (val));
|
||||
gimplify_assign (unshare_expr (var2), x, &before);
|
||||
gimplify_assign (val, var2, &before);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (has_clauses)
|
||||
sorry_at (gimple_location (stmt),
|
||||
"%<#pragma omp scan%> not supported yet");
|
||||
gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
|
||||
gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
|
||||
gsi_replace (gsi_p, gimple_build_nop (), true);
|
||||
}
|
||||
|
||||
|
||||
/* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
|
||||
substitution of a couple of function calls. But in the NAMED case,
|
||||
requires that languages coordinate a symbol name. It is therefore
|
||||
|
@ -10843,11 +11184,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
case GIMPLE_OMP_SCAN:
|
||||
ctx = maybe_lookup_ctx (stmt);
|
||||
gcc_assert (ctx);
|
||||
gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
|
||||
if (gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
|
||||
sorry_at (gimple_location (stmt),
|
||||
"%<#pragma omp scan%> not supported yet");
|
||||
gsi_replace (gsi_p, gimple_build_nop (), true);
|
||||
lower_omp_scan (gsi_p, ctx);
|
||||
break;
|
||||
case GIMPLE_OMP_CRITICAL:
|
||||
ctx = maybe_lookup_ctx (stmt);
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2019-06-17 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.dg/vect/vect-simd-8.c: New test.
|
||||
* gcc.dg/vect/vect-simd-9.c: New test.
|
||||
* g++.dg/vect/simd-2.cc: New test.
|
||||
* g++.dg/gomp/scan-1.C: New test.
|
||||
|
||||
2019-06-17 Uroš Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/62055
|
||||
|
|
26
gcc/testsuite/g++.dg/gomp/scan-1.C
Normal file
26
gcc/testsuite/g++.dg/gomp/scan-1.C
Normal file
|
@ -0,0 +1,26 @@
|
|||
// { dg-do compile { target c++11 } }
|
||||
|
||||
struct S { S (); ~S (); S &operator = (const S &) = delete; int s; }; // { dg-message "declared here" }
|
||||
#pragma omp declare reduction (+ : S : omp_out.s += omp_in.s)
|
||||
|
||||
S s;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
#pragma omp simd reduction (+: s)
|
||||
for (int i = 0; i < 64; ++i)
|
||||
s.s += i;
|
||||
}
|
||||
|
||||
void
|
||||
bar (int *x)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +: s) // { dg-error "use of deleted function" }
|
||||
for (int i = 0; i < 64; ++i)
|
||||
{
|
||||
s.s += i;
|
||||
#pragma omp scan inclusive (s) // { dg-error "" }
|
||||
x[i] = s.s;
|
||||
}
|
||||
}
|
153
gcc/testsuite/g++.dg/vect/simd-2.cc
Normal file
153
gcc/testsuite/g++.dg/vect/simd-2.cc
Normal file
|
@ -0,0 +1,153 @@
|
|||
// { dg-require-effective-target size32plus }
|
||||
// { dg-additional-options "-fopenmp-simd" }
|
||||
// { dg-additional-options "-mavx" { target avx_runtime } }
|
||||
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
|
||||
|
||||
#include "../../gcc.dg/vect/tree-vect.h"
|
||||
|
||||
struct S {
|
||||
inline S ();
|
||||
inline ~S ();
|
||||
inline S (const S &);
|
||||
inline S & operator= (const S &);
|
||||
int s;
|
||||
};
|
||||
|
||||
S::S () : s (0)
|
||||
{
|
||||
}
|
||||
|
||||
S::~S ()
|
||||
{
|
||||
}
|
||||
|
||||
S::S (const S &x)
|
||||
{
|
||||
s = x.s;
|
||||
}
|
||||
|
||||
S &
|
||||
S::operator= (const S &x)
|
||||
{
|
||||
s = x.s;
|
||||
return *this;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ini (S &x)
|
||||
{
|
||||
x.s = 0;
|
||||
}
|
||||
|
||||
S r, a[1024], b[1024];
|
||||
|
||||
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
|
||||
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (S *a, S *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
r.s += a[i].s;
|
||||
#pragma omp scan inclusive(r)
|
||||
b[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) S
|
||||
bar (void)
|
||||
{
|
||||
S s;
|
||||
#pragma omp simd reduction (inscan, plus:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
s.s += 2 * a[i].s;
|
||||
#pragma omp scan inclusive(s)
|
||||
b[i] = s;
|
||||
}
|
||||
return S (s);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (S *a, S *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r) simdlen(1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
r.s += a[i].s;
|
||||
#pragma omp scan inclusive(r)
|
||||
b[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) S
|
||||
qux (void)
|
||||
{
|
||||
S s;
|
||||
#pragma omp simd if (0) reduction (inscan, plus:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
s.s += 2 * a[i].s;
|
||||
#pragma omp scan inclusive(s)
|
||||
b[i] = s;
|
||||
}
|
||||
return S (s);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
S s;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i].s = i;
|
||||
b[i].s = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r.s != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s.s += i;
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
else
|
||||
b[i].s = 25;
|
||||
}
|
||||
if (bar ().s != 1024 * 1023)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s.s += 2 * i;
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
}
|
||||
r.s = 0;
|
||||
baz (a, b);
|
||||
if (r.s != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s.s += i;
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
else
|
||||
b[i].s = 25;
|
||||
}
|
||||
if (qux ().s != 1024 * 1023)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s.s += 2 * i;
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
118
gcc/testsuite/gcc.dg/vect/vect-simd-8.c
Normal file
118
gcc/testsuite/gcc.dg/vect/vect-simd-8.c
Normal file
|
@ -0,0 +1,118 @@
|
|||
/* { dg-require-effective-target size32plus } */
|
||||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
int r, a[1024], b[1024];
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
r += a[i];
|
||||
#pragma omp scan inclusive(r)
|
||||
b[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
bar (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, +:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
s += 2 * a[i];
|
||||
#pragma omp scan inclusive(s)
|
||||
b[i] = s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
r += a[i];
|
||||
#pragma omp scan inclusive(r)
|
||||
b[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
qux (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, +:s) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
s += 2 * a[i];
|
||||
#pragma omp scan inclusive(s)
|
||||
b[i] = s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = 25;
|
||||
}
|
||||
if (bar () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += 2 * i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -1;
|
||||
}
|
||||
r = 0;
|
||||
baz (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -25;
|
||||
}
|
||||
if (qux () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += 2 * i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
120
gcc/testsuite/gcc.dg/vect/vect-simd-9.c
Normal file
120
gcc/testsuite/gcc.dg/vect/vect-simd-9.c
Normal file
|
@ -0,0 +1,120 @@
|
|||
/* { dg-require-effective-target size32plus } */
|
||||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
int r, a[1024], b[1024];
|
||||
|
||||
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, foo:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
r += a[i];
|
||||
#pragma omp scan inclusive(r)
|
||||
b[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
bar (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, foo:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
s += 2 * a[i];
|
||||
#pragma omp scan inclusive(s)
|
||||
b[i] = s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
r += a[i];
|
||||
#pragma omp scan inclusive(r)
|
||||
b[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
qux (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
s += 2 * a[i];
|
||||
#pragma omp scan inclusive(s)
|
||||
b[i] = s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = 25;
|
||||
}
|
||||
if (bar () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += 2 * i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -1;
|
||||
}
|
||||
r = 0;
|
||||
baz (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -25;
|
||||
}
|
||||
if (qux () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
s += 2 * i;
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -1339,14 +1339,14 @@ eliminate_unnecessary_stmts (void)
|
|||
update_stmt (stmt);
|
||||
release_ssa_name (name);
|
||||
|
||||
/* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON
|
||||
/* GOMP_SIMD_LANE (unless three argument) or ASAN_POISON
|
||||
without lhs is not needed. */
|
||||
if (gimple_call_internal_p (stmt))
|
||||
switch (gimple_call_internal_fn (stmt))
|
||||
{
|
||||
case IFN_GOMP_SIMD_LANE:
|
||||
if (gimple_call_num_args (stmt) >= 2
|
||||
&& !integer_nonzerop (gimple_call_arg (stmt, 1)))
|
||||
if (gimple_call_num_args (stmt) >= 3
|
||||
&& !integer_nonzerop (gimple_call_arg (stmt, 2)))
|
||||
break;
|
||||
/* FALLTHRU */
|
||||
case IFN_ASAN_POISON:
|
||||
|
|
|
@ -3003,6 +3003,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
|
|||
|| TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
|
||||
break;
|
||||
|
||||
/* Different .GOMP_SIMD_LANE calls still give the same lane,
|
||||
just hold extra information. */
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
|
||||
&& STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
|
||||
&& data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
|
||||
break;
|
||||
|
||||
/* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
|
||||
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
|
||||
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
|
||||
|
@ -4101,7 +4108,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
|
|||
DR_STEP_ALIGNMENT (newdr)
|
||||
= highest_pow2_factor (step);
|
||||
/* Mark as simd-lane access. */
|
||||
newdr->aux = (void *)-1;
|
||||
tree arg2 = gimple_call_arg (def, 1);
|
||||
newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
|
||||
free_data_ref (dr);
|
||||
datarefs->safe_push (newdr);
|
||||
return opt_result::success ();
|
||||
|
@ -4210,14 +4218,17 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
|
|||
}
|
||||
|
||||
/* See if this was detected as SIMD lane access. */
|
||||
if (dr->aux == (void *)-1)
|
||||
if (dr->aux == (void *)-1
|
||||
|| dr->aux == (void *)-2
|
||||
|| dr->aux == (void *)-3)
|
||||
{
|
||||
if (nested_in_vect_loop_p (loop, stmt_info))
|
||||
return opt_result::failure_at (stmt_info->stmt,
|
||||
"not vectorized:"
|
||||
" data ref analysis failed: %G",
|
||||
stmt_info->stmt);
|
||||
STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
|
||||
STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
|
||||
= -(uintptr_t) dr->aux;
|
||||
}
|
||||
|
||||
tree base = get_base_address (DR_REF (dr));
|
||||
|
|
|
@ -824,6 +824,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
|
|||
peeling_for_alignment (0),
|
||||
ptr_mask (0),
|
||||
ivexpr_map (NULL),
|
||||
scan_map (NULL),
|
||||
slp_unrolling_factor (1),
|
||||
single_scalar_iteration_cost (0),
|
||||
vectorizable (false),
|
||||
|
@ -863,8 +864,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
|
|||
gimple *stmt = gsi_stmt (si);
|
||||
gimple_set_uid (stmt, 0);
|
||||
add_stmt (stmt);
|
||||
/* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the
|
||||
second argument is the #pragma omp simd if (x) condition, when 0,
|
||||
/* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
|
||||
third argument is the #pragma omp simd if (x) condition, when 0,
|
||||
loop shouldn't be vectorized, when non-zero constant, it should
|
||||
be vectorized normally, otherwise versioned with vectorized loop
|
||||
done if the condition is non-zero at runtime. */
|
||||
|
@ -872,12 +873,12 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
|
|||
&& is_gimple_call (stmt)
|
||||
&& gimple_call_internal_p (stmt)
|
||||
&& gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
|
||||
&& gimple_call_num_args (stmt) >= 2
|
||||
&& gimple_call_num_args (stmt) >= 3
|
||||
&& TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
|
||||
&& (loop_in->simduid
|
||||
== SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
|
||||
{
|
||||
tree arg = gimple_call_arg (stmt, 1);
|
||||
tree arg = gimple_call_arg (stmt, 2);
|
||||
if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
|
||||
simd_if_cond = arg;
|
||||
else
|
||||
|
@ -959,6 +960,7 @@ _loop_vec_info::~_loop_vec_info ()
|
|||
|
||||
release_vec_loop_masks (&masks);
|
||||
delete ivexpr_map;
|
||||
delete scan_map;
|
||||
|
||||
loop->aux = NULL;
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tree-ssa-loop-niter.h"
|
||||
#include "gimple-fold.h"
|
||||
#include "regs.h"
|
||||
#include "attribs.h"
|
||||
|
||||
/* For lang_hooks.types.type_for_mode. */
|
||||
#include "langhooks.h"
|
||||
|
@ -3257,7 +3258,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
if (nargs == 0 || nargs > 4)
|
||||
return false;
|
||||
|
||||
/* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
|
||||
/* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
|
||||
combined_fn cfn = gimple_call_combined_fn (stmt);
|
||||
if (cfn == CFN_GOMP_SIMD_LANE)
|
||||
{
|
||||
|
@ -6320,6 +6321,489 @@ get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
|
|||
}
|
||||
|
||||
|
||||
/* Function scan_operand_equal_p.
|
||||
|
||||
Helper function for check_scan_store. Compare two references
|
||||
with .GOMP_SIMD_LANE bases. */
|
||||
|
||||
static bool
|
||||
scan_operand_equal_p (tree ref1, tree ref2)
|
||||
{
|
||||
machine_mode mode1, mode2;
|
||||
poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
|
||||
tree offset1, offset2;
|
||||
int unsignedp1, unsignedp2, reversep1, reversep2;
|
||||
int volatilep1 = 0, volatilep2 = 0;
|
||||
tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
|
||||
&mode1, &unsignedp1, &reversep1,
|
||||
&volatilep1);
|
||||
tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
|
||||
&mode2, &unsignedp2, &reversep2,
|
||||
&volatilep2);
|
||||
if (reversep1 || reversep2 || volatilep1 || volatilep2)
|
||||
return false;
|
||||
if (!operand_equal_p (base1, base2, 0))
|
||||
return false;
|
||||
if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
|
||||
return false;
|
||||
if (maybe_ne (bitsize1, bitsize2))
|
||||
return false;
|
||||
if (!operand_equal_p (offset1, offset2, 0))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function check_scan_store.
|
||||
|
||||
Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
|
||||
|
||||
static bool
|
||||
check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
||||
enum vect_def_type rhs_dt, bool slp, tree mask,
|
||||
vect_memory_access_type memory_access_type)
|
||||
{
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
|
||||
tree ref_type;
|
||||
|
||||
gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
|
||||
if (slp
|
||||
|| mask
|
||||
|| memory_access_type != VMAT_CONTIGUOUS
|
||||
|| TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
|
||||
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
|
||||
|| loop_vinfo == NULL
|
||||
|| LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|
||||
|| STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
||||
|| !integer_zerop (DR_OFFSET (dr_info->dr))
|
||||
|| !integer_zerop (DR_INIT (dr_info->dr))
|
||||
|| !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
|
||||
|| !alias_sets_conflict_p (get_alias_set (vectype),
|
||||
get_alias_set (TREE_TYPE (ref_type))))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"unsupported OpenMP scan store.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* We need to pattern match code built by OpenMP lowering and simplified
|
||||
by following optimizations into something we can handle.
|
||||
#pragma omp simd reduction(inscan,+:r)
|
||||
for (...)
|
||||
{
|
||||
r += something ();
|
||||
#pragma omp scan inclusive (r)
|
||||
use (r);
|
||||
}
|
||||
shall have body with:
|
||||
// Initialization for input phase, store the reduction initializer:
|
||||
_20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
|
||||
_21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
|
||||
D.2042[_21] = 0;
|
||||
// Actual input phase:
|
||||
...
|
||||
r.0_5 = D.2042[_20];
|
||||
_6 = _4 + r.0_5;
|
||||
D.2042[_20] = _6;
|
||||
// Initialization for scan phase:
|
||||
_25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
|
||||
_26 = D.2043[_25];
|
||||
_27 = D.2042[_25];
|
||||
_28 = _26 + _27;
|
||||
D.2043[_25] = _28;
|
||||
D.2042[_25] = _28;
|
||||
// Actual scan phase:
|
||||
...
|
||||
r.1_8 = D.2042[_20];
|
||||
...
|
||||
The "omp simd array" variable D.2042 holds the privatized copy used
|
||||
inside of the loop and D.2043 is another one that holds copies of
|
||||
the current original list item. The separate GOMP_SIMD_LANE ifn
|
||||
kinds are there in order to allow optimizing the initializer store
|
||||
and combiner sequence, e.g. if it is originally some C++ish user
|
||||
defined reduction, but allow the vectorizer to pattern recognize it
|
||||
and turn into the appropriate vectorized scan. */
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
|
||||
{
|
||||
/* Match the D.2042[_21] = 0; store above. Just require that
|
||||
it is a constant or external definition store. */
|
||||
if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
|
||||
{
|
||||
fail_init:
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"unsupported OpenMP scan initializer store.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (! loop_vinfo->scan_map)
|
||||
loop_vinfo->scan_map = new hash_map<tree, tree>;
|
||||
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
tree &cached = loop_vinfo->scan_map->get_or_insert (var);
|
||||
if (cached)
|
||||
goto fail_init;
|
||||
cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
|
||||
|
||||
/* These stores can be vectorized normally. */
|
||||
return true;
|
||||
}
|
||||
|
||||
if (rhs_dt != vect_internal_def)
|
||||
{
|
||||
fail:
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"unsupported OpenMP scan combiner pattern.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
gimple *stmt = STMT_VINFO_STMT (stmt_info);
|
||||
tree rhs = gimple_assign_rhs1 (stmt);
|
||||
if (TREE_CODE (rhs) != SSA_NAME)
|
||||
goto fail;
|
||||
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
gimple *other_store_stmt = NULL;
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
|
||||
{
|
||||
gimple *use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt == stmt || is_gimple_debug (use_stmt))
|
||||
continue;
|
||||
if (gimple_bb (use_stmt) != gimple_bb (stmt)
|
||||
|| !gimple_store_p (use_stmt)
|
||||
|| other_store_stmt)
|
||||
goto fail;
|
||||
other_store_stmt = use_stmt;
|
||||
}
|
||||
if (other_store_stmt == NULL)
|
||||
goto fail;
|
||||
stmt_vec_info other_store_stmt_info
|
||||
= loop_vinfo->lookup_stmt (other_store_stmt);
|
||||
if (other_store_stmt_info == NULL
|
||||
|| STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
|
||||
goto fail;
|
||||
|
||||
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
|
||||
if (gimple_bb (def_stmt) != gimple_bb (stmt)
|
||||
|| !is_gimple_assign (def_stmt)
|
||||
|| gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
|
||||
goto fail;
|
||||
|
||||
enum tree_code code = gimple_assign_rhs_code (def_stmt);
|
||||
/* For pointer addition, we should use the normal plus for the vector
|
||||
operation. */
|
||||
switch (code)
|
||||
{
|
||||
case POINTER_PLUS_EXPR:
|
||||
code = PLUS_EXPR;
|
||||
break;
|
||||
case MULT_HIGHPART_EXPR:
|
||||
goto fail;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
|
||||
goto fail;
|
||||
|
||||
tree rhs1 = gimple_assign_rhs1 (def_stmt);
|
||||
tree rhs2 = gimple_assign_rhs2 (def_stmt);
|
||||
if (TREE_CODE (rhs1) != SSA_NAME
|
||||
|| TREE_CODE (rhs2) != SSA_NAME)
|
||||
goto fail;
|
||||
|
||||
gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
|
||||
gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
|
||||
if (gimple_bb (load1_stmt) != gimple_bb (stmt)
|
||||
|| !gimple_assign_load_p (load1_stmt)
|
||||
|| gimple_bb (load2_stmt) != gimple_bb (stmt)
|
||||
|| !gimple_assign_load_p (load2_stmt))
|
||||
goto fail;
|
||||
|
||||
stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
|
||||
stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
|
||||
if (load1_stmt_info == NULL
|
||||
|| load2_stmt_info == NULL
|
||||
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
|
||||
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
|
||||
goto fail;
|
||||
|
||||
if (scan_operand_equal_p (gimple_assign_lhs (stmt),
|
||||
gimple_assign_rhs1 (load2_stmt)))
|
||||
{
|
||||
std::swap (rhs1, rhs2);
|
||||
std::swap (load1_stmt, load2_stmt);
|
||||
std::swap (load1_stmt_info, load2_stmt_info);
|
||||
}
|
||||
if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
|
||||
gimple_assign_rhs1 (load1_stmt))
|
||||
|| !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
|
||||
gimple_assign_rhs1 (load2_stmt)))
|
||||
goto fail;
|
||||
|
||||
dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
|
||||
if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
|
||||
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
|
||||
goto fail;
|
||||
|
||||
tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
|
||||
if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
|
||||
|| !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
|
||||
|| (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
|
||||
== (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
|
||||
goto fail;
|
||||
|
||||
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
|
||||
std::swap (var1, var2);
|
||||
|
||||
if (loop_vinfo->scan_map == NULL)
|
||||
goto fail;
|
||||
tree *init = loop_vinfo->scan_map->get (var1);
|
||||
if (init == NULL)
|
||||
goto fail;
|
||||
|
||||
/* The IL is as expected, now check if we can actually vectorize it.
|
||||
_26 = D.2043[_25];
|
||||
_27 = D.2042[_25];
|
||||
_28 = _26 + _27;
|
||||
D.2043[_25] = _28;
|
||||
D.2042[_25] = _28;
|
||||
should be vectorized as (where _40 is the vectorized rhs
|
||||
from the D.2042[_21] = 0; store):
|
||||
_30 = MEM <vector(8) int> [(int *)&D.2043];
|
||||
_31 = MEM <vector(8) int> [(int *)&D.2042];
|
||||
_32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
|
||||
_33 = _31 + _32;
|
||||
// _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
|
||||
_34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
|
||||
_35 = _33 + _34;
|
||||
// _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
|
||||
// _31[1]+.._31[4], ... _31[4]+.._31[7] };
|
||||
_36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
|
||||
_37 = _35 + _36;
|
||||
// _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
|
||||
// _31[0]+.._31[4], ... _31[0]+.._31[7] };
|
||||
_38 = _30 + _37;
|
||||
_39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
|
||||
MEM <vector(8) int> [(int *)&D.2043] = _39;
|
||||
MEM <vector(8) int> [(int *)&D.2042] = _38; */
|
||||
enum machine_mode vec_mode = TYPE_MODE (vectype);
|
||||
optab optab = optab_for_tree_code (code, vectype, optab_default);
|
||||
if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
|
||||
goto fail;
|
||||
|
||||
unsigned HOST_WIDE_INT nunits;
|
||||
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
|
||||
goto fail;
|
||||
int units_log2 = exact_log2 (nunits);
|
||||
if (units_log2 <= 0)
|
||||
goto fail;
|
||||
|
||||
for (int i = 0; i <= units_log2; ++i)
|
||||
{
|
||||
unsigned HOST_WIDE_INT j, k;
|
||||
vec_perm_builder sel (nunits, nunits, 1);
|
||||
sel.quick_grow (nunits);
|
||||
if (i == units_log2)
|
||||
{
|
||||
for (j = 0; j < nunits; ++j)
|
||||
sel[j] = nunits - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
|
||||
sel[j] = nunits + j;
|
||||
for (k = 0; j < nunits; ++j, ++k)
|
||||
sel[j] = k;
|
||||
}
|
||||
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
|
||||
if (!can_vec_perm_const_p (vec_mode, indices))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function vectorizable_scan_store.
|
||||
|
||||
Helper of vectorizable_score, arguments like on vectorizable_store.
|
||||
Handle only the transformation, checking is done in check_scan_store. */
|
||||
|
||||
static bool
|
||||
vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
||||
stmt_vec_info *vec_stmt, int ncopies)
|
||||
{
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
|
||||
tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
|
||||
vec_info *vinfo = stmt_info->vinfo;
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"transform scan store. ncopies = %d\n", ncopies);
|
||||
|
||||
gimple *stmt = STMT_VINFO_STMT (stmt_info);
|
||||
tree rhs = gimple_assign_rhs1 (stmt);
|
||||
gcc_assert (TREE_CODE (rhs) == SSA_NAME);
|
||||
|
||||
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
|
||||
enum tree_code code = gimple_assign_rhs_code (def_stmt);
|
||||
if (code == POINTER_PLUS_EXPR)
|
||||
code = PLUS_EXPR;
|
||||
gcc_assert (TREE_CODE_LENGTH (code) == binary_op
|
||||
&& commutative_tree_code (code));
|
||||
tree rhs1 = gimple_assign_rhs1 (def_stmt);
|
||||
tree rhs2 = gimple_assign_rhs2 (def_stmt);
|
||||
gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
|
||||
gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
|
||||
gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
|
||||
stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
|
||||
stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
|
||||
dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
|
||||
dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
|
||||
tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
|
||||
tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
|
||||
|
||||
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
|
||||
{
|
||||
std::swap (rhs1, rhs2);
|
||||
std::swap (var1, var2);
|
||||
}
|
||||
|
||||
tree *init = loop_vinfo->scan_map->get (var1);
|
||||
gcc_assert (init);
|
||||
|
||||
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
bool inscan_var_store
|
||||
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
|
||||
|
||||
unsigned HOST_WIDE_INT nunits;
|
||||
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
|
||||
gcc_unreachable ();
|
||||
int units_log2 = exact_log2 (nunits);
|
||||
gcc_assert (units_log2 > 0);
|
||||
auto_vec<tree, 16> perms;
|
||||
perms.quick_grow (units_log2 + 1);
|
||||
for (int i = 0; i <= units_log2; ++i)
|
||||
{
|
||||
unsigned HOST_WIDE_INT j, k;
|
||||
vec_perm_builder sel (nunits, nunits, 1);
|
||||
sel.quick_grow (nunits);
|
||||
if (i == units_log2)
|
||||
{
|
||||
for (j = 0; j < nunits; ++j)
|
||||
sel[j] = nunits - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
|
||||
sel[j] = nunits + j;
|
||||
for (k = 0; j < nunits; ++j, ++k)
|
||||
sel[j] = k;
|
||||
}
|
||||
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
|
||||
perms[i] = vect_gen_perm_mask_checked (vectype, indices);
|
||||
}
|
||||
|
||||
stmt_vec_info prev_stmt_info = NULL;
|
||||
tree vec_oprnd1 = NULL_TREE;
|
||||
tree vec_oprnd2 = NULL_TREE;
|
||||
tree vec_oprnd3 = NULL_TREE;
|
||||
tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
|
||||
tree dataref_offset = build_int_cst (ref_type, 0);
|
||||
tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
|
||||
tree orig = NULL_TREE;
|
||||
for (int j = 0; j < ncopies; j++)
|
||||
{
|
||||
stmt_vec_info new_stmt_info;
|
||||
if (j == 0)
|
||||
{
|
||||
vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
|
||||
vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
|
||||
vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
|
||||
orig = vec_oprnd3;
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
|
||||
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
|
||||
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
|
||||
if (!inscan_var_store)
|
||||
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
|
||||
}
|
||||
|
||||
tree v = vec_oprnd2;
|
||||
for (int i = 0; i < units_log2; ++i)
|
||||
{
|
||||
tree new_temp = make_ssa_name (vectype);
|
||||
gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, v,
|
||||
vec_oprnd1, perms[i]);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
if (prev_stmt_info == NULL)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
tree new_temp2 = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (new_temp2, code, v, new_temp);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
v = new_temp2;
|
||||
}
|
||||
|
||||
tree new_temp = make_ssa_name (vectype);
|
||||
gimple *g = gimple_build_assign (new_temp, code, orig, v);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
orig = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
|
||||
perms[units_log2]);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
if (!inscan_var_store)
|
||||
{
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
|
||||
dataref_offset);
|
||||
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
|
||||
g = gimple_build_assign (data_ref, new_temp);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
}
|
||||
}
|
||||
|
||||
if (inscan_var_store)
|
||||
for (int j = 0; j < ncopies; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
|
||||
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
|
||||
dataref_offset);
|
||||
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
|
||||
gimple *g = gimple_build_assign (data_ref, orig);
|
||||
stmt_vec_info new_stmt_info
|
||||
= vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function vectorizable_store.
|
||||
|
||||
Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
|
||||
|
@ -6514,6 +6998,13 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
group_size = vec_num = 1;
|
||||
}
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
|
||||
{
|
||||
if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
|
||||
memory_access_type))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!vec_stmt) /* transformation not required. */
|
||||
{
|
||||
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
|
||||
|
@ -6737,6 +7228,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
return true;
|
||||
}
|
||||
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
|
||||
return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
|
||||
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
|
||||
|
@ -7162,7 +7655,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
gcc_assert (useless_type_conversion_p (vectype,
|
||||
TREE_TYPE (vec_oprnd)));
|
||||
bool simd_lane_access_p
|
||||
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
|
||||
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
|
||||
if (simd_lane_access_p
|
||||
&& !loop_masks
|
||||
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
|
||||
|
@ -8347,7 +8840,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
if (j == 0)
|
||||
{
|
||||
bool simd_lane_access_p
|
||||
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
|
||||
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
|
||||
if (simd_lane_access_p
|
||||
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
|
||||
&& VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
|
||||
|
|
|
@ -491,6 +491,10 @@ typedef struct _loop_vec_info : public vec_info {
|
|||
/* Map of IV base/step expressions to inserted name in the preheader. */
|
||||
hash_map<tree_operand_hash, tree> *ivexpr_map;
|
||||
|
||||
/* Map of OpenMP "omp simd array" scan variables to corresponding
|
||||
rhs of the store of the initializer. */
|
||||
hash_map<tree, tree> *scan_map;
|
||||
|
||||
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
|
||||
applied to the loop, i.e., no unrolling is needed, this is 1. */
|
||||
poly_uint64 slp_unrolling_factor;
|
||||
|
@ -913,7 +917,7 @@ struct _stmt_vec_info {
|
|||
bool strided_p;
|
||||
|
||||
/* For both loads and stores. */
|
||||
bool simd_lane_access_p;
|
||||
unsigned simd_lane_access_p : 2;
|
||||
|
||||
/* Classifies how the load or store is going to be implemented
|
||||
for loop vectorization. */
|
||||
|
|
Loading…
Add table
Reference in a new issue