omp-low.c (struct omp_context): Add scan_inclusive field.

* omp-low.c (struct omp_context): Add scan_inclusive field.
	(scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
	if inclusive scan.
	(struct omplow_simd_context): Add lastlane member.
	(lower_rec_simd_input_clauses): Add rvar argument, handle inscan
	reductions.  Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
	1 or 2 argument.
	(lower_rec_input_clauses): Handle inscan reductions in simd contexts.
	(lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
	(lower_omp_scan): New function.
	(lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
	* tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
	check 3rd argument if present rather than 2nd.
	* tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
	(struct _stmt_vec_info): Change simd_lane_access_p from bool into
	2-bit bitfield.
	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
	scan_map.  For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
	than 2nd.
	(_loop_vec_info::~_loop_vec_info): Delete scan_map.
	* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
	different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
	init.
	(vect_find_stmt_data_reference): Encode in ->aux the 2nd
	IFN_GOMP_SIMD_LANE argument.
	(vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
	encoded ->aux value.
	* tree-vect-stmts.c: Include attribs.h.
	(vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
	(scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
	functions.
	(vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
	(vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
cp/
	* semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
	set need_copy_assignment.
testsuite/
	* gcc.dg/vect/vect-simd-8.c: New test.
	* gcc.dg/vect/vect-simd-9.c: New test.
	* g++.dg/vect/simd-2.cc: New test.
	* g++.dg/gomp/scan-1.C: New test.

From-SVN: r272399
Commit 0356aab806 (parent a064fd4c73), authored by Jakub Jelinek on 2019-06-17 23:20:00 +02:00 and committed by Jakub Jelinek.
14 changed files with 1348 additions and 34 deletions

View file

@ -1,3 +1,39 @@
2019-06-17 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (struct omp_context): Add scan_inclusive field.
(scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
if inclusive scan.
(struct omplow_simd_context): Add lastlane member.
(lower_rec_simd_input_clauses): Add rvar argument, handle inscan
reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
1 or 2 argument.
(lower_rec_input_clauses): Handle inscan reductions in simd contexts.
(lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
(lower_omp_scan): New function.
(lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
* tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
check 3rd argument if present rather than 2nd.
* tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
(struct _stmt_vec_info): Change simd_lane_access_p from bool into
2-bit bitfield.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
than 2nd.
(_loop_vec_info::~_loop_vec_info): Delete scan_map.
* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
init.
(vect_find_stmt_data_reference): Encode in ->aux the 2nd
IFN_GOMP_SIMD_LANE argument.
(vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
encoded ->aux value.
* tree-vect-stmts.c: Include attribs.h.
(vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
(scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
functions.
(vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
(vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
2019-06-17 Uroš Bizjak <ubizjak@gmail.com>
PR target/62055

View file

@ -1,3 +1,8 @@
2019-06-17 Jakub Jelinek <jakub@redhat.com>
* semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
set need_copy_assignment.
2019-06-17 Marek Polacek <polacek@redhat.com>
PR c++/83820 - excessive attribute arguments not detected.

View file

@ -7688,6 +7688,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
case OMP_CLAUSE_REDUCTION:
if (reduction_seen == -2)
OMP_CLAUSE_REDUCTION_INSCAN (c) = 0;
if (OMP_CLAUSE_REDUCTION_INSCAN (c))
need_copy_assignment = true;
need_implicitly_determined = true;
break;
case OMP_CLAUSE_IN_REDUCTION:

View file

@ -141,6 +141,9 @@ struct omp_context
/* True if lower_omp_1 should look up lastprivate conditional in parent
context. */
bool combined_into_simd_safelen0;
/* True if there is nested scan context with inclusive clause. */
bool scan_inclusive;
};
static splay_tree all_contexts;
@ -3329,11 +3332,15 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
scan_omp_single (as_a <gomp_single *> (stmt), ctx);
break;
case GIMPLE_OMP_SCAN:
if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
ctx->scan_inclusive = true;
/* FALLTHRU */
case GIMPLE_OMP_SECTION:
case GIMPLE_OMP_MASTER:
case GIMPLE_OMP_ORDERED:
case GIMPLE_OMP_CRITICAL:
case GIMPLE_OMP_SCAN:
case GIMPLE_OMP_GRID_BODY:
ctx = new_omp_context (stmt, ctx);
scan_omp (gimple_omp_body_ptr (stmt), ctx);
@ -3671,6 +3678,7 @@ struct omplow_simd_context {
omplow_simd_context () { memset (this, 0, sizeof (*this)); }
tree idx;
tree lane;
tree lastlane;
vec<tree, va_heap> simt_eargs;
gimple_seq simt_dlist;
poly_uint64_pod max_vf;
@ -3682,7 +3690,8 @@ struct omplow_simd_context {
static bool
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
omplow_simd_context *sctx, tree &ivar, tree &lvar)
omplow_simd_context *sctx, tree &ivar,
tree &lvar, tree *rvar = NULL)
{
if (known_eq (sctx->max_vf, 0U))
{
@ -3738,7 +3747,27 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
= tree_cons (get_identifier ("omp simd array"), NULL,
DECL_ATTRIBUTES (avar));
gimple_add_tmp_var (avar);
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
tree iavar = avar;
if (rvar)
{
/* For inscan reductions, create another array temporary,
which will hold the reduced value. */
iavar = create_tmp_var_raw (atype);
if (TREE_ADDRESSABLE (new_var))
TREE_ADDRESSABLE (iavar) = 1;
DECL_ATTRIBUTES (iavar)
= tree_cons (get_identifier ("omp simd array"), NULL,
tree_cons (get_identifier ("omp simd inscan"), NULL,
DECL_ATTRIBUTES (iavar)));
gimple_add_tmp_var (iavar);
ctx->cb.decl_map->put (avar, iavar);
if (sctx->lastlane == NULL_TREE)
sctx->lastlane = create_tmp_var (unsigned_type_node);
*rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
sctx->lastlane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar) = 1;
}
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
NULL_TREE, NULL_TREE);
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
NULL_TREE, NULL_TREE);
@ -3814,7 +3843,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
omplow_simd_context sctx = omplow_simd_context ();
tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
gimple_seq llist[3] = { };
gimple_seq llist[4] = { };
tree nonconst_simd_if = NULL_TREE;
copyin_seq = NULL;
@ -5155,9 +5184,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
tree rvar = NULL_TREE, *rvarp = NULL;
if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
ivar, lvar))
ivar, lvar, rvarp))
{
if (new_vard == new_var)
{
@ -5173,6 +5207,93 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
x = lang_hooks.decls.omp_clause_default_ctor
(c, unshare_expr (ivar),
build_outer_var_ref (var, ctx));
if (rvarp)
{
if (x)
{
gimplify_and_add (x, &llist[0]);
tree ivar2 = unshare_expr (lvar);
TREE_OPERAND (ivar2, 1) = sctx.idx;
x = lang_hooks.decls.omp_clause_default_ctor
(c, ivar2, build_outer_var_ref (var, ctx));
gimplify_and_add (x, &llist[0]);
/* For types that need construction, add another
private var which will be default constructed
and optionally initialized with
OMP_CLAUSE_REDUCTION_GIMPLE_INIT, as in the
loop we want to assign this value instead of
constructing and destructing it in each
iteration. */
tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
gimple_add_tmp_var (nv);
ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
nv);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx));
gimplify_and_add (x, ilist);
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
x = DECL_VALUE_EXPR (new_var);
SET_DECL_VALUE_EXPR (new_var, nv);
lower_omp (&tseq, ctx);
SET_DECL_VALUE_EXPR (new_var, x);
gimple_seq_add_seq (ilist, tseq);
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
}
x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
}
tree ref = build_outer_var_ref (var, ctx);
x = unshare_expr (ivar);
x = lang_hooks.decls.omp_clause_assign_op (c, x,
ref);
gimplify_and_add (x, &llist[0]);
ref = build_outer_var_ref (var, ctx);
x = lang_hooks.decls.omp_clause_assign_op (c, ref,
rvar);
gimplify_and_add (x, &llist[3]);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
if (new_vard == new_var)
SET_DECL_VALUE_EXPR (new_var, lvar);
else
SET_DECL_VALUE_EXPR (new_vard,
build_fold_addr_expr (lvar));
x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
tree ivar2 = unshare_expr (lvar);
TREE_OPERAND (ivar2, 1) = sctx.idx;
x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
break;
}
if (x)
gimplify_and_add (x, &llist[0]);
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
@ -5240,6 +5361,41 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
: build_outer_var_ref (var, ctx));
if (x)
gimplify_and_add (x, ilist);
if (rvarp)
{
if (x)
{
tree nv = create_tmp_var_raw (TREE_TYPE (new_vard));
gimple_add_tmp_var (nv);
ctx->cb.decl_map->put (new_var, nv);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx));
gimplify_and_add (x, ilist);
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
SET_DECL_VALUE_EXPR (new_var, nv);
DECL_HAS_VALUE_EXPR_P (new_var) = 1;
lower_omp (&tseq, ctx);
SET_DECL_VALUE_EXPR (new_var, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (new_var) = 0;
gimple_seq_add_seq (ilist, tseq);
}
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
}
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
goto do_dtor;
}
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
@ -5324,12 +5480,32 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
tree rvar = NULL_TREE, *rvarp = NULL;
if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
ivar, lvar))
ivar, lvar, rvarp))
{
if (new_vard != new_var)
{
SET_DECL_VALUE_EXPR (new_vard,
build_fold_addr_expr (lvar));
DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
}
tree ref = build_outer_var_ref (var, ctx);
if (rvarp)
{
gimplify_assign (ivar, ref, &llist[0]);
ref = build_outer_var_ref (var, ctx);
gimplify_assign (ref, rvar, &llist[3]);
break;
}
gimplify_assign (unshare_expr (ivar), x, &llist[0]);
if (sctx.is_simt)
@ -5346,14 +5522,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
ref = build_outer_var_ref (var, ctx);
gimplify_assign (ref, x, &llist[1]);
if (new_vard != new_var)
{
SET_DECL_VALUE_EXPR (new_vard,
build_fold_addr_expr (lvar));
DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
}
}
else
else if (rvarp == NULL)
{
if (omp_is_reference (var) && is_simd)
handle_simd_reference (clause_loc, new_vard, ilist);
@ -5456,14 +5626,23 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
if (sctx.lane)
{
gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE,
1 + (nonconst_simd_if != NULL),
uid, nonconst_simd_if);
2 + (nonconst_simd_if != NULL),
uid, integer_zero_node,
nonconst_simd_if);
gimple_call_set_lhs (g, sctx.lane);
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
g = gimple_build_assign (sctx.lane, INTEGER_CST,
build_int_cst (unsigned_type_node, 0));
gimple_seq_add_stmt (ilist, g);
if (sctx.lastlane)
{
g = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
2, uid, sctx.lane);
gimple_call_set_lhs (g, sctx.lastlane);
gimple_seq_add_stmt (dlist, g);
gimple_seq_add_seq (dlist, llist[3]);
}
/* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */
if (llist[2])
{
@ -5865,6 +6044,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
TREE_OPERAND (val, 0), lastlane,
NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (new_var) = 1;
}
}
else if (maybe_simt)
@ -8371,6 +8551,167 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
}
/* Expand code for an OpenMP scan directive and the structured block
before the scan directive.  The scan directive splits the body of the
enclosing simd construct into an "input phase" (before the directive)
and a "scan phase" (after it); this routine replaces the GIMPLE_OMP_SCAN
marker with the per-phase reduction bookkeeping for each inscan
reduction clause.  */
static void
lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
gimple *stmt = gsi_stmt (*gsi_p);
bool has_clauses
= gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)) != NULL;
tree lane = NULL_TREE;
gimple_seq before = NULL;
omp_context *octx = ctx->outer;
gcc_assert (octx);
/* We are lowering the input phase iff exactly one of "this scan marker
carries clauses" and "the enclosing construct saw an inclusive clause"
holds.  NOTE(review): confirm against the gimplifier which of the
GIMPLE_OMP_SCAN markers carries the clauses in each case.  */
bool input_phase = has_clauses ^ octx->scan_inclusive;
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
&& !gimple_omp_for_combined_into_p (octx->stmt)
&& octx->scan_inclusive)
{
/* Obtain the lane for this phase from a magic
.GOMP_SIMD_LANE (simduid, N) call: N == 1 selects the input-phase
lane, N == 2 the scan-phase lane (1 + !input_phase below).  */
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
OMP_CLAUSE__SIMDUID_))
{
tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
lane = create_tmp_var (unsigned_type_node);
tree t = build_int_cst (integer_type_node, 1 + !input_phase);
gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
gimple_call_set_lhs (g, lane);
gimple_seq_add_stmt (&before, g);
}
/* Emit per-clause code for every inscan reduction on the enclosing
simd construct.  */
for (tree c = gimple_omp_for_clauses (octx->stmt);
c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
{
tree var = OMP_CLAUSE_DECL (c);
tree new_var = lookup_decl (var, octx);
/* VAL is the privatized copy to operate on, VAR2 the counterpart
holding the original list item / running reduced value, VAR3
(if any) a separately constructed identity-element variable.  */
tree val = new_var;
tree var2 = NULL_TREE;
tree var3 = NULL_TREE;
if (DECL_HAS_VALUE_EXPR_P (new_var))
{
val = DECL_VALUE_EXPR (new_var);
if (TREE_CODE (val) == ARRAY_REF
&& VAR_P (TREE_OPERAND (val, 0)))
{
tree v = TREE_OPERAND (val, 0);
if (lookup_attribute ("omp simd array",
DECL_ATTRIBUTES (v)))
{
/* Index the "omp simd array" with the phase lane
computed above instead of the generic lane.  */
val = unshare_expr (val);
TREE_OPERAND (val, 1) = lane;
var2 = lookup_decl (v, octx);
if (input_phase
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
var3 = maybe_lookup_decl (var2, octx);
if (!input_phase)
{
var2 = build4 (ARRAY_REF, TREE_TYPE (val),
var2, lane, NULL_TREE, NULL_TREE);
/* Lane is always in bounds of the simd array.  */
TREE_THIS_NOTRAP (var2) = 1;
}
else
var2 = val;
}
}
}
else
{
var2 = build_outer_var_ref (var, octx);
if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
var3 = maybe_lookup_decl (new_var, octx);
if (var3 == new_var)
var3 = NULL_TREE;
}
}
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
/* User-defined reduction: re-lower the recorded initializer /
combiner sequences with the placeholder and variable
temporarily remapped via DECL_VALUE_EXPR.  */
tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
if (input_phase)
{
if (var3)
{
/* If we've added a separate identity element
variable, copy it over into val. */
tree x = lang_hooks.decls.omp_clause_assign_op (c, val,
var3);
gimplify_and_add (x, &before);
}
else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{
/* Otherwise, assign to it the identity element. */
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
? DECL_VALUE_EXPR (new_var) : NULL_TREE);
tree ref = build_outer_var_ref (var, octx);
SET_DECL_VALUE_EXPR (new_var, val);
SET_DECL_VALUE_EXPR (placeholder, ref);
DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
lower_omp (&tseq, octx);
SET_DECL_VALUE_EXPR (new_var, x);
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
if (x == NULL_TREE)
DECL_HAS_VALUE_EXPR_P (new_var) = 0;
gimple_seq_add_seq (&before, tseq);
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
}
}
else
{
/* Scan phase: combine VAR2 (the running value) into VAL via
the UDR combiner, then copy the result back into VAL.  */
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
? DECL_VALUE_EXPR (new_var) : NULL_TREE);
SET_DECL_VALUE_EXPR (new_var, val);
SET_DECL_VALUE_EXPR (placeholder, var2);
DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
lower_omp (&tseq, octx);
gimple_seq_add_seq (&before, tseq);
OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
SET_DECL_VALUE_EXPR (new_var, x);
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
gimplify_and_add (x, &before);
}
}
else
{
if (input_phase)
{
/* input phase. Set val to initializer before
the body. */
tree x = omp_reduction_init (c, TREE_TYPE (new_var));
gimplify_assign (val, x, &before);
}
else
{
/* scan phase. */
/* Partial results of a - reduction are combined with +.  */
enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
if (code == MINUS_EXPR)
code = PLUS_EXPR;
tree x = build2 (code, TREE_TYPE (var2),
unshare_expr (var2), unshare_expr (val));
gimplify_assign (unshare_expr (var2), x, &before);
gimplify_assign (val, var2, &before);
}
}
}
}
else if (has_clauses)
/* Cases the condition above excludes (non-simd constructs, exclusive
scans, combined constructs) are not implemented yet.  */
sorry_at (gimple_location (stmt),
"%<#pragma omp scan%> not supported yet");
/* Splice the scan's body after the marker, then BEFORE in front of it:
the second insert-after lands between STMT and the previously inserted
body, so BEFORE executes first.  Finally drop the marker itself.  */
gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
gsi_replace (gsi_p, gimple_build_nop (), true);
}
/* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
substitution of a couple of function calls. But in the NAMED case,
requires that languages coordinate a symbol name. It is therefore
@ -10843,11 +11184,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
case GIMPLE_OMP_SCAN:
ctx = maybe_lookup_ctx (stmt);
gcc_assert (ctx);
gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
if (gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
sorry_at (gimple_location (stmt),
"%<#pragma omp scan%> not supported yet");
gsi_replace (gsi_p, gimple_build_nop (), true);
lower_omp_scan (gsi_p, ctx);
break;
case GIMPLE_OMP_CRITICAL:
ctx = maybe_lookup_ctx (stmt);

View file

@ -1,3 +1,10 @@
2019-06-17 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-8.c: New test.
* gcc.dg/vect/vect-simd-9.c: New test.
* g++.dg/vect/simd-2.cc: New test.
* g++.dg/gomp/scan-1.C: New test.
2019-06-17 Uroš Bizjak <ubizjak@gmail.com>
PR target/62055

View file

@ -0,0 +1,26 @@
// { dg-do compile { target c++11 } }
// Check that an inscan reduction on a class type requires a usable
// copy-assignment operator (finish_omp_clauses sets
// need_copy_assignment for OMP_CLAUSE_REDUCTION_INSCAN), while a plain
// reduction on the same type does not.
struct S { S (); ~S (); S &operator = (const S &) = delete; int s; }; // { dg-message "declared here" }
#pragma omp declare reduction (+ : S : omp_out.s += omp_in.s)
S s;
// Plain (non-inscan) reduction: no copy assignment needed, so this
// must compile cleanly even though S's operator= is deleted.
void
foo (void)
{
#pragma omp simd reduction (+: s)
for (int i = 0; i < 64; ++i)
s.s += i;
}
// Inscan reduction: copy assignment is required, and S's operator= is
// deleted, so both the simd directive and the scan separator are
// diagnosed.
void
bar (int *x)
{
#pragma omp simd reduction (inscan, +: s) // { dg-error "use of deleted function" }
for (int i = 0; i < 64; ++i)
{
s.s += i;
#pragma omp scan inclusive (s) // { dg-error "" }
x[i] = s.s;
}
}

View file

@ -0,0 +1,153 @@
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
// Runtime test for #pragma omp simd reduction (inscan, ...) with an
// inclusive scan on a class type with user-provided (inline) special
// members and user-defined reductions.  After each loop b[i] must hold
// the inclusive prefix sum of the values accumulated so far.
// Vectorization of the class-typed case is currently xfailed (see the
// dg-final directive above).
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;
};
S::S () : s (0)
{
}
S::~S ()
{
}
S::S (const S &x)
{
s = x.s;
}
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
// Initializer routine used by the "plus" user-defined reduction below.
static inline void
ini (S &x)
{
x.s = 0;
}
S r, a[1024], b[1024];
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
// Inclusive scan with the "+" UDR; should take the vectorizable path.
__attribute__((noipa)) void
foo (S *a, S *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r.s += a[i].s;
#pragma omp scan inclusive(r)
b[i] = r;
}
}
// Same, but with the "plus" UDR that has an explicit initializer and a
// local reduction variable.
__attribute__((noipa)) S
bar (void)
{
S s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
s.s += 2 * a[i].s;
#pragma omp scan inclusive(s)
b[i] = s;
}
return S (s);
}
// simdlen(1) forces the scalar path; results must match foo.
__attribute__((noipa)) void
baz (S *a, S *b)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
r.s += a[i].s;
#pragma omp scan inclusive(r)
b[i] = r;
}
}
// if (0) likewise disables vectorization; results must match bar.
__attribute__((noipa)) S
qux (void)
{
S s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
s.s += 2 * a[i].s;
#pragma omp scan inclusive(s)
b[i] = s;
}
return S (s);
}
int
main ()
{
S s;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
foo (a, b);
/* 1024 * 1023 / 2 is the sum of 0..1023; b[i].s must be the running
prefix of that sum.  */
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s.s += i;
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
}
/* bar accumulates 2 * a[i].s, so the total is 1024 * 1023.  */
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += 2 * i;
if (b[i].s != s.s)
abort ();
}
r.s = 0;
baz (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += i;
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += 2 * i;
if (b[i].s != s.s)
abort ();
}
return 0;
}

View file

@ -0,0 +1,118 @@
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#include "tree-vect.h"
/* Runtime test for #pragma omp simd reduction (inscan, +:...) with an
   inclusive scan on plain int: after each loop b[i] must hold the
   inclusive prefix sum.  foo and bar are expected to vectorize on x86
   (see the scan-tree-dump-times directive above); baz (if (simd: 0))
   and qux (simdlen (1)) force the scalar path and must compute the
   same values.  */
int r, a[1024], b[1024];
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
/* Same scan, but with a local reduction variable and doubled input.  */
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
/* if (simd: 0) disables vectorization; must match foo's results.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
/* simdlen (1) likewise forces the scalar path; must match bar.  */
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
int s = 0;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b);
/* 1024 * 1023 / 2 is the sum of 0..1023.  */
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = 25;
}
/* bar accumulates 2 * a[i], so the total is 1024 * 1023.  */
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
else
b[i] = -1;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = -25;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
}
return 0;
}

View file

@ -0,0 +1,120 @@
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#include "tree-vect.h"
/* Same inclusive-scan runtime test as vect-simd-8.c, but through a
   user-defined reduction "foo" with an explicit initializer clause,
   exercising the UDR inscan lowering path.  foo and bar are expected
   to vectorize on x86; baz and qux force the scalar path.  */
int r, a[1024], b[1024];
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
/* Same scan, but with a local reduction variable and doubled input.  */
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
/* if (simd: 0) disables vectorization; must match foo's results.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
/* simdlen (1) likewise forces the scalar path; must match bar.  */
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
int s = 0;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b);
/* 1024 * 1023 / 2 is the sum of 0..1023.  */
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = 25;
}
/* bar accumulates 2 * a[i], so the total is 1024 * 1023.  */
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
else
b[i] = -1;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = -25;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
}
return 0;
}

View file

@ -1339,14 +1339,14 @@ eliminate_unnecessary_stmts (void)
update_stmt (stmt);
release_ssa_name (name);
/* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON
/* GOMP_SIMD_LANE (unless three argument) or ASAN_POISON
without lhs is not needed. */
if (gimple_call_internal_p (stmt))
switch (gimple_call_internal_fn (stmt))
{
case IFN_GOMP_SIMD_LANE:
if (gimple_call_num_args (stmt) >= 2
&& !integer_nonzerop (gimple_call_arg (stmt, 1)))
if (gimple_call_num_args (stmt) >= 3
&& !integer_nonzerop (gimple_call_arg (stmt, 2)))
break;
/* FALLTHRU */
case IFN_ASAN_POISON:

View file

@ -3003,6 +3003,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
|| TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
break;
/* Different .GOMP_SIMD_LANE calls still give the same lane,
just hold extra information. */
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
&& STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
&& data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
break;
/* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
@ -4101,7 +4108,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
DR_STEP_ALIGNMENT (newdr)
= highest_pow2_factor (step);
/* Mark as simd-lane access. */
newdr->aux = (void *)-1;
tree arg2 = gimple_call_arg (def, 1);
newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
free_data_ref (dr);
datarefs->safe_push (newdr);
return opt_result::success ();
@ -4210,14 +4218,17 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
}
/* See if this was detected as SIMD lane access. */
if (dr->aux == (void *)-1)
if (dr->aux == (void *)-1
|| dr->aux == (void *)-2
|| dr->aux == (void *)-3)
{
if (nested_in_vect_loop_p (loop, stmt_info))
return opt_result::failure_at (stmt_info->stmt,
"not vectorized:"
" data ref analysis failed: %G",
stmt_info->stmt);
STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
= -(uintptr_t) dr->aux;
}
tree base = get_base_address (DR_REF (dr));

View file

@ -824,6 +824,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
peeling_for_alignment (0),
ptr_mask (0),
ivexpr_map (NULL),
scan_map (NULL),
slp_unrolling_factor (1),
single_scalar_iteration_cost (0),
vectorizable (false),
@ -863,8 +864,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
gimple *stmt = gsi_stmt (si);
gimple_set_uid (stmt, 0);
add_stmt (stmt);
/* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the
second argument is the #pragma omp simd if (x) condition, when 0,
/* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
third argument is the #pragma omp simd if (x) condition, when 0,
loop shouldn't be vectorized, when non-zero constant, it should
be vectorized normally, otherwise versioned with vectorized loop
done if the condition is non-zero at runtime. */
@ -872,12 +873,12 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
&& is_gimple_call (stmt)
&& gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
&& gimple_call_num_args (stmt) >= 2
&& gimple_call_num_args (stmt) >= 3
&& TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
&& (loop_in->simduid
== SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
{
tree arg = gimple_call_arg (stmt, 1);
tree arg = gimple_call_arg (stmt, 2);
if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
simd_if_cond = arg;
else
@ -959,6 +960,7 @@ _loop_vec_info::~_loop_vec_info ()
release_vec_loop_masks (&masks);
delete ivexpr_map;
delete scan_map;
loop->aux = NULL;
}

View file

@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@ -3257,7 +3258,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (nargs == 0 || nargs > 4)
return false;
/* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
/* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
combined_fn cfn = gimple_call_combined_fn (stmt);
if (cfn == CFN_GOMP_SIMD_LANE)
{
@ -6320,6 +6321,489 @@ get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
}
/* Function scan_operand_equal_p.
Helper function for check_scan_store. Compare two references
with .GOMP_SIMD_LANE bases.  Returns true iff REF1 and REF2 denote the
same memory: same base object, bit position zero, equal bit sizes and
equal variable offsets, with neither access volatile nor
storage-order reversed.  */
static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
machine_mode mode1, mode2;
poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
tree offset1, offset2;
int unsignedp1, unsignedp2, reversep1, reversep2;
int volatilep1 = 0, volatilep2 = 0;
/* Decompose both references into base object + constant bit
position/size + variable offset.  */
tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
&mode1, &unsignedp1, &reversep1,
&volatilep1);
tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
&mode2, &unsignedp2, &reversep2,
&volatilep2);
/* Volatile or reverse-storage-order accesses are never considered
equal.  */
if (reversep1 || reversep2 || volatilep1 || volatilep2)
return false;
if (!operand_equal_p (base1, base2, 0))
return false;
/* Only whole accesses starting at bit position 0 are recognized.  */
if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
return false;
if (maybe_ne (bitsize1, bitsize2))
return false;
/* NOTE(review): OFFSET1/OFFSET2 may be NULL_TREE when there is no
variable offset -- confirm operand_equal_p accepts NULL operands
here, in particular one NULL and one non-NULL.  */
if (!operand_equal_p (offset1, offset2, 0))
return false;
return true;
}
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.
   Returns true if the store described by STMT_INFO is one the vectorizer
   can handle, false (with a missed-optimization dump note) otherwise.  */

static bool
check_scan_store (stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type;

  /* Only called for "magic" scan stores, i.e. SIMD_LANE_ACCESS_P of
     2 (initializer store) or 3 (combiner load/store sequence).  */
  gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
  /* Restrict the supported shape: non-SLP, unmasked, contiguous,
     non-grouped store directly into a known variable at offset/init zero
     inside a loop that is not fully masked, with an alias set that
     conflicts with the vector type's.  */
  if (slp
      || mask
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
      || loop_vinfo == NULL
      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
      || !integer_zerop (DR_OFFSET (dr_info->dr))
      || !integer_zerop (DR_INIT (dr_info->dr))
      || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
      || !alias_sets_conflict_p (get_alias_set (vectype),
				 get_alias_set (TREE_TYPE (ref_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan store.\n");
      return false;
    }

  /* We need to pattern match code built by OpenMP lowering and simplified
     by following optimizations into something we can handle.
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
	 r += something ();
	 #pragma omp scan inclusive (r)
	 use (r);
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       D.2042[_21] = 0;
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       _6 = _4 + r.0_5;
       D.2042[_20] = _6;
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
       _26 = D.2043[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       D.2042[_25] = _28;
       // Actual scan phase:
       ...
       r.1_8 = D.2042[_20];
       ...
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++ish user
     defined reduction, but allow the vectorizer to pattern recognize it
     and turn into the appropriate vectorized scan.  */

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
    {
      /* Match the D.2042[_21] = 0; store above.  Just require that
	 it is a constant or external definition store.  */
      if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
	{
	fail_init:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported OpenMP scan initializer store.\n");
	  return false;
	}

      /* Remember the initializer rhs per "omp simd array" variable, for
	 use when vectorizing the combiner sequence; a second initializer
	 store for the same variable is unsupported.  */
      if (! loop_vinfo->scan_map)
	loop_vinfo->scan_map = new hash_map<tree, tree>;
      tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
      tree &cached = loop_vinfo->scan_map->get_or_insert (var);
      if (cached)
	goto fail_init;
      cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));

      /* These stores can be vectorized normally.  */
      return true;
    }

  if (rhs_dt != vect_internal_def)
    {
    fail:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan combiner pattern.\n");
      return false;
    }

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (rhs) != SSA_NAME)
    goto fail;

  /* The combiner value _28 must have exactly two store uses in this
     basic block: STMT itself and one other magic (kind 3) store.  */
  use_operand_p use_p;
  imm_use_iterator iter;
  gimple *other_store_stmt = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (use_stmt == stmt || is_gimple_debug (use_stmt))
	continue;
      if (gimple_bb (use_stmt) != gimple_bb (stmt)
	  || !gimple_store_p (use_stmt)
	  || other_store_stmt)
	goto fail;
      other_store_stmt = use_stmt;
    }
  if (other_store_stmt == NULL)
    goto fail;
  stmt_vec_info other_store_stmt_info
    = loop_vinfo->lookup_stmt (other_store_stmt);
  if (other_store_stmt_info == NULL
      || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
    goto fail;

  /* The stored value must be a binary operation in the same block.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  if (gimple_bb (def_stmt) != gimple_bb (stmt)
      || !is_gimple_assign (def_stmt)
      || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
    goto fail;

  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  /* For pointer addition, we should use the normal plus for the vector
     operation.  */
  switch (code)
    {
    case POINTER_PLUS_EXPR:
      code = PLUS_EXPR;
      break;
    case MULT_HIGHPART_EXPR:
      goto fail;
    default:
      break;
    }
  if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
    goto fail;

  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  if (TREE_CODE (rhs1) != SSA_NAME
      || TREE_CODE (rhs2) != SSA_NAME)
    goto fail;

  /* Both operands must come from magic (kind 3) loads in this block.  */
  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  if (gimple_bb (load1_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load1_stmt)
      || gimple_bb (load2_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load2_stmt))
    goto fail;

  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  if (load1_stmt_info == NULL
      || load2_stmt_info == NULL
      || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
      || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
    goto fail;

  /* Canonicalize so that load1 reads the same array that STMT writes
     and load2 the array the other store writes.  */
  if (scan_operand_equal_p (gimple_assign_lhs (stmt),
			    gimple_assign_rhs1 (load2_stmt)))
    {
      std::swap (rhs1, rhs2);
      std::swap (load1_stmt, load2_stmt);
      std::swap (load1_stmt_info, load2_stmt_info);
    }
  if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
			     gimple_assign_rhs1 (load1_stmt))
      || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
				gimple_assign_rhs1 (load2_stmt)))
    goto fail;

  dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
  if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
    goto fail;

  /* Both arrays must be "omp simd array"s and exactly one of them must
     additionally carry the "omp simd inscan" attribute.  */
  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
  if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
      || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
      || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
    goto fail;
  /* Make var1 the non-inscan array, the one whose initializer store was
     cached in scan_map by the kind 2 handling above.  */
  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    std::swap (var1, var2);

  if (loop_vinfo->scan_map == NULL)
    goto fail;
  tree *init = loop_vinfo->scan_map->get (var1);
  if (init == NULL)
    goto fail;

  /* The IL is as expected, now check if we can actually vectorize it.
       _26 = D.2043[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       D.2042[_25] = _28;
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
       _33 = _31 + _32;
       // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
       _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
       _35 = _33 + _34;
       // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
       _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
       _37 = _35 + _36;
       // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
       _38 = _30 + _37;
       _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2043] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _38;  */
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
    goto fail;

  /* Only constant, power-of-two vector lengths are supported.  */
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    goto fail;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    goto fail;

  /* Verify the target supports all log2(nunits) shift permutations plus
     the final broadcast of the last lane used in the sequence above.  */
  for (int i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	{
	  /* Broadcast of the last element.  */
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	}
      else
	{
	  /* Shift up by 2^i lanes, filling the vacated low lanes from
	     the second vector (the initializer).  */
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = nunits + j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
	goto fail;
    }
  return true;
}
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  vec_info *vinfo = stmt_info->vinfo;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform scan store. ncopies = %d\n", ncopies);

  /* Re-discover the combiner shape verified by check_scan_store:
     STMT stores RHS = RHS1 <code> RHS2, both loaded from magic arrays.  */
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  gcc_assert (TREE_CODE_LENGTH (code) == binary_op
	      && commutative_tree_code (code));
  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
  dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
  /* Canonicalize so that var1/rhs1 is the non-inscan array, whose
     initializer store rhs was cached in scan_map.  */
  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    {
      std::swap (rhs1, rhs2);
      std::swap (var1, var2);
    }

  tree *init = loop_vinfo->scan_map->get (var1);
  gcc_assert (init);

  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;

  /* check_scan_store guaranteed a constant power-of-two vector length.  */
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    gcc_unreachable ();
  int units_log2 = exact_log2 (nunits);
  gcc_assert (units_log2 > 0);

  /* Build the permutation masks: perms[i] for i < units_log2 shifts up
     by 2^i lanes filling from the initializer vector, perms[units_log2]
     broadcasts the last lane.  */
  auto_vec<tree, 16> perms;
  perms.quick_grow (units_log2 + 1);
  for (int i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	{
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	}
      else
	{
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = nunits + j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      perms[i] = vect_gen_perm_mask_checked (vectype, indices);
    }

  stmt_vec_info prev_stmt_info = NULL;
  tree vec_oprnd1 = NULL_TREE;
  tree vec_oprnd2 = NULL_TREE;
  tree vec_oprnd3 = NULL_TREE;
  tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
  tree dataref_offset = build_int_cst (ref_type, 0);
  tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
  /* Running carry across copies: the broadcast last-lane result of the
     previous copy's scan (initially the rhs2 vector).  */
  tree orig = NULL_TREE;
  for (int j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info;
      if (j == 0)
	{
	  vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
	  vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
	  vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
	  orig = vec_oprnd3;
	}
      else
	{
	  vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
	  vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
	  vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
	  /* The inscan-array stores are emitted after the loop below;
	     only bump the offset here for the other array.  */
	  if (!inscan_var_store)
	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
	}

      /* Emit the log2(nunits) shift-and-combine steps of the scan.  */
      tree v = vec_oprnd2;
      for (int i = 0; i < units_log2; ++i)
	{
	  tree new_temp = make_ssa_name (vectype);
	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, v,
					   vec_oprnd1, perms[i]);
	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	  prev_stmt_info = new_stmt_info;

	  tree new_temp2 = make_ssa_name (vectype);
	  g = gimple_build_assign (new_temp2, code, v, new_temp);
	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	  prev_stmt_info = new_stmt_info;

	  v = new_temp2;
	}

      /* Combine with the carry from the previous copy.  */
      tree new_temp = make_ssa_name (vectype);
      gimple *g = gimple_build_assign (new_temp, code, orig, v);
      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;

      /* Broadcast the last lane as the carry for the next copy.  */
      orig = make_ssa_name (vectype);
      g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
			       perms[units_log2]);
      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;

      if (!inscan_var_store)
	{
	  tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
				       dataref_offset);
	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
	  g = gimple_build_assign (data_ref, new_temp);
	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	  prev_stmt_info = new_stmt_info;
	}
    }

  /* For the inscan array, all copies store the final broadcast value,
     so these stores are emitted after the whole scan sequence.  */
  if (inscan_var_store)
    for (int j = 0; j < ncopies; j++)
      {
	if (j != 0)
	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

	tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
				     dataref_offset);
	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
	gimple *g = gimple_build_assign (data_ref, orig);
	stmt_vec_info new_stmt_info
	  = vect_finish_stmt_generation (stmt_info, g, gsi);
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	prev_stmt_info = new_stmt_info;
      }
  return true;
}
/* Function vectorizable_store.
Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
@ -6514,6 +6998,13 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
group_size = vec_num = 1;
}
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
{
if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
memory_access_type))
return false;
}
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
@ -6737,6 +7228,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
}
return true;
}
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
@ -7162,7 +7655,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
gcc_assert (useless_type_conversion_p (vectype,
TREE_TYPE (vec_oprnd)));
bool simd_lane_access_p
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
if (simd_lane_access_p
&& !loop_masks
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
@ -8347,7 +8840,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (j == 0)
{
bool simd_lane_access_p
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
if (simd_lane_access_p
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))

View file

@ -491,6 +491,10 @@ typedef struct _loop_vec_info : public vec_info {
/* Map of IV base/step expressions to inserted name in the preheader. */
hash_map<tree_operand_hash, tree> *ivexpr_map;
/* Map of OpenMP "omp simd array" scan variables to corresponding
rhs of the store of the initializer. */
hash_map<tree, tree> *scan_map;
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
applied to the loop, i.e., no unrolling is needed, this is 1. */
poly_uint64 slp_unrolling_factor;
@ -913,7 +917,7 @@ struct _stmt_vec_info {
bool strided_p;
/* For both loads and stores. */
bool simd_lane_access_p;
unsigned simd_lane_access_p : 2;
/* Classifies how the load or store is going to be implemented
for loop vectorization. */