omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument...
* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument, create another "omp scan inscan exclusive" array if !ctx->scan_inclusive. (lower_rec_input_clauses): Handle exclusive scan inscan reductions. (lower_omp_scan): Likewise. * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of 2-bit bitfield for simd_lane_access_p member. * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle aux == (void *)-4 as simd lane access. * tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update comment with permutations to show the canonical permutation order. (vectorizable_scan_store): Handle exclusive scan. (vectorizable_store): Call vectorizable_scan_store even for STMT_VINFO_SIMD_LANE_ACCESS_P > 3. * gcc.dg/vect/vect-simd-12.c: New test. * gcc.dg/vect/vect-simd-13.c: New test. * gcc.dg/vect/vect-simd-14.c: New test. * gcc.dg/vect/vect-simd-15.c: New test. * gcc.target/i386/sse2-vect-simd-12.c: New test. * gcc.target/i386/sse2-vect-simd-13.c: New test. * gcc.target/i386/sse2-vect-simd-14.c: New test. * gcc.target/i386/sse2-vect-simd-15.c: New test. * gcc.target/i386/avx2-vect-simd-12.c: New test. * gcc.target/i386/avx2-vect-simd-13.c: New test. * gcc.target/i386/avx2-vect-simd-14.c: New test. * gcc.target/i386/avx2-vect-simd-15.c: New test. * gcc.target/i386/avx512f-vect-simd-12.c: New test. * gcc.target/i386/avx512f-vect-simd-13.c: New test. * gcc.target/i386/avx512f-vect-simd-14.c: New test. * gcc.target/i386/avx512bw-vect-simd-15.c: New test. * g++.dg/vect/simd-6.cc: New test. * g++.dg/vect/simd-7.cc: New test. * g++.dg/vect/simd-8.cc: New test. * g++.dg/vect/simd-9.cc: New test. * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics. From-SVN: r272544
This commit is contained in:
parent
e73fb06d5a
commit
1612b1febd
27 changed files with 1757 additions and 63 deletions
|
@ -1,5 +1,20 @@
|
|||
2019-06-21 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
|
||||
create another "omp scan inscan exclusive" array if
|
||||
!ctx->scan_inclusive.
|
||||
(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
|
||||
(lower_omp_scan): Likewise.
|
||||
* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
|
||||
2-bit bitfield for simd_lane_access_p member.
|
||||
* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
|
||||
aux == (void *)-4 as simd lane access.
|
||||
* tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update
|
||||
comment with permutations to show the canonical permutation order.
|
||||
(vectorizable_scan_store): Handle exclusive scan.
|
||||
(vectorizable_store): Call vectorizable_scan_store even for
|
||||
STMT_VINFO_SIMD_LANE_ACCESS_P > 3.
|
||||
|
||||
* tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
|
||||
"omp simd array" arrays with one byte elements.
|
||||
|
||||
|
|
200
gcc/omp-low.c
200
gcc/omp-low.c
|
@ -3692,7 +3692,8 @@ struct omplow_simd_context {
|
|||
static bool
|
||||
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
||||
omplow_simd_context *sctx, tree &ivar,
|
||||
tree &lvar, tree *rvar = NULL)
|
||||
tree &lvar, tree *rvar = NULL,
|
||||
tree *rvar2 = NULL)
|
||||
{
|
||||
if (known_eq (sctx->max_vf, 0U))
|
||||
{
|
||||
|
@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
|||
*rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
|
||||
sctx->lastlane, NULL_TREE, NULL_TREE);
|
||||
TREE_THIS_NOTRAP (*rvar) = 1;
|
||||
|
||||
if (!ctx->scan_inclusive)
|
||||
{
|
||||
/* And for exclusive scan yet another one, which will
|
||||
hold the value during the scan phase. */
|
||||
tree savar = create_tmp_var_raw (atype);
|
||||
if (TREE_ADDRESSABLE (new_var))
|
||||
TREE_ADDRESSABLE (savar) = 1;
|
||||
DECL_ATTRIBUTES (savar)
|
||||
= tree_cons (get_identifier ("omp simd array"), NULL,
|
||||
tree_cons (get_identifier ("omp simd inscan "
|
||||
"exclusive"), NULL,
|
||||
DECL_ATTRIBUTES (savar)));
|
||||
gimple_add_tmp_var (savar);
|
||||
ctx->cb.decl_map->put (iavar, savar);
|
||||
*rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
|
||||
sctx->idx, NULL_TREE, NULL_TREE);
|
||||
TREE_THIS_NOTRAP (*rvar2) = 1;
|
||||
}
|
||||
}
|
||||
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
|
||||
NULL_TREE, NULL_TREE);
|
||||
|
@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
new_vard = TREE_OPERAND (new_var, 0);
|
||||
gcc_assert (DECL_P (new_vard));
|
||||
}
|
||||
tree rvar = NULL_TREE, *rvarp = NULL;
|
||||
tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
|
||||
if (is_simd
|
||||
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
|
||||
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
|
||||
rvarp = &rvar;
|
||||
if (is_simd
|
||||
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
|
||||
ivar, lvar, rvarp))
|
||||
ivar, lvar, rvarp,
|
||||
&rvar2))
|
||||
{
|
||||
if (new_vard == new_var)
|
||||
{
|
||||
|
@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
(c, ivar2, build_outer_var_ref (var, ctx));
|
||||
gimplify_and_add (x, &llist[0]);
|
||||
|
||||
if (rvar2)
|
||||
{
|
||||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, unshare_expr (rvar2),
|
||||
build_outer_var_ref (var, ctx));
|
||||
gimplify_and_add (x, &llist[0]);
|
||||
}
|
||||
|
||||
/* For types that need construction, add another
|
||||
private var which will be default constructed
|
||||
and optionally initialized with
|
||||
|
@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
iteration. */
|
||||
tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
|
||||
gimple_add_tmp_var (nv);
|
||||
ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
|
||||
ctx->cb.decl_map->put (TREE_OPERAND (rvar2
|
||||
? rvar2
|
||||
: ivar, 0),
|
||||
nv);
|
||||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, nv, build_outer_var_ref (var, ctx));
|
||||
|
@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (&llist[1], tseq);
|
||||
}
|
||||
|
||||
if (rvar2)
|
||||
{
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (&llist[1], tseq);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (x)
|
||||
|
@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
gimple_seq_add_seq (ilist, tseq);
|
||||
}
|
||||
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
|
||||
if (!ctx->scan_inclusive)
|
||||
{
|
||||
tree nv2
|
||||
= create_tmp_var_raw (TREE_TYPE (new_var));
|
||||
gimple_add_tmp_var (nv2);
|
||||
ctx->cb.decl_map->put (nv, nv2);
|
||||
x = lang_hooks.decls.omp_clause_default_ctor
|
||||
(c, nv2, build_outer_var_ref (var, ctx));
|
||||
gimplify_and_add (x, ilist);
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (dlist, tseq);
|
||||
}
|
||||
}
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, nv);
|
||||
if (x)
|
||||
{
|
||||
|
@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
gimple_seq_add_seq (dlist, tseq);
|
||||
}
|
||||
}
|
||||
else if (!ctx->scan_inclusive
|
||||
&& TREE_ADDRESSABLE (TREE_TYPE (new_var)))
|
||||
{
|
||||
tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
|
||||
gimple_add_tmp_var (nv2);
|
||||
ctx->cb.decl_map->put (new_vard, nv2);
|
||||
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
|
||||
if (x)
|
||||
{
|
||||
tseq = NULL;
|
||||
dtor = x;
|
||||
gimplify_stmt (&dtor, &tseq);
|
||||
gimple_seq_add_seq (dlist, tseq);
|
||||
}
|
||||
}
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
|
||||
goto do_dtor;
|
||||
}
|
||||
|
@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
new_vard = TREE_OPERAND (new_var, 0);
|
||||
gcc_assert (DECL_P (new_vard));
|
||||
}
|
||||
tree rvar = NULL_TREE, *rvarp = NULL;
|
||||
tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
|
||||
if (is_simd
|
||||
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
|
||||
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
|
||||
rvarp = &rvar;
|
||||
if (is_simd
|
||||
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
|
||||
ivar, lvar, rvarp))
|
||||
ivar, lvar, rvarp,
|
||||
&rvar2))
|
||||
{
|
||||
if (new_vard != new_var)
|
||||
{
|
||||
|
@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
gimple_seq before = NULL;
|
||||
omp_context *octx = ctx->outer;
|
||||
gcc_assert (octx);
|
||||
if (!octx->scan_inclusive && !has_clauses)
|
||||
{
|
||||
gimple_stmt_iterator gsi2 = *gsi_p;
|
||||
gsi_next (&gsi2);
|
||||
gimple *stmt2 = gsi_stmt (gsi2);
|
||||
/* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses
|
||||
with following GIMPLE_OMP_SCAN with clauses, so that input_phase,
|
||||
the one with exclusive clause(s), comes first. */
|
||||
if (stmt2
|
||||
&& gimple_code (stmt2) == GIMPLE_OMP_SCAN
|
||||
&& gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL)
|
||||
{
|
||||
gsi_remove (gsi_p, false);
|
||||
gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT);
|
||||
ctx = maybe_lookup_ctx (stmt2);
|
||||
gcc_assert (ctx);
|
||||
lower_omp_scan (gsi_p, ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bool input_phase = has_clauses ^ octx->scan_inclusive;
|
||||
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
|
||||
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
|
||||
&& !gimple_omp_for_combined_into_p (octx->stmt)
|
||||
&& octx->scan_inclusive)
|
||||
&& !gimple_omp_for_combined_into_p (octx->stmt))
|
||||
{
|
||||
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
|
||||
OMP_CLAUSE__SIMDUID_))
|
||||
{
|
||||
tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
|
||||
lane = create_tmp_var (unsigned_type_node);
|
||||
tree t = build_int_cst (integer_type_node, 1 + !input_phase);
|
||||
tree t = build_int_cst (integer_type_node,
|
||||
input_phase ? 1
|
||||
: octx->scan_inclusive ? 2 : 3);
|
||||
gimple *g
|
||||
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
|
||||
gimple_call_set_lhs (g, lane);
|
||||
|
@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
tree val = new_var;
|
||||
tree var2 = NULL_TREE;
|
||||
tree var3 = NULL_TREE;
|
||||
tree var4 = NULL_TREE;
|
||||
tree lane0 = NULL_TREE;
|
||||
tree new_vard = new_var;
|
||||
if (omp_is_reference (var))
|
||||
{
|
||||
|
@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
DECL_ATTRIBUTES (v)))
|
||||
{
|
||||
val = unshare_expr (val);
|
||||
lane0 = TREE_OPERAND (val, 1);
|
||||
TREE_OPERAND (val, 1) = lane;
|
||||
var2 = lookup_decl (v, octx);
|
||||
if (!octx->scan_inclusive)
|
||||
var4 = lookup_decl (var2, octx);
|
||||
if (input_phase
|
||||
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
var3 = maybe_lookup_decl (var2, octx);
|
||||
var3 = maybe_lookup_decl (var4 ? var4 : var2, octx);
|
||||
if (!input_phase)
|
||||
{
|
||||
var2 = build4 (ARRAY_REF, TREE_TYPE (val),
|
||||
var2, lane, NULL_TREE, NULL_TREE);
|
||||
TREE_THIS_NOTRAP (var2) = 1;
|
||||
if (!octx->scan_inclusive)
|
||||
{
|
||||
var4 = build4 (ARRAY_REF, TREE_TYPE (val),
|
||||
var4, lane, NULL_TREE,
|
||||
NULL_TREE);
|
||||
TREE_THIS_NOTRAP (var4) = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
var2 = val;
|
||||
|
@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
else
|
||||
{
|
||||
var2 = build_outer_var_ref (var, octx);
|
||||
if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
{
|
||||
var3 = maybe_lookup_decl (new_vard, octx);
|
||||
if (var3 == new_vard)
|
||||
if (var3 == new_vard || var3 == NULL_TREE)
|
||||
var3 = NULL_TREE;
|
||||
else if (!octx->scan_inclusive && !input_phase)
|
||||
{
|
||||
var4 = maybe_lookup_decl (var3, octx);
|
||||
if (var4 == var3 || var4 == NULL_TREE)
|
||||
{
|
||||
if (TREE_ADDRESSABLE (TREE_TYPE (new_var)))
|
||||
{
|
||||
var4 = var3;
|
||||
var3 = NULL_TREE;
|
||||
}
|
||||
else
|
||||
var4 = NULL_TREE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
|
||||
var4 = create_tmp_var (TREE_TYPE (val));
|
||||
}
|
||||
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
{
|
||||
|
@ -8689,9 +8816,17 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
}
|
||||
else
|
||||
{
|
||||
tree x;
|
||||
if (!octx->scan_inclusive)
|
||||
{
|
||||
tree v4 = unshare_expr (var4);
|
||||
tree v2 = unshare_expr (var2);
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2);
|
||||
gimplify_and_add (x, &before);
|
||||
}
|
||||
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
|
||||
tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
|
||||
? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
|
||||
x = (DECL_HAS_VALUE_EXPR_P (new_vard)
|
||||
? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
|
||||
tree vexpr = val;
|
||||
if (x && omp_is_reference (var))
|
||||
vexpr = build_fold_addr_expr_loc (clause_loc, val);
|
||||
|
@ -8706,8 +8841,18 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
SET_DECL_VALUE_EXPR (new_vard, x);
|
||||
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
|
||||
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
|
||||
gimplify_and_add (x, &before);
|
||||
if (octx->scan_inclusive)
|
||||
{
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, val,
|
||||
var2);
|
||||
gimplify_and_add (x, &before);
|
||||
}
|
||||
else if (lane0 == NULL_TREE)
|
||||
{
|
||||
x = lang_hooks.decls.omp_clause_assign_op (c, val,
|
||||
var4);
|
||||
gimplify_and_add (x, &before);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -8728,10 +8873,29 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
|
||||
tree x = build2 (code, TREE_TYPE (var2),
|
||||
unshare_expr (var2), unshare_expr (val));
|
||||
gimplify_assign (unshare_expr (var2), x, &before);
|
||||
gimplify_assign (val, var2, &before);
|
||||
if (octx->scan_inclusive)
|
||||
{
|
||||
gimplify_assign (unshare_expr (var2), x, &before);
|
||||
gimplify_assign (val, var2, &before);
|
||||
}
|
||||
else
|
||||
{
|
||||
gimplify_assign (unshare_expr (var4),
|
||||
unshare_expr (var2), &before);
|
||||
gimplify_assign (var2, x, &before);
|
||||
if (lane0 == NULL_TREE)
|
||||
gimplify_assign (val, var4, &before);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!octx->scan_inclusive && !input_phase && lane0)
|
||||
{
|
||||
tree vexpr = unshare_expr (var4);
|
||||
TREE_OPERAND (vexpr, 1) = lane0;
|
||||
if (omp_is_reference (var))
|
||||
vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
|
||||
SET_DECL_VALUE_EXPR (new_vard, vexpr);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (has_clauses)
|
||||
|
|
|
@ -1,5 +1,27 @@
|
|||
2019-06-21 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.dg/vect/vect-simd-12.c: New test.
|
||||
* gcc.dg/vect/vect-simd-13.c: New test.
|
||||
* gcc.dg/vect/vect-simd-14.c: New test.
|
||||
* gcc.dg/vect/vect-simd-15.c: New test.
|
||||
* gcc.target/i386/sse2-vect-simd-12.c: New test.
|
||||
* gcc.target/i386/sse2-vect-simd-13.c: New test.
|
||||
* gcc.target/i386/sse2-vect-simd-14.c: New test.
|
||||
* gcc.target/i386/sse2-vect-simd-15.c: New test.
|
||||
* gcc.target/i386/avx2-vect-simd-12.c: New test.
|
||||
* gcc.target/i386/avx2-vect-simd-13.c: New test.
|
||||
* gcc.target/i386/avx2-vect-simd-14.c: New test.
|
||||
* gcc.target/i386/avx2-vect-simd-15.c: New test.
|
||||
* gcc.target/i386/avx512f-vect-simd-12.c: New test.
|
||||
* gcc.target/i386/avx512f-vect-simd-13.c: New test.
|
||||
* gcc.target/i386/avx512f-vect-simd-14.c: New test.
|
||||
* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
|
||||
* g++.dg/vect/simd-6.cc: New test.
|
||||
* g++.dg/vect/simd-7.cc: New test.
|
||||
* g++.dg/vect/simd-8.cc: New test.
|
||||
* g++.dg/vect/simd-9.cc: New test.
|
||||
* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.
|
||||
|
||||
PR c++/90950
|
||||
* g++.dg/gomp/lastprivate-1.C: New test.
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ f1 (int *c, int *d)
|
|||
for (i = 0; i < 64; i++)
|
||||
{
|
||||
d[i] = a;
|
||||
#pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
|
||||
#pragma omp scan exclusive (a)
|
||||
a += c[i];
|
||||
}
|
||||
}
|
||||
|
|
161
gcc/testsuite/g++.dg/vect/simd-6.cc
Normal file
161
gcc/testsuite/g++.dg/vect/simd-6.cc
Normal file
|
@ -0,0 +1,161 @@
|
|||
// { dg-require-effective-target size32plus }
|
||||
// { dg-additional-options "-fopenmp-simd" }
|
||||
// { dg-additional-options "-mavx" { target avx_runtime } }
|
||||
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
|
||||
|
||||
#include "../../gcc.dg/vect/tree-vect.h"
|
||||
|
||||
template <typename T>
|
||||
struct S {
|
||||
inline S ();
|
||||
inline ~S ();
|
||||
inline S (const S &);
|
||||
inline S & operator= (const S &);
|
||||
T s;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
S<T>::S () : s (0)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
S<T>::~S ()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
S<T>::S (const S &x)
|
||||
{
|
||||
s = x.s;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
S<T> &
|
||||
S<T>::operator= (const S &x)
|
||||
{
|
||||
s = x.s;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline void
|
||||
ini (S<T> &x)
|
||||
{
|
||||
x.s = 0;
|
||||
}
|
||||
|
||||
S<int> r, a[1024], b[1024];
|
||||
|
||||
#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
|
||||
#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
|
||||
|
||||
template <typename T>
|
||||
__attribute__((noipa)) void
|
||||
foo (S<T> *a, S<T> *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r.s += a[i].s;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__attribute__((noipa)) S<T>
|
||||
bar (void)
|
||||
{
|
||||
S<T> s;
|
||||
#pragma omp simd reduction (inscan, plus:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s.s += 2 * a[i].s;
|
||||
}
|
||||
return S<T> (s);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (S<int> *a, S<int> *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r) simdlen(1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r.s += a[i].s;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) S<int>
|
||||
qux (void)
|
||||
{
|
||||
S<int> s;
|
||||
#pragma omp simd if (0) reduction (inscan, plus:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s.s += 2 * a[i].s;
|
||||
}
|
||||
return S<int> (s);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
S<int> s;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i].s = i;
|
||||
b[i].s = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r.s != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
else
|
||||
b[i].s = 25;
|
||||
s.s += i;
|
||||
}
|
||||
if (bar<int> ().s != 1024 * 1023)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
s.s += 2 * i;
|
||||
}
|
||||
r.s = 0;
|
||||
baz (a, b);
|
||||
if (r.s != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
else
|
||||
b[i].s = 25;
|
||||
s.s += i;
|
||||
}
|
||||
if (qux ().s != 1024 * 1023)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
s.s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
124
gcc/testsuite/g++.dg/vect/simd-7.cc
Normal file
124
gcc/testsuite/g++.dg/vect/simd-7.cc
Normal file
|
@ -0,0 +1,124 @@
|
|||
// { dg-require-effective-target size32plus }
|
||||
// { dg-additional-options "-fopenmp-simd" }
|
||||
// { dg-additional-options "-mavx" { target avx_runtime } }
|
||||
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#include "../../gcc.dg/vect/tree-vect.h"
|
||||
|
||||
int r, a[1024], b[1024], q;
|
||||
|
||||
template <typename T, typename U>
|
||||
__attribute__((noipa)) void
|
||||
foo (T a, T b, U r)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__attribute__((noipa)) T
|
||||
bar (void)
|
||||
{
|
||||
T &s = q;
|
||||
q = 0;
|
||||
#pragma omp simd reduction (inscan, +:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__attribute__((noipa)) void
|
||||
baz (T *a, T *b, T &r)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
|
||||
for (T i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__attribute__((noipa)) int
|
||||
qux (void)
|
||||
{
|
||||
T s = q;
|
||||
q = 0;
|
||||
#pragma omp simd reduction (inscan, +:s) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo<int *, int &> (a, b, r);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = 25;
|
||||
s += i;
|
||||
}
|
||||
if (bar<int> () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -1;
|
||||
s += 2 * i;
|
||||
}
|
||||
r = 0;
|
||||
baz<int> (a, b, r);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -25;
|
||||
s += i;
|
||||
}
|
||||
if (qux<int &> () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
122
gcc/testsuite/g++.dg/vect/simd-8.cc
Normal file
122
gcc/testsuite/g++.dg/vect/simd-8.cc
Normal file
|
@ -0,0 +1,122 @@
|
|||
// { dg-require-effective-target size32plus }
|
||||
// { dg-additional-options "-fopenmp-simd" }
|
||||
// { dg-additional-options "-mavx" { target avx_runtime } }
|
||||
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
|
||||
|
||||
#include "../../gcc.dg/vect/tree-vect.h"
|
||||
|
||||
int r, a[1024], b[1024], q;
|
||||
|
||||
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (int *a, int *b, int &r)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, foo:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
bar (void)
|
||||
{
|
||||
int &s = q;
|
||||
q = 0;
|
||||
#pragma omp simd reduction (inscan, foo:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (int *a, int *b, int &r)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
qux (void)
|
||||
{
|
||||
int &s = q;
|
||||
q = 0;
|
||||
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b, r);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = 25;
|
||||
s += i;
|
||||
}
|
||||
if (bar () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -1;
|
||||
s += 2 * i;
|
||||
}
|
||||
r = 0;
|
||||
baz (a, b, r);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -25;
|
||||
s += i;
|
||||
}
|
||||
if (qux () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
153
gcc/testsuite/g++.dg/vect/simd-9.cc
Normal file
153
gcc/testsuite/g++.dg/vect/simd-9.cc
Normal file
|
@ -0,0 +1,153 @@
|
|||
// { dg-require-effective-target size32plus }
|
||||
// { dg-additional-options "-fopenmp-simd" }
|
||||
// { dg-additional-options "-mavx" { target avx_runtime } }
|
||||
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
|
||||
|
||||
#include "../../gcc.dg/vect/tree-vect.h"
|
||||
|
||||
struct S {
|
||||
inline S ();
|
||||
inline ~S ();
|
||||
inline S (const S &);
|
||||
inline S & operator= (const S &);
|
||||
int s;
|
||||
};
|
||||
|
||||
S::S () : s (0)
|
||||
{
|
||||
}
|
||||
|
||||
S::~S ()
|
||||
{
|
||||
}
|
||||
|
||||
S::S (const S &x)
|
||||
{
|
||||
s = x.s;
|
||||
}
|
||||
|
||||
S &
|
||||
S::operator= (const S &x)
|
||||
{
|
||||
s = x.s;
|
||||
return *this;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ini (S &x)
|
||||
{
|
||||
x.s = 0;
|
||||
}
|
||||
|
||||
S r, a[1024], b[1024];
|
||||
|
||||
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
|
||||
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (S *a, S *b, S &r)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r.s += a[i].s;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) S
|
||||
bar (void)
|
||||
{
|
||||
S s;
|
||||
#pragma omp simd reduction (inscan, plus:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s.s += 2 * a[i].s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (S *a, S *b, S &r)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r) simdlen(1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r.s += a[i].s;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) S
|
||||
qux (void)
|
||||
{
|
||||
S s;
|
||||
#pragma omp simd if (0) reduction (inscan, plus:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s.s += 2 * a[i].s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
S s;
|
||||
check_vect ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i].s = i;
|
||||
b[i].s = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b, r);
|
||||
if (r.s != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
else
|
||||
b[i].s = 25;
|
||||
s.s += i;
|
||||
}
|
||||
if (bar ().s != 1024 * 1023)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
s.s += 2 * i;
|
||||
}
|
||||
r.s = 0;
|
||||
baz (a, b, r);
|
||||
if (r.s != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
else
|
||||
b[i].s = 25;
|
||||
s.s += i;
|
||||
}
|
||||
if (qux ().s != 1024 * 1023)
|
||||
abort ();
|
||||
s.s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i].s != s.s)
|
||||
abort ();
|
||||
s.s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
122
gcc/testsuite/gcc.dg/vect/vect-simd-12.c
Normal file
122
gcc/testsuite/gcc.dg/vect/vect-simd-12.c
Normal file
|
@ -0,0 +1,122 @@
|
|||
/* { dg-require-effective-target size32plus } */
|
||||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#ifndef main
|
||||
#include "tree-vect.h"
|
||||
#endif
|
||||
|
||||
int r, a[1024], b[1024];
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
bar (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, +:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
qux (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, +:s) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
#ifndef main
|
||||
check_vect ();
|
||||
#endif
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = 25;
|
||||
s += i;
|
||||
}
|
||||
if (bar () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -1;
|
||||
s += 2 * i;
|
||||
}
|
||||
r = 0;
|
||||
baz (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -25;
|
||||
s += i;
|
||||
}
|
||||
if (qux () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
124
gcc/testsuite/gcc.dg/vect/vect-simd-13.c
Normal file
124
gcc/testsuite/gcc.dg/vect/vect-simd-13.c
Normal file
|
@ -0,0 +1,124 @@
|
|||
/* { dg-require-effective-target size32plus } */
|
||||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#ifndef main
|
||||
#include "tree-vect.h"
|
||||
#endif
|
||||
|
||||
int r, a[1024], b[1024];
|
||||
|
||||
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, foo:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
bar (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, foo:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (int *a, int *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r += a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
qux (void)
|
||||
{
|
||||
int s = 0;
|
||||
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s += 2 * a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
#ifndef main
|
||||
check_vect ();
|
||||
#endif
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = 25;
|
||||
s += i;
|
||||
}
|
||||
if (bar () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -1;
|
||||
s += 2 * i;
|
||||
}
|
||||
r = 0;
|
||||
baz (a, b);
|
||||
if (r != 1024 * 1023 / 2)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -25;
|
||||
s += i;
|
||||
}
|
||||
if (qux () != 1024 * 1023)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
94
gcc/testsuite/gcc.dg/vect/vect-simd-14.c
Normal file
94
gcc/testsuite/gcc.dg/vect/vect-simd-14.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* { dg-require-effective-target size32plus } */
|
||||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#ifndef main
|
||||
#include "tree-vect.h"
|
||||
#endif
|
||||
|
||||
float r = 1.0f, a[1024], b[1024];
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (float *a, float *b)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, *:r)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = r;
|
||||
#pragma omp scan exclusive(r)
|
||||
r *= a[i];
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) float
|
||||
bar (void)
|
||||
{
|
||||
float s = -__builtin_inff ();
|
||||
#pragma omp simd reduction (inscan, max:s)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
b[i] = s;
|
||||
#pragma omp scan exclusive(s)
|
||||
s = s > a[i] ? s : a[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
float s = 1.0f;
|
||||
#ifndef main
|
||||
check_vect ();
|
||||
#endif
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (i < 80)
|
||||
a[i] = (i & 1) ? 0.25f : 0.5f;
|
||||
else if (i < 200)
|
||||
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
|
||||
else if (i < 280)
|
||||
a[i] = (i & 1) ? 0.25f : 0.5f;
|
||||
else if (i < 380)
|
||||
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
|
||||
else
|
||||
switch (i % 6)
|
||||
{
|
||||
case 0: a[i] = 0.25f; break;
|
||||
case 1: a[i] = 2.0f; break;
|
||||
case 2: a[i] = -1.0f; break;
|
||||
case 3: a[i] = -4.0f; break;
|
||||
case 4: a[i] = 0.5f; break;
|
||||
case 5: a[i] = 1.0f; break;
|
||||
default: a[i] = 0.0f; break;
|
||||
}
|
||||
b[i] = -19.0f;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b);
|
||||
if (r * 16384.0f != 0.125f)
|
||||
abort ();
|
||||
float m = -175.25f;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
else
|
||||
b[i] = -231.75f;
|
||||
s *= a[i];
|
||||
a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
|
||||
m += 0.75f;
|
||||
}
|
||||
if (bar () != 592.0f)
|
||||
abort ();
|
||||
s = -__builtin_inff ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s)
|
||||
abort ();
|
||||
if (s < a[i])
|
||||
s = a[i];
|
||||
}
|
||||
return 0;
|
||||
}
|
186
gcc/testsuite/gcc.dg/vect/vect-simd-15.c
Normal file
186
gcc/testsuite/gcc.dg/vect/vect-simd-15.c
Normal file
|
@ -0,0 +1,186 @@
|
|||
/* { dg-require-effective-target size32plus } */
|
||||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
#ifndef main
|
||||
#include "tree-vect.h"
|
||||
#endif
|
||||
|
||||
int r, a[1024], b[1024];
|
||||
unsigned short r2, b2[1024];
|
||||
unsigned char r3, b3[1024];
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r, r2, r3)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
{
|
||||
b[i] = r;
|
||||
b2[i] = r2;
|
||||
b3[i] = r3;
|
||||
}
|
||||
#pragma omp scan exclusive(r, r2, r3)
|
||||
{ r += a[i]; r2 += a[i]; r3 += a[i]; }
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
bar (unsigned short *s2p, unsigned char *s3p)
|
||||
{
|
||||
int s = 0;
|
||||
unsigned short s2 = 0;
|
||||
unsigned char s3 = 0;
|
||||
#pragma omp simd reduction (inscan, +:s, s2, s3)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
|
||||
#pragma omp scan exclusive(s, s2, s3)
|
||||
{
|
||||
s += 2 * a[i];
|
||||
s2 += 2 * a[i];
|
||||
s3 += 2 * a[i];
|
||||
}
|
||||
}
|
||||
*s2p = s2;
|
||||
*s3p = s3;
|
||||
return s;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
|
||||
{
|
||||
#pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
{
|
||||
b[i] = r;
|
||||
b2[i] = r2;
|
||||
b3[i] = r3;
|
||||
}
|
||||
#pragma omp scan exclusive(r, r2, r3)
|
||||
{
|
||||
r += a[i];
|
||||
r2 += a[i];
|
||||
r3 += a[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) int
|
||||
qux (unsigned short *s2p, unsigned char *s3p)
|
||||
{
|
||||
int s = 0;
|
||||
unsigned short s2 = 0;
|
||||
unsigned char s3 = 0;
|
||||
#pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
|
||||
#pragma omp scan exclusive(s, s2, s3)
|
||||
{ s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
|
||||
}
|
||||
*s2p = s2;
|
||||
*s3p = s3;
|
||||
return s;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s = 0;
|
||||
unsigned short s2;
|
||||
unsigned char s3;
|
||||
#ifndef main
|
||||
check_vect ();
|
||||
#endif
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = -1;
|
||||
b2[i] = -1;
|
||||
b3[i] = -1;
|
||||
asm ("" : "+g" (i));
|
||||
}
|
||||
foo (a, b, b2, b3);
|
||||
if (r != 1024 * 1023 / 2
|
||||
|| r2 != (unsigned short) r
|
||||
|| r3 != (unsigned char) r)
|
||||
abort ();
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s
|
||||
|| b2[i] != (unsigned short) s
|
||||
|| b3[i] != (unsigned char) s)
|
||||
abort ();
|
||||
else
|
||||
{
|
||||
b[i] = 25;
|
||||
b2[i] = 24;
|
||||
b3[i] = 26;
|
||||
}
|
||||
s += i;
|
||||
}
|
||||
if (bar (&s2, &s3) != 1024 * 1023)
|
||||
abort ();
|
||||
if (s2 != (unsigned short) (1024 * 1023)
|
||||
|| s3 != (unsigned char) (1024 * 1023))
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s
|
||||
|| b2[i] != (unsigned short) s
|
||||
|| b3[i] != (unsigned char) s)
|
||||
abort ();
|
||||
else
|
||||
{
|
||||
b[i] = -1;
|
||||
b2[i] = -1;
|
||||
b3[i] = -1;
|
||||
}
|
||||
s += 2 * i;
|
||||
}
|
||||
r = 0;
|
||||
r2 = 0;
|
||||
r3 = 0;
|
||||
baz (a, b, b2, b3);
|
||||
if (r != 1024 * 1023 / 2
|
||||
|| r2 != (unsigned short) r
|
||||
|| r3 != (unsigned char) r)
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s
|
||||
|| b2[i] != (unsigned short) s
|
||||
|| b3[i] != (unsigned char) s)
|
||||
abort ();
|
||||
else
|
||||
{
|
||||
b[i] = 25;
|
||||
b2[i] = 24;
|
||||
b3[i] = 26;
|
||||
}
|
||||
s += i;
|
||||
}
|
||||
s2 = 0;
|
||||
s3 = 0;
|
||||
if (qux (&s2, &s3) != 1024 * 1023)
|
||||
abort ();
|
||||
if (s2 != (unsigned short) (1024 * 1023)
|
||||
|| s3 != (unsigned char) (1024 * 1023))
|
||||
abort ();
|
||||
s = 0;
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
if (b[i] != s
|
||||
|| b2[i] != (unsigned short) s
|
||||
|| b3[i] != (unsigned char) s)
|
||||
abort ();
|
||||
s += 2 * i;
|
||||
}
|
||||
return 0;
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-12.c"
|
||||
|
||||
static void
|
||||
avx2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-13.c"
|
||||
|
||||
static void
|
||||
avx2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-14.c"
|
||||
|
||||
static void
|
||||
avx2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-15.c"
|
||||
|
||||
static void
|
||||
avx2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx512bw } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx512bw-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-15.c"
|
||||
|
||||
static void
|
||||
avx512bw_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx512f } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx512f-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-12.c"
|
||||
|
||||
static void
|
||||
avx512f_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx512f } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx512f-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-13.c"
|
||||
|
||||
static void
|
||||
avx512f_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target avx512f } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "avx512f-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-14.c"
|
||||
|
||||
static void
|
||||
avx512f_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c
Normal file
16
gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "sse2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-12.c"
|
||||
|
||||
static void
|
||||
sse2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c
Normal file
16
gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "sse2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-13.c"
|
||||
|
||||
static void
|
||||
sse2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
15
gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c
Normal file
15
gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c
Normal file
|
@ -0,0 +1,15 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
|
||||
#include "sse2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-14.c"
|
||||
|
||||
static void
|
||||
sse2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c
Normal file
16
gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
|
||||
|
||||
#include "sse2-check.h"
|
||||
|
||||
#define main() do_main ()
|
||||
|
||||
#include "../../gcc.dg/vect/vect-simd-15.c"
|
||||
|
||||
static void
|
||||
sse2_test (void)
|
||||
{
|
||||
do_main ();
|
||||
}
|
|
@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
|
|||
/* See if this was detected as SIMD lane access. */
|
||||
if (dr->aux == (void *)-1
|
||||
|| dr->aux == (void *)-2
|
||||
|| dr->aux == (void *)-3)
|
||||
|| dr->aux == (void *)-3
|
||||
|| dr->aux == (void *)-4)
|
||||
{
|
||||
if (nested_in_vect_loop_p (loop, stmt_info))
|
||||
return opt_result::failure_at (stmt_info->stmt,
|
||||
|
|
|
@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
kinds are there in order to allow optimizing the initializer store
|
||||
and combiner sequence, e.g. if it is originally some C++ish user
|
||||
defined reduction, but allow the vectorizer to pattern recognize it
|
||||
and turn into the appropriate vectorized scan. */
|
||||
and turn into the appropriate vectorized scan.
|
||||
|
||||
For exclusive scan, this is slightly different:
|
||||
#pragma omp simd reduction(inscan,+:r)
|
||||
for (...)
|
||||
{
|
||||
use (r);
|
||||
#pragma omp scan exclusive (r)
|
||||
r += something ();
|
||||
}
|
||||
shall have body with:
|
||||
// Initialization for input phase, store the reduction initializer:
|
||||
_20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
|
||||
_21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
|
||||
D.2042[_21] = 0;
|
||||
// Actual input phase:
|
||||
...
|
||||
r.0_5 = D.2042[_20];
|
||||
_6 = _4 + r.0_5;
|
||||
D.2042[_20] = _6;
|
||||
// Initialization for scan phase:
|
||||
_25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
|
||||
_26 = D.2043[_25];
|
||||
D.2044[_25] = _26;
|
||||
_27 = D.2042[_25];
|
||||
_28 = _26 + _27;
|
||||
D.2043[_25] = _28;
|
||||
// Actual scan phase:
|
||||
...
|
||||
r.1_8 = D.2044[_20];
|
||||
... */
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
|
||||
{
|
||||
|
@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
if (TREE_CODE (rhs) != SSA_NAME)
|
||||
goto fail;
|
||||
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
gimple *other_store_stmt = NULL;
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
|
||||
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
bool inscan_var_store
|
||||
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
|
||||
{
|
||||
gimple *use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt == stmt || is_gimple_debug (use_stmt))
|
||||
continue;
|
||||
if (gimple_bb (use_stmt) != gimple_bb (stmt)
|
||||
|| !gimple_store_p (use_stmt)
|
||||
|| other_store_stmt)
|
||||
goto fail;
|
||||
other_store_stmt = use_stmt;
|
||||
if (!inscan_var_store)
|
||||
{
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
|
||||
{
|
||||
gimple *use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt == stmt || is_gimple_debug (use_stmt))
|
||||
continue;
|
||||
if (gimple_bb (use_stmt) != gimple_bb (stmt)
|
||||
|| !is_gimple_assign (use_stmt)
|
||||
|| gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
|
||||
|| other_store_stmt
|
||||
|| TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
|
||||
goto fail;
|
||||
other_store_stmt = use_stmt;
|
||||
}
|
||||
if (other_store_stmt == NULL)
|
||||
goto fail;
|
||||
rhs = gimple_assign_lhs (other_store_stmt);
|
||||
if (!single_imm_use (rhs, &use_p, &other_store_stmt))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
if (other_store_stmt == NULL)
|
||||
goto fail;
|
||||
stmt_vec_info other_store_stmt_info
|
||||
= loop_vinfo->lookup_stmt (other_store_stmt);
|
||||
if (other_store_stmt_info == NULL
|
||||
|| STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
|
||||
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
|
||||
{
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
|
||||
{
|
||||
gimple *use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt == stmt || is_gimple_debug (use_stmt))
|
||||
continue;
|
||||
if (other_store_stmt)
|
||||
goto fail;
|
||||
other_store_stmt = use_stmt;
|
||||
}
|
||||
}
|
||||
else
|
||||
goto fail;
|
||||
|
||||
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
|
||||
|
@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
|
||||
tree rhs1 = gimple_assign_rhs1 (def_stmt);
|
||||
tree rhs2 = gimple_assign_rhs2 (def_stmt);
|
||||
if (TREE_CODE (rhs1) != SSA_NAME
|
||||
|| TREE_CODE (rhs2) != SSA_NAME)
|
||||
if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
|
||||
goto fail;
|
||||
|
||||
gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
|
||||
|
@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
|
||||
if (load1_stmt_info == NULL
|
||||
|| load2_stmt_info == NULL
|
||||
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
|
||||
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
|
||||
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
|
||||
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
|
||||
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
|
||||
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
|
||||
goto fail;
|
||||
|
||||
if (scan_operand_equal_p (gimple_assign_lhs (stmt),
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
|
||||
{
|
||||
dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
|
||||
if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
|
||||
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
|
||||
goto fail;
|
||||
tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
|
||||
tree lrhs;
|
||||
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
|
||||
lrhs = rhs1;
|
||||
else
|
||||
lrhs = rhs2;
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
|
||||
{
|
||||
gimple *use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
|
||||
continue;
|
||||
if (other_store_stmt)
|
||||
goto fail;
|
||||
other_store_stmt = use_stmt;
|
||||
}
|
||||
}
|
||||
|
||||
if (other_store_stmt == NULL)
|
||||
goto fail;
|
||||
if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
|
||||
|| !gimple_store_p (other_store_stmt))
|
||||
goto fail;
|
||||
|
||||
stmt_vec_info other_store_stmt_info
|
||||
= loop_vinfo->lookup_stmt (other_store_stmt);
|
||||
if (other_store_stmt_info == NULL
|
||||
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
|
||||
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
|
||||
goto fail;
|
||||
|
||||
gimple *stmt1 = stmt;
|
||||
gimple *stmt2 = other_store_stmt;
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
|
||||
std::swap (stmt1, stmt2);
|
||||
if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
|
||||
gimple_assign_rhs1 (load2_stmt)))
|
||||
{
|
||||
std::swap (rhs1, rhs2);
|
||||
std::swap (load1_stmt, load2_stmt);
|
||||
std::swap (load1_stmt_info, load2_stmt_info);
|
||||
}
|
||||
if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
|
||||
gimple_assign_rhs1 (load1_stmt))
|
||||
|| !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
|
||||
if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
|
||||
gimple_assign_rhs1 (load1_stmt)))
|
||||
goto fail;
|
||||
|
||||
tree var3 = NULL_TREE;
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
|
||||
&& !scan_operand_equal_p (gimple_assign_lhs (stmt2),
|
||||
gimple_assign_rhs1 (load2_stmt)))
|
||||
goto fail;
|
||||
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
|
||||
{
|
||||
dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
|
||||
if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
|
||||
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
|
||||
goto fail;
|
||||
var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
|
||||
if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
|
||||
|| lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
|
||||
|| lookup_attribute ("omp simd inscan exclusive",
|
||||
DECL_ATTRIBUTES (var3)))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
|
||||
if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
|
||||
|
@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
|
||||
std::swap (var1, var2);
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
|
||||
{
|
||||
if (!lookup_attribute ("omp simd inscan exclusive",
|
||||
DECL_ATTRIBUTES (var1)))
|
||||
goto fail;
|
||||
var1 = var3;
|
||||
}
|
||||
|
||||
if (loop_vinfo->scan_map == NULL)
|
||||
goto fail;
|
||||
tree *init = loop_vinfo->scan_map->get (var1);
|
||||
|
@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
goto fail;
|
||||
|
||||
/* The IL is as expected, now check if we can actually vectorize it.
|
||||
Inclusive scan:
|
||||
_26 = D.2043[_25];
|
||||
_27 = D.2042[_25];
|
||||
_28 = _26 + _27;
|
||||
|
@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
|
|||
from the D.2042[_21] = 0; store):
|
||||
_30 = MEM <vector(8) int> [(int *)&D.2043];
|
||||
_31 = MEM <vector(8) int> [(int *)&D.2042];
|
||||
_32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
|
||||
_32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
|
||||
_33 = _31 + _32;
|
||||
// _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
|
||||
_34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
|
||||
_34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
|
||||
_35 = _33 + _34;
|
||||
// _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
|
||||
// _31[1]+.._31[4], ... _31[4]+.._31[7] };
|
||||
_36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
|
||||
_36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
|
||||
_37 = _35 + _36;
|
||||
// _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
|
||||
// _31[0]+.._31[4], ... _31[0]+.._31[7] };
|
||||
_38 = _30 + _37;
|
||||
_39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
|
||||
MEM <vector(8) int> [(int *)&D.2043] = _39;
|
||||
MEM <vector(8) int> [(int *)&D.2042] = _38; */
|
||||
MEM <vector(8) int> [(int *)&D.2042] = _38;
|
||||
Exclusive scan:
|
||||
_26 = D.2043[_25];
|
||||
D.2044[_25] = _26;
|
||||
_27 = D.2042[_25];
|
||||
_28 = _26 + _27;
|
||||
D.2043[_25] = _28;
|
||||
should be vectorized as (where _40 is the vectorized rhs
|
||||
from the D.2042[_21] = 0; store):
|
||||
_30 = MEM <vector(8) int> [(int *)&D.2043];
|
||||
_31 = MEM <vector(8) int> [(int *)&D.2042];
|
||||
_32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
|
||||
_33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
|
||||
_34 = _32 + _33;
|
||||
// _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
|
||||
// _31[3]+_31[4], ... _31[5]+.._31[6] };
|
||||
_35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
|
||||
_36 = _34 + _35;
|
||||
// _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
|
||||
// _31[1]+.._31[4], ... _31[3]+.._31[6] };
|
||||
_37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
|
||||
_38 = _36 + _37;
|
||||
// _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
|
||||
// _31[0]+.._31[4], ... _31[0]+.._31[6] };
|
||||
_39 = _30 + _38;
|
||||
_50 = _31 + _39;
|
||||
_51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
|
||||
MEM <vector(8) int> [(int *)&D.2044] = _39;
|
||||
MEM <vector(8) int> [(int *)&D.2042] = _51; */
|
||||
enum machine_mode vec_mode = TYPE_MODE (vectype);
|
||||
optab optab = optab_for_tree_code (code, vectype, optab_default);
|
||||
if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
|
||||
|
@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
tree rhs = gimple_assign_rhs1 (stmt);
|
||||
gcc_assert (TREE_CODE (rhs) == SSA_NAME);
|
||||
|
||||
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
bool inscan_var_store
|
||||
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
|
||||
{
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
|
||||
{
|
||||
gimple *use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt == stmt || is_gimple_debug (use_stmt))
|
||||
continue;
|
||||
rhs = gimple_assign_lhs (use_stmt);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
|
||||
enum tree_code code = gimple_assign_rhs_code (def_stmt);
|
||||
if (code == POINTER_PLUS_EXPR)
|
||||
|
@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
std::swap (rhs1, rhs2);
|
||||
std::swap (var1, var2);
|
||||
std::swap (load1_dr_info, load2_dr_info);
|
||||
}
|
||||
|
||||
tree *init = loop_vinfo->scan_map->get (var1);
|
||||
gcc_assert (init);
|
||||
|
||||
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
bool inscan_var_store
|
||||
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
|
||||
|
||||
unsigned HOST_WIDE_INT nunits;
|
||||
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
|
||||
gcc_unreachable ();
|
||||
|
@ -6789,29 +6957,50 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
tree vec_oprnd1 = NULL_TREE;
|
||||
tree vec_oprnd2 = NULL_TREE;
|
||||
tree vec_oprnd3 = NULL_TREE;
|
||||
tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
|
||||
tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
|
||||
tree dataref_offset = build_int_cst (ref_type, 0);
|
||||
tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
|
||||
tree ldataref_ptr = NULL_TREE;
|
||||
tree orig = NULL_TREE;
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
|
||||
ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
|
||||
for (int j = 0; j < ncopies; j++)
|
||||
{
|
||||
stmt_vec_info new_stmt_info;
|
||||
if (j == 0)
|
||||
{
|
||||
vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
|
||||
vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
|
||||
if (ldataref_ptr == NULL)
|
||||
vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
|
||||
vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
|
||||
orig = vec_oprnd3;
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
|
||||
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
|
||||
if (ldataref_ptr == NULL)
|
||||
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
|
||||
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
|
||||
if (!inscan_var_store)
|
||||
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
|
||||
}
|
||||
|
||||
if (ldataref_ptr)
|
||||
{
|
||||
vec_oprnd2 = make_ssa_name (vectype);
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype,
|
||||
unshare_expr (ldataref_ptr),
|
||||
dataref_offset);
|
||||
vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
|
||||
gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
if (prev_stmt_info == NULL)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
}
|
||||
|
||||
tree v = vec_oprnd2;
|
||||
for (int i = 0; i < units_log2; ++i)
|
||||
{
|
||||
|
@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
new_temp = new_temp2;
|
||||
}
|
||||
|
||||
/* For exclusive scan, perform the perms[i] permutation once
|
||||
more. */
|
||||
if (i == 0
|
||||
&& STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
|
||||
&& v == vec_oprnd2)
|
||||
{
|
||||
v = new_temp;
|
||||
--i;
|
||||
continue;
|
||||
}
|
||||
|
||||
tree new_temp2 = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (new_temp2, code, v, new_temp);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
|
@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
tree last_perm_arg = new_temp;
|
||||
/* For exclusive scan, new_temp computed above is the exclusive scan
|
||||
prefix sum. Turn it into inclusive prefix sum for the broadcast
|
||||
of the last element into orig. */
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
|
||||
{
|
||||
last_perm_arg = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
}
|
||||
|
||||
orig = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
|
||||
perms[units_log2]);
|
||||
g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
|
||||
last_perm_arg, perms[units_log2]);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
if (!inscan_var_store)
|
||||
{
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype,
|
||||
unshare_expr (dataref_ptr),
|
||||
dataref_offset);
|
||||
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
|
||||
g = gimple_build_assign (data_ref, new_temp);
|
||||
|
@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
if (j != 0)
|
||||
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
|
||||
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
|
||||
tree data_ref = fold_build2 (MEM_REF, vectype,
|
||||
unshare_expr (dataref_ptr),
|
||||
dataref_offset);
|
||||
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
|
||||
gimple *g = gimple_build_assign (data_ref, orig);
|
||||
|
@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
return true;
|
||||
}
|
||||
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
|
||||
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
|
||||
return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
|
||||
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
|
|
|
@ -917,7 +917,7 @@ struct _stmt_vec_info {
|
|||
bool strided_p;
|
||||
|
||||
/* For both loads and stores. */
|
||||
unsigned simd_lane_access_p : 2;
|
||||
unsigned simd_lane_access_p : 3;
|
||||
|
||||
/* Classifies how the load or store is going to be implemented
|
||||
for loop vectorization. */
|
||||
|
|
Loading…
Add table
Reference in a new issue