re PR lto/70289 ([openacc] ICE in input_varpool_node)
gcc/ PR lto/70289 PR ipa/70348 PR tree-optimization/70373 PR middle-end/70533 PR middle-end/70534 PR middle-end/70535 * gimplify.c (gimplify_adjust_omp_clauses): Add or adjust data clauses for acc parallel reductions as necessary. Error on those that are private. * omp-low.c (scan_sharing_clauses): Don't install variables which are used in acc parallel reductions. (lower_rec_input_clauses): Remove dead code. (lower_oacc_reductions): Add support for reference reductions. (lower_reduction_clauses): Remove dead code. (lower_omp_target): Don't remap variables appearing in acc parallel reductions. * tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro. gcc/testsuite/ * c-c++-common/goacc/reduction-5.c: New test. * c-c++-common/goacc/reduction-promotions.c: New test. * gfortran.dg/goacc/reduction-3.f95: New test. * gfortran.dg/goacc/reduction-promotions.f90: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Add test coverage. * testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test. * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Add test coverage. * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test. * testsuite/libgomp.oacc-c-c++-common/reduction.h: New test. * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test. * testsuite/libgomp.oacc-fortran/pr70289.f90: New test. * testsuite/libgomp.oacc-fortran/reduction-1.f90: Add test coverage. * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-7.f90: New test. From-SVN: r234840
This commit is contained in:
parent
51a4b0f187
commit
c42cfb5ca3
49 changed files with 2902 additions and 568 deletions
|
@ -1,3 +1,23 @@
|
|||
2016-04-08 Cesar Philippidis <cesar@codesourcery.com>
|
||||
|
||||
PR lto/70289
|
||||
PR ipa/70348
|
||||
PR tree-optimization/70373
|
||||
PR middle-end/70533
|
||||
PR middle-end/70534
|
||||
PR middle-end/70535
|
||||
* gimplify.c (gimplify_adjust_omp_clauses): Add or adjust data
|
||||
clauses for acc parallel reductions as necessary. Error on those
|
||||
that are private.
|
||||
* omp-low.c (scan_sharing_clauses): Don't install variables which
|
||||
are used in acc parallel reductions.
|
||||
(lower_rec_input_clauses): Remove dead code.
|
||||
(lower_oacc_reductions): Add support for reference reductions.
|
||||
(lower_reduction_clauses): Remove dead code.
|
||||
(lower_omp_target): Don't remap variables appearing in acc parallel
|
||||
reductions.
|
||||
* tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.
|
||||
|
||||
2016-04-08 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/70593
|
||||
|
|
|
@ -7987,6 +7987,34 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
|
|||
break;
|
||||
}
|
||||
decl = OMP_CLAUSE_DECL (c);
|
||||
/* Data clasues associated with acc parallel reductions must be
|
||||
compatible with present_or_copy. Warn and adjust the clause
|
||||
if that is not the case. */
|
||||
if (ctx->region_type == ORT_ACC_PARALLEL)
|
||||
{
|
||||
tree t = DECL_P (decl) ? decl : TREE_OPERAND (decl, 0);
|
||||
n = NULL;
|
||||
|
||||
if (DECL_P (t))
|
||||
n = splay_tree_lookup (ctx->variables, (splay_tree_key) t);
|
||||
|
||||
if (n && (n->value & GOVD_REDUCTION))
|
||||
{
|
||||
enum gomp_map_kind kind = OMP_CLAUSE_MAP_KIND (c);
|
||||
|
||||
OMP_CLAUSE_MAP_IN_REDUCTION (c) = 1;
|
||||
if ((kind & GOMP_MAP_TOFROM) != GOMP_MAP_TOFROM
|
||||
&& kind != GOMP_MAP_FORCE_PRESENT
|
||||
&& kind != GOMP_MAP_POINTER)
|
||||
{
|
||||
warning_at (OMP_CLAUSE_LOCATION (c), 0,
|
||||
"incompatible data clause with reduction "
|
||||
"on %qE; promoting to present_or_copy",
|
||||
DECL_NAME (t));
|
||||
OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!DECL_P (decl))
|
||||
{
|
||||
if ((ctx->region_type & ORT_TARGET) != 0
|
||||
|
@ -8118,6 +8146,33 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
|
|||
|
||||
case OMP_CLAUSE_REDUCTION:
|
||||
decl = OMP_CLAUSE_DECL (c);
|
||||
/* OpenACC reductions need a present_or_copy data clause.
|
||||
Add one if necessary. Error is the reduction is private. */
|
||||
if (ctx->region_type == ORT_ACC_PARALLEL)
|
||||
{
|
||||
n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
|
||||
if (n->value & (GOVD_PRIVATE | GOVD_FIRSTPRIVATE))
|
||||
error_at (OMP_CLAUSE_LOCATION (c), "invalid private "
|
||||
"reduction on %qE", DECL_NAME (decl));
|
||||
else if ((n->value & GOVD_MAP) == 0)
|
||||
{
|
||||
tree next = OMP_CLAUSE_CHAIN (c);
|
||||
tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_MAP);
|
||||
OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_TOFROM);
|
||||
OMP_CLAUSE_DECL (nc) = decl;
|
||||
OMP_CLAUSE_CHAIN (c) = nc;
|
||||
lang_hooks.decls.omp_finish_clause (nc, pre_p);
|
||||
while (1)
|
||||
{
|
||||
OMP_CLAUSE_MAP_IN_REDUCTION (nc) = 1;
|
||||
if (OMP_CLAUSE_CHAIN (nc) == NULL)
|
||||
break;
|
||||
nc = OMP_CLAUSE_CHAIN (nc);
|
||||
}
|
||||
OMP_CLAUSE_CHAIN (nc) = next;
|
||||
n->value |= GOVD_MAP;
|
||||
}
|
||||
}
|
||||
if (DECL_P (decl)
|
||||
&& omp_shared_to_firstprivate_optimizable_decl_p (decl))
|
||||
omp_mark_stores (gimplify_omp_ctxp->outer_context, decl);
|
||||
|
|
|
@ -2122,7 +2122,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
|
|||
else
|
||||
install_var_field (decl, true, 3, ctx,
|
||||
base_pointers_restrict);
|
||||
if (is_gimple_omp_offloaded (ctx->stmt))
|
||||
if (is_gimple_omp_offloaded (ctx->stmt)
|
||||
&& !OMP_CLAUSE_MAP_IN_REDUCTION (c))
|
||||
install_var_local (decl, ctx);
|
||||
}
|
||||
}
|
||||
|
@ -4839,7 +4840,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
gimplify_assign (ptr, x, ilist);
|
||||
}
|
||||
}
|
||||
else if (is_reference (var) && !is_oacc_parallel (ctx))
|
||||
else if (is_reference (var))
|
||||
{
|
||||
/* For references that are being privatized for Fortran,
|
||||
allocate new backing storage for the new pointer
|
||||
|
@ -5575,7 +5576,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|
|||
tree orig = OMP_CLAUSE_DECL (c);
|
||||
tree var = maybe_lookup_decl (orig, ctx);
|
||||
tree ref_to_res = NULL_TREE;
|
||||
tree incoming, outgoing;
|
||||
tree incoming, outgoing, v1, v2, v3;
|
||||
bool is_private = false;
|
||||
|
||||
enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
|
||||
if (rcode == MINUS_EXPR)
|
||||
|
@ -5588,7 +5590,6 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|
|||
|
||||
if (!var)
|
||||
var = orig;
|
||||
gcc_assert (!is_reference (var));
|
||||
|
||||
incoming = outgoing = var;
|
||||
|
||||
|
@ -5624,22 +5625,38 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|
|||
for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
|
||||
if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
|
||||
&& orig == OMP_CLAUSE_DECL (cls))
|
||||
goto has_outer_reduction;
|
||||
{
|
||||
incoming = outgoing = lookup_decl (orig, probe);
|
||||
goto has_outer_reduction;
|
||||
}
|
||||
else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
|
||||
|| OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
|
||||
&& orig == OMP_CLAUSE_DECL (cls))
|
||||
{
|
||||
is_private = true;
|
||||
goto do_lookup;
|
||||
}
|
||||
}
|
||||
|
||||
do_lookup:
|
||||
/* This is the outermost construct with this reduction,
|
||||
see if there's a mapping for it. */
|
||||
if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
|
||||
&& maybe_lookup_field (orig, outer))
|
||||
&& maybe_lookup_field (orig, outer) && !is_private)
|
||||
{
|
||||
ref_to_res = build_receiver_ref (orig, false, outer);
|
||||
if (is_reference (orig))
|
||||
ref_to_res = build_simple_mem_ref (ref_to_res);
|
||||
|
||||
tree type = TREE_TYPE (var);
|
||||
if (POINTER_TYPE_P (type))
|
||||
type = TREE_TYPE (type);
|
||||
|
||||
outgoing = var;
|
||||
incoming = omp_reduction_init_op (loc, rcode, TREE_TYPE (var));
|
||||
incoming = omp_reduction_init_op (loc, rcode, type);
|
||||
}
|
||||
else if (ctx->outer)
|
||||
incoming = outgoing = lookup_decl (orig, ctx->outer);
|
||||
else
|
||||
incoming = outgoing = orig;
|
||||
|
||||
|
@ -5649,6 +5666,37 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|
|||
if (!ref_to_res)
|
||||
ref_to_res = integer_zero_node;
|
||||
|
||||
if (is_reference (orig))
|
||||
{
|
||||
tree type = TREE_TYPE (var);
|
||||
const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
|
||||
|
||||
if (!inner)
|
||||
{
|
||||
tree x = create_tmp_var (TREE_TYPE (type), id);
|
||||
gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
|
||||
}
|
||||
|
||||
v1 = create_tmp_var (type, id);
|
||||
v2 = create_tmp_var (type, id);
|
||||
v3 = create_tmp_var (type, id);
|
||||
|
||||
gimplify_assign (v1, var, fork_seq);
|
||||
gimplify_assign (v2, var, fork_seq);
|
||||
gimplify_assign (v3, var, fork_seq);
|
||||
|
||||
var = build_simple_mem_ref (var);
|
||||
v1 = build_simple_mem_ref (v1);
|
||||
v2 = build_simple_mem_ref (v2);
|
||||
v3 = build_simple_mem_ref (v3);
|
||||
outgoing = build_simple_mem_ref (outgoing);
|
||||
|
||||
if (TREE_CODE (incoming) != INTEGER_CST)
|
||||
incoming = build_simple_mem_ref (incoming);
|
||||
}
|
||||
else
|
||||
v1 = v2 = v3 = var;
|
||||
|
||||
/* Determine position in reduction buffer, which may be used
|
||||
by target. */
|
||||
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
|
||||
|
@ -5678,20 +5726,20 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|
|||
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
|
||||
TREE_TYPE (var), 6, init_code,
|
||||
unshare_expr (ref_to_res),
|
||||
var, level, op, off);
|
||||
v1, level, op, off);
|
||||
tree fini_call
|
||||
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
|
||||
TREE_TYPE (var), 6, fini_code,
|
||||
unshare_expr (ref_to_res),
|
||||
var, level, op, off);
|
||||
v2, level, op, off);
|
||||
tree teardown_call
|
||||
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
|
||||
TREE_TYPE (var), 6, teardown_code,
|
||||
ref_to_res, var, level, op, off);
|
||||
ref_to_res, v3, level, op, off);
|
||||
|
||||
gimplify_assign (var, setup_call, &before_fork);
|
||||
gimplify_assign (var, init_call, &after_fork);
|
||||
gimplify_assign (var, fini_call, &before_join);
|
||||
gimplify_assign (v1, setup_call, &before_fork);
|
||||
gimplify_assign (v2, init_call, &after_fork);
|
||||
gimplify_assign (v3, fini_call, &before_join);
|
||||
gimplify_assign (outgoing, teardown_call, &after_join);
|
||||
}
|
||||
|
||||
|
@ -5933,9 +5981,6 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
if (is_gimple_omp_oacc (ctx->stmt))
|
||||
return;
|
||||
|
||||
stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START),
|
||||
0);
|
||||
gimple_seq_add_stmt (stmt_seqp, stmt);
|
||||
|
@ -15829,7 +15874,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
if (!maybe_lookup_field (var, ctx))
|
||||
continue;
|
||||
|
||||
if (offloaded)
|
||||
/* Don't remap oacc parallel reduction variables, because the
|
||||
intermediate result must be local to each gang. */
|
||||
if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
|
||||
&& OMP_CLAUSE_MAP_IN_REDUCTION (c)))
|
||||
{
|
||||
x = build_receiver_ref (var, true, ctx);
|
||||
tree new_var = lookup_decl (var, ctx);
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
2016-04-08 Cesar Philippidis <cesar@codesourcery.com>
|
||||
|
||||
PR lto/70289
|
||||
PR ipa/70348
|
||||
PR tree-optimization/70373
|
||||
PR middle-end/70533
|
||||
PR middle-end/70534
|
||||
PR middle-end/70535
|
||||
* c-c++-common/goacc/reduction-5.c: New test.
|
||||
* c-c++-common/goacc/reduction-promotions.c: New test.
|
||||
* gfortran.dg/goacc/reduction-3.f95: New test.
|
||||
* gfortran.dg/goacc/reduction-promotions.f90: New test.
|
||||
|
||||
2016-04-08 Patrick Palka <ppalka@gcc.gnu.org>
|
||||
|
||||
PR c++/70590
|
||||
|
|
16
gcc/testsuite/c-c++-common/goacc/reduction-5.c
Normal file
16
gcc/testsuite/c-c++-common/goacc/reduction-5.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* Integer reductions. */
|
||||
|
||||
#define n 1000
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int v1;
|
||||
|
||||
#pragma acc parallel reduction(+:v1) private(v1) /* { dg-error "appears more than once in data clauses" } */
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1) firstprivate(v1) /* { dg-error "appears more than once in data clauses" } */
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
32
gcc/testsuite/c-c++-common/goacc/reduction-promotions.c
Normal file
32
gcc/testsuite/c-c++-common/goacc/reduction-promotions.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* Integer reductions. */
|
||||
|
||||
#define n 1000
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int v1, v2;
|
||||
|
||||
#pragma acc parallel reduction(+:v1,v2)
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) copy(v1,v2)
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) pcopy(v1,v2)
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) present(v1,v2)
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) copyin(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) pcopyin(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) copyout(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) pcopyout(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) create(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
|
||||
;
|
||||
#pragma acc parallel reduction(+:v1,v2) pcreate(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
10
gcc/testsuite/gfortran.dg/goacc/reduction-3.f95
Normal file
10
gcc/testsuite/gfortran.dg/goacc/reduction-3.f95
Normal file
|
@ -0,0 +1,10 @@
|
|||
! { dg-do compile }
|
||||
|
||||
subroutine foo (ia1)
|
||||
integer :: i1, i2
|
||||
|
||||
!$acc parallel reduction (+:i1) private(i1) ! { dg-error "invalid private reduction on .i1." }
|
||||
!$acc end parallel
|
||||
!$acc parallel reduction (+:i2) firstprivate(i2) ! { dg-error "invalid private reduction on .i2." }
|
||||
!$acc end parallel
|
||||
end subroutine foo
|
46
gcc/testsuite/gfortran.dg/goacc/reduction-promotions.f90
Normal file
46
gcc/testsuite/gfortran.dg/goacc/reduction-promotions.f90
Normal file
|
@ -0,0 +1,46 @@
|
|||
! Ensure that each parallel reduction variable as a copy or pcopy
|
||||
! data clause.
|
||||
|
||||
! { dg-additional-options "-fdump-tree-gimple" }
|
||||
|
||||
program test
|
||||
implicit none
|
||||
integer :: v1, v2
|
||||
|
||||
!$acc parallel reduction(+:v1,v2)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) copy(v1,v2)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) pcopy(v1,v2)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) present(v1,v2)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) copyin(v1,v2) ! { dg-warning "incompatible data clause" }
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) pcopyin(v1,v2) ! { dg-warning "incompatible data clause" }
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) copyout(v1,v2) ! { dg-warning "incompatible data clause" }
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) pcopyout(v1,v2) ! { dg-warning "incompatible data clause" }
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) create(v1,v2) ! { dg-warning "incompatible data clause" }
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:v1,v2) pcreate(v1,v2) ! { dg-warning "incompatible data clause" }
|
||||
!$acc end parallel
|
||||
end program test
|
||||
|
||||
! { dg-final { scan-tree-dump-times "map.tofrom:v1" 8 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "map.tofrom:v2" 8 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "map.force_tofrom:v1" 1 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "map.force_tofrom:v2" 1 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "map.force_present:v1" 1 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "map.force_present:v2" 1 "gimple" } }
|
|
@ -1536,6 +1536,9 @@ extern void protected_set_expr_location (tree, location_t);
|
|||
treatment if OMP_CLAUSE_SIZE is zero. */
|
||||
#define OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION(NODE) \
|
||||
TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
|
||||
/* Nonzero if this map clause is for an ACC parallel reduction variable. */
|
||||
#define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \
|
||||
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
|
||||
|
||||
#define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
|
||||
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
|
||||
|
|
|
@ -1,3 +1,65 @@
|
|||
2016-04-08 Cesar Philippidis <cesar@codesourcery.com>
|
||||
|
||||
PR lto/70289
|
||||
PR ipa/70348
|
||||
PR tree-optimization/70373
|
||||
PR middle-end/70533
|
||||
PR middle-end/70534
|
||||
PR middle-end/70535
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New
|
||||
test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Add test
|
||||
coverage.
|
||||
* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Add test
|
||||
coverage.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
|
||||
* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
|
||||
* testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-1.f90: Add test coverage.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.
|
||||
|
||||
2016-03-30 Thomas Schwinge <thomas@codesourcery.com>
|
||||
James Norris <jnorris@codesourcery.com>
|
||||
Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs, non-private reduction
|
||||
variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, arr[1024], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res)
|
||||
{
|
||||
#pragma acc loop gang reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
res = hres = 1;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res)
|
||||
{
|
||||
#pragma acc loop gang reduction(*:res)
|
||||
for (i = 0; i < 12; i++)
|
||||
res *= arr[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 12; i++)
|
||||
hres *= arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs and vectors, non-private
|
||||
reduction variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, arr[1024], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res)
|
||||
{
|
||||
#pragma acc loop gang vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs and workers, non-private
|
||||
reduction variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, arr[1024], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res)
|
||||
{
|
||||
#pragma acc loop gang worker reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
|
||||
reduction variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, arr[1024], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res)
|
||||
{
|
||||
#pragma acc loop gang worker vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
|
||||
reduction variable: separate gang and worker/vector loops). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[32768], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res)
|
||||
{
|
||||
#pragma acc loop gang reduction(+:res)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[j * 1024 + i];
|
||||
}
|
||||
/* "res" is non-private, and is not available until after the parallel
|
||||
region. */
|
||||
}
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
hres += arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
|
||||
reduction variable: separate gang and worker/vector loops). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
double arr[32768], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copyin(arr) copy(res)
|
||||
{
|
||||
#pragma acc loop gang reduction(+:res)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[j * 1024 + i];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
hres += arr[i];
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (gangs, workers and vectors, multiple
|
||||
non-private reduction variables, float type). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
float arr[32768];
|
||||
float res = 0, mres = 0, hres = 0, hmres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
copy(res, mres)
|
||||
{
|
||||
#pragma acc loop gang reduction(+:res) reduction(max:mres)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
res += arr[j * 1024 + i];
|
||||
if (arr[j * 1024 + i] > mres)
|
||||
mres = arr[j * 1024 + i];
|
||||
}
|
||||
|
||||
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
res += arr[j * 1024 + (1023 - i)];
|
||||
if (arr[j * 1024 + (1023 - i)] > mres)
|
||||
mres = arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
hres += arr[j * 1024 + i];
|
||||
hres += arr[j * 1024 + (1023 - i)];
|
||||
if (arr[j * 1024 + i] > hmres)
|
||||
hmres = arr[j * 1024 + i];
|
||||
if (arr[j * 1024 + (1023 - i)] > hmres)
|
||||
hmres = arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
|
||||
assert (res == hres);
|
||||
assert (mres == hmres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (vectors, private reduction
|
||||
variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[1024], out[32], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
private(res) copyout(out)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
res = 0;
|
||||
|
||||
#pragma acc loop vector reduction(+:res)
|
||||
for (i = 0; i < 32; i++)
|
||||
res += arr[j * 32 + i];
|
||||
|
||||
out[j] = res;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
hres = 0;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
hres += arr[j * 32 + i];
|
||||
|
||||
assert (out[j] == hres);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (vector reduction in
|
||||
gang-partitioned/worker-partitioned mode, private reduction variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, k;
|
||||
double ina[1024], inb[1024], out[1024], acc;
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
ina[j * 32 + i] = (i == j) ? 2.0 : 0.0;
|
||||
inb[j * 32 + i] = (double) (i + j);
|
||||
}
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
private(acc) copyin(ina, inb) copyout(out)
|
||||
{
|
||||
#pragma acc loop gang worker
|
||||
for (k = 0; k < 32; k++)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
acc = 0;
|
||||
|
||||
#pragma acc loop vector reduction(+:acc)
|
||||
for (i = 0; i < 32; i++)
|
||||
acc += ina[k * 32 + i] * inb[i * 32 + j];
|
||||
|
||||
out[k * 32 + j] = acc;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (out[j * 32 + i] == (i + j) * 2);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (workers, private reduction
|
||||
variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[1024], out[32], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
private(res) copyout(out)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
res = 0;
|
||||
|
||||
#pragma acc loop worker reduction(+:res)
|
||||
for (i = 0; i < 32; i++)
|
||||
res += arr[j * 32 + i];
|
||||
|
||||
out[j] = res;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
hres = 0;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
hres += arr[j * 32 + i];
|
||||
|
||||
assert (out[j] == hres);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (workers and vectors, private reduction
|
||||
variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[1024], out[32], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
private(res) copyout(out)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
res = 0;
|
||||
|
||||
#pragma acc loop worker vector reduction(+:res)
|
||||
for (i = 0; i < 32; i++)
|
||||
res += arr[j * 32 + i];
|
||||
|
||||
out[j] = res;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
hres = 0;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
hres += arr[j * 32 + i];
|
||||
|
||||
assert (out[j] == hres);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (workers and vectors, private reduction
|
||||
variable). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[32768], out[32], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
private(res) copyout(out)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
res = j;
|
||||
|
||||
#pragma acc loop worker reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[j * 1024 + i];
|
||||
|
||||
#pragma acc loop vector reduction(+:res)
|
||||
for (i = 1023; i >= 0; i--)
|
||||
res += arr[j * 1024 + i];
|
||||
|
||||
out[j] = res;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
hres = j;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[j * 1024 + i] * 2;
|
||||
|
||||
assert (out[j] == hres);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on loop directive (workers and vectors, private reduction
|
||||
variable: gang-redundant mode). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, arr[1024], out[32], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i ^ 33;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
private(res) copyin(arr) copyout(out)
|
||||
{
|
||||
/* Private variables aren't initialized by default in openacc. */
|
||||
res = 0;
|
||||
|
||||
/* "res" should be available at the end of the following loop (and should
|
||||
have the same value redundantly in each gang). */
|
||||
#pragma acc loop worker vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[i];
|
||||
|
||||
#pragma acc loop gang (static: 1)
|
||||
for (i = 0; i < 32; i++)
|
||||
out[i] = res;
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[i];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (out[i] == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on both parallel and loop directives (worker and
|
||||
vector-partitioned loops individually in gang-partitioned mode, int
|
||||
type). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[32768], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
reduction(+:res) copy(res)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[j * 1024 + i];
|
||||
|
||||
#pragma acc loop vector reduction(+:res)
|
||||
for (i = 1023; i >= 0; i--)
|
||||
res += arr[j * 1024 + i];
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 1024; i++)
|
||||
hres += arr[j * 1024 + i] * 2;
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on both parallel and loop directives (workers and vectors
|
||||
in gang-partitioned mode, int type with XOR). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, arr[32768], res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
reduction(^:res)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker vector reduction(^:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res ^= arr[j * 1024 + i];
|
||||
|
||||
#pragma acc loop worker vector reduction(^:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res ^= arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
hres ^= arr[j * 1024 + i];
|
||||
hres ^= arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on both parallel and loop directives (workers and vectors
|
||||
together in gang-partitioned mode, float type). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
float arr[32768];
|
||||
float res = 0, hres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
reduction(+:res) copy(res)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[j * 1024 + i];
|
||||
|
||||
#pragma acc loop worker vector reduction(+:res)
|
||||
for (i = 0; i < 1024; i++)
|
||||
res += arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
hres += arr[j * 1024 + i];
|
||||
hres += arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
|
||||
assert (res == hres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on both parallel and loop directives (workers and vectors
|
||||
together in gang-partitioned mode, float type, multiple reductions). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
float arr[32768];
|
||||
float res = 0, mres = 0, hres = 0, hmres = 0;
|
||||
|
||||
for (i = 0; i < 32768; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||
reduction(+:res) reduction(max:mres) copy(res, mres)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
res += arr[j * 1024 + i];
|
||||
if (arr[j * 1024 + i] > mres)
|
||||
mres = arr[j * 1024 + i];
|
||||
}
|
||||
|
||||
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
res += arr[j * 1024 + (1023 - i)];
|
||||
if (arr[j * 1024 + (1023 - i)] > mres)
|
||||
mres = arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 32; j++)
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
hres += arr[j * 1024 + i];
|
||||
hres += arr[j * 1024 + (1023 - i)];
|
||||
if (arr[j * 1024 + i] > hmres)
|
||||
hmres = arr[j * 1024 + i];
|
||||
if (arr[j * 1024 + (1023 - i)] > hmres)
|
||||
hmres = arr[j * 1024 + (1023 - i)];
|
||||
}
|
||||
|
||||
assert (res == hres);
|
||||
assert (mres == hmres);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,40 +1,54 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of reduction on parallel directive. */
|
||||
|
||||
#define ACTUAL_GANGS 256
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int res, res2 = 0;
|
||||
int res, res1 = 0, res2 = 0;
|
||||
|
||||
#if defined(ACC_DEVICE_TYPE_host)
|
||||
# define GANGS 1
|
||||
#else
|
||||
# define GANGS 256
|
||||
#endif
|
||||
#pragma acc parallel num_gangs(GANGS) copy(res2)
|
||||
#pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
|
||||
reduction(+:res1) copy(res2, res1)
|
||||
{
|
||||
res1 += 5;
|
||||
|
||||
#pragma acc atomic
|
||||
res2 += 5;
|
||||
}
|
||||
res = GANGS * 5;
|
||||
|
||||
assert (res == res1);
|
||||
assert (res == res2);
|
||||
#undef GANGS
|
||||
|
||||
res = res2 = 1;
|
||||
res = res1 = res2 = 1;
|
||||
|
||||
#if defined(ACC_DEVICE_TYPE_host)
|
||||
# define GANGS 1
|
||||
#else
|
||||
# define GANGS 8
|
||||
#endif
|
||||
#pragma acc parallel num_gangs(GANGS) copy(res2)
|
||||
#pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
|
||||
reduction(*:res1) copy(res1, res2)
|
||||
{
|
||||
res1 *= 5;
|
||||
|
||||
#pragma acc atomic
|
||||
res2 *= 5;
|
||||
}
|
||||
for (int i = 0; i < GANGS; ++i)
|
||||
res *= 5;
|
||||
|
||||
assert (res == res1);
|
||||
assert (res == res2);
|
||||
#undef GANGS
|
||||
|
||||
|
|
|
@ -1,18 +1,25 @@
|
|||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <assert.h>
|
||||
#include <openacc.h>
|
||||
|
||||
/* Test of reduction on parallel directive (with async). */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int res, res2 = 0;
|
||||
int res, res1 = 0, res2 = 0;
|
||||
|
||||
#if defined(ACC_DEVICE_TYPE_host)
|
||||
# define GANGS 1
|
||||
#else
|
||||
# define GANGS 256
|
||||
#endif
|
||||
#pragma acc parallel num_gangs(GANGS) copy(res2) async(1)
|
||||
#pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
|
||||
reduction(+:res1) copy(res1, res2) async(1)
|
||||
{
|
||||
res1 += 5;
|
||||
|
||||
#pragma acc atomic
|
||||
res2 += 5;
|
||||
}
|
||||
|
@ -20,18 +27,22 @@ main (int argc, char *argv[])
|
|||
|
||||
acc_wait (1);
|
||||
|
||||
assert (res == res1);
|
||||
assert (res == res2);
|
||||
#undef GANGS
|
||||
|
||||
res = res2 = 1;
|
||||
res = res1 = res2 = 1;
|
||||
|
||||
#if defined(ACC_DEVICE_TYPE_host)
|
||||
# define GANGS 1
|
||||
#else
|
||||
# define GANGS 8
|
||||
#endif
|
||||
#pragma acc parallel num_gangs(GANGS) copy(res2) async(1)
|
||||
#pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
|
||||
reduction(*:res1) copy(res1, res2) async(1)
|
||||
{
|
||||
res1 *= 5;
|
||||
|
||||
#pragma acc atomic
|
||||
res2 *= 5;
|
||||
}
|
||||
|
@ -40,6 +51,7 @@ main (int argc, char *argv[])
|
|||
|
||||
acc_wait (1);
|
||||
|
||||
assert (res == res1);
|
||||
assert (res == res2);
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -5,12 +5,20 @@
|
|||
|
||||
int main ()
|
||||
{
|
||||
int dummy[10];
|
||||
|
||||
#pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */
|
||||
{
|
||||
#pragma acc loop worker
|
||||
for (int i = 0; i < 10; i++)
|
||||
dummy[i] = i;
|
||||
}
|
||||
|
||||
#pragma acc parallel vector_length (2<<20) /* { dg-error "using vector_length" } */
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (int i = 0; i < 10; i++)
|
||||
dummy[i] = i;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-w" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <openacc.h>
|
||||
|
||||
#define N 10
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int s1 = 0, s2 = 0;
|
||||
int i;
|
||||
int dummy = 0;
|
||||
|
||||
#pragma acc data copy (dummy)
|
||||
{
|
||||
#pragma acc parallel num_gangs (N) reduction (+:s1) copy(s1)
|
||||
{
|
||||
s1++;
|
||||
}
|
||||
}
|
||||
|
||||
if (acc_get_device_type () != acc_device_nvidia)
|
||||
{
|
||||
if (s1 != 1)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (s1 != N)
|
||||
abort ();
|
||||
}
|
||||
|
||||
s1 = 0;
|
||||
s2 = 0;
|
||||
|
||||
#pragma acc parallel num_gangs (10) reduction (+:s1, s2) copy(s1, s2)
|
||||
{
|
||||
s1++;
|
||||
s2 += N;
|
||||
}
|
||||
|
||||
if (acc_get_device_type () != acc_device_nvidia)
|
||||
{
|
||||
if (s1 != 1)
|
||||
abort ();
|
||||
if (s2 != N)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (s1 != N)
|
||||
abort ();
|
||||
if (s2 != N*N)
|
||||
abort ();
|
||||
}
|
||||
|
||||
s1 = 0;
|
||||
|
||||
#pragma acc parallel num_gangs (10) reduction (+:s1) copy(s1)
|
||||
{
|
||||
#pragma acc loop gang reduction (+:s1)
|
||||
for (i = 0; i < 10; i++)
|
||||
s1++;
|
||||
}
|
||||
|
||||
if (s1 != N)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
13
libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c
Normal file
13
libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
static int temp;
|
||||
|
||||
#pragma acc parallel reduction(+:temp)
|
||||
{
|
||||
temp++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
20
libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c
Normal file
20
libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c
Normal file
|
@ -0,0 +1,20 @@
|
|||
#define N 32
|
||||
|
||||
int
|
||||
foo (unsigned int sum)
|
||||
{
|
||||
#pragma acc parallel reduction (+:sum)
|
||||
{
|
||||
sum;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int sum = 0;
|
||||
foo (sum);
|
||||
return 0;
|
||||
}
|
|
@ -1,46 +1,59 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
|
||||
/* { dg-additional-options "-foffload=-w" } */
|
||||
|
||||
/* Integer reductions. */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "reduction.h"
|
||||
|
||||
#define vl 32
|
||||
|
||||
#define DO_PRAGMA(x) _Pragma (#x)
|
||||
|
||||
#define check_reduction_op(type, op, init, b) \
|
||||
{ \
|
||||
type res, vres; \
|
||||
res = (init); \
|
||||
DO_PRAGMA (acc parallel vector_length (vl) copy(res)) \
|
||||
DO_PRAGMA (acc loop reduction (op:res))\
|
||||
for (i = 0; i < n; i++) \
|
||||
res = res op (b); \
|
||||
\
|
||||
vres = (init); \
|
||||
for (i = 0; i < n; i++) \
|
||||
vres = vres op (b); \
|
||||
\
|
||||
if (res != vres) \
|
||||
abort (); \
|
||||
}
|
||||
const int ng = 8;
|
||||
const int nw = 4;
|
||||
const int vl = 32;
|
||||
|
||||
static void
|
||||
test_reductions_int (void)
|
||||
test_reductions (void)
|
||||
{
|
||||
const int n = 1000;
|
||||
const int n = 100;
|
||||
int i;
|
||||
int array[n];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i;
|
||||
array[i] = i+1;
|
||||
|
||||
check_reduction_op (int, +, 0, array[i]);
|
||||
check_reduction_op (int, *, 1, array[i]);
|
||||
check_reduction_op (int, &, -1, array[i]);
|
||||
check_reduction_op (int, |, 0, array[i]);
|
||||
check_reduction_op (int, ^, 0, array[i]);
|
||||
/* Gang reductions. */
|
||||
check_reduction_op (int, +, 0, array[i], num_gangs (ng), gang);
|
||||
check_reduction_op (int, *, 1, array[i], num_gangs (ng), gang);
|
||||
check_reduction_op (int, &, -1, array[i], num_gangs (ng), gang);
|
||||
check_reduction_op (int, |, 0, array[i], num_gangs (ng), gang);
|
||||
check_reduction_op (int, ^, 0, array[i], num_gangs (ng), gang);
|
||||
|
||||
/* Worker reductions. */
|
||||
check_reduction_op (int, +, 0, array[i], num_workers (nw), worker);
|
||||
check_reduction_op (int, *, 1, array[i], num_workers (nw), worker);
|
||||
check_reduction_op (int, &, -1, array[i], num_workers (nw), worker);
|
||||
check_reduction_op (int, |, 0, array[i], num_workers (nw), worker);
|
||||
check_reduction_op (int, ^, 0, array[i], num_workers (nw), worker);
|
||||
|
||||
/* Vector reductions. */
|
||||
check_reduction_op (int, +, 0, array[i], vector_length (vl), vector);
|
||||
check_reduction_op (int, *, 1, array[i], vector_length (vl), vector);
|
||||
check_reduction_op (int, &, -1, array[i], vector_length (vl), vector);
|
||||
check_reduction_op (int, |, 0, array[i], vector_length (vl), vector);
|
||||
check_reduction_op (int, ^, 0, array[i], vector_length (vl), vector);
|
||||
|
||||
/* Combined reductions. */
|
||||
check_reduction_op (int, +, 0, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
check_reduction_op (int, *, 1, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
check_reduction_op (int, &, -1, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
check_reduction_op (int, |, 0, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
check_reduction_op (int, ^, 0, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -55,30 +68,32 @@ test_reductions_bool (void)
|
|||
array[i] = i;
|
||||
|
||||
cmp_val = 5;
|
||||
check_reduction_op (bool, &&, true, (cmp_val > array[i]));
|
||||
check_reduction_op (bool, ||, false, (cmp_val > array[i]));
|
||||
|
||||
/* Gang reductions. */
|
||||
check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng),
|
||||
gang);
|
||||
check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng),
|
||||
gang);
|
||||
|
||||
/* Worker reductions. */
|
||||
check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_workers (nw),
|
||||
worker);
|
||||
check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_workers (nw),
|
||||
worker);
|
||||
|
||||
/* Vector reductions. */
|
||||
check_reduction_op (int, &&, 1, (cmp_val > array[i]), vector_length (vl),
|
||||
vector);
|
||||
check_reduction_op (int, ||, 0, (cmp_val > array[i]), vector_length (vl),
|
||||
vector);
|
||||
|
||||
/* Combined reductions. */
|
||||
check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker vector);
|
||||
check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker vector);
|
||||
}
|
||||
|
||||
#define check_reduction_macro(type, op, init, b) \
|
||||
{ \
|
||||
type res, vres; \
|
||||
res = (init); \
|
||||
DO_PRAGMA (acc parallel vector_length (vl) copy(res))\
|
||||
DO_PRAGMA (acc loop reduction (op:res))\
|
||||
for (i = 0; i < n; i++) \
|
||||
res = op (res, (b)); \
|
||||
\
|
||||
vres = (init); \
|
||||
for (i = 0; i < n; i++) \
|
||||
vres = op (vres, (b)); \
|
||||
\
|
||||
if (res != vres) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
#define max(a, b) (((a) > (b)) ? (a) : (b))
|
||||
#define min(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
static void
|
||||
test_reductions_minmax (void)
|
||||
{
|
||||
|
@ -89,14 +104,32 @@ test_reductions_minmax (void)
|
|||
for (i = 0; i < n; i++)
|
||||
array[i] = i;
|
||||
|
||||
check_reduction_macro (int, min, n + 1, array[i]);
|
||||
check_reduction_macro (int, max, -1, array[i]);
|
||||
/* Gang reductions. */
|
||||
check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng), gang);
|
||||
check_reduction_macro (int, max, -1, array[i], num_gangs (ng), gang);
|
||||
|
||||
/* Worker reductions. */
|
||||
check_reduction_macro (int, min, n + 1, array[i], num_workers (nw), worker);
|
||||
check_reduction_macro (int, max, -1, array[i], num_workers (nw), worker);
|
||||
|
||||
/* Vector reductions. */
|
||||
check_reduction_macro (int, min, n + 1, array[i], vector_length (vl),
|
||||
vector);
|
||||
check_reduction_macro (int, max, -1, array[i], vector_length (vl), vector);
|
||||
|
||||
/* Combined reductions. */
|
||||
check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
check_reduction_macro (int, max, -1, array[i], num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
test_reductions_int ();
|
||||
test_reductions ();
|
||||
test_reductions_bool ();
|
||||
test_reductions_minmax ();
|
||||
return 0;
|
||||
|
|
|
@ -1,124 +1,83 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
|
||||
/* { dg-additional-options "-foffload=-w" } */
|
||||
|
||||
/* float reductions. */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
#include "reduction.h"
|
||||
|
||||
#define vl 32
|
||||
const int ng = 8;
|
||||
const int nw = 4;
|
||||
const int vl = 32;
|
||||
|
||||
int
|
||||
main(void)
|
||||
static void
|
||||
test_reductions (void)
|
||||
{
|
||||
const int n = 100;
|
||||
int i;
|
||||
float array[n];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i+1;
|
||||
|
||||
/* Gang reductions. */
|
||||
check_reduction_op (float, +, 0, array[i], num_gangs (ng), gang);
|
||||
check_reduction_op (float, *, 1, array[i], num_gangs (ng), gang);
|
||||
|
||||
/* Worker reductions. */
|
||||
check_reduction_op (float, +, 0, array[i], num_workers (nw), worker);
|
||||
check_reduction_op (float, *, 1, array[i], num_workers (nw), worker);
|
||||
|
||||
/* Vector reductions. */
|
||||
check_reduction_op (float, +, 0, array[i], vector_length (vl), vector);
|
||||
check_reduction_op (float, *, 1, array[i], vector_length (vl), vector);
|
||||
|
||||
/* Combined reductions. */
|
||||
check_reduction_op (float, +, 0, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
check_reduction_op (float, *, 1, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
}
|
||||
|
||||
static void
|
||||
test_reductions_minmax (void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
float vresult, result, array[n];
|
||||
bool lvresult, lresult;
|
||||
float array[n];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i;
|
||||
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
/* Gang reductions. */
|
||||
check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng), gang);
|
||||
check_reduction_macro (float, max, -1, array[i], num_gangs (ng), gang);
|
||||
|
||||
/* '+' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(result)
|
||||
#pragma acc loop reduction (+:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result += array[i];
|
||||
/* Worker reductions. */
|
||||
check_reduction_macro (float, min, n + 1, array[i], num_workers (nw),
|
||||
worker);
|
||||
check_reduction_macro (float, max, -1, array[i], num_workers (nw), worker);
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
vresult += array[i];
|
||||
/* Vector reductions. */
|
||||
check_reduction_macro (float, min, n + 1, array[i], vector_length (vl),
|
||||
vector);
|
||||
check_reduction_macro (float, max, -1, array[i], vector_length (vl), vector);
|
||||
|
||||
if (result != vresult)
|
||||
abort ();
|
||||
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
|
||||
/* '*' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(result)
|
||||
#pragma acc loop reduction (*:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result *= array[i];
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
vresult *= array[i];
|
||||
|
||||
if (fabs(result - vresult) > .0001)
|
||||
abort ();
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
|
||||
/* 'max' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(result)
|
||||
#pragma acc loop reduction (max:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result > array[i] ? result : array[i];
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
vresult = vresult > array[i] ? vresult : array[i];
|
||||
|
||||
if (result != vresult)
|
||||
abort ();
|
||||
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
|
||||
/* 'min' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(result)
|
||||
#pragma acc loop reduction (min:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result < array[i] ? result : array[i];
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
vresult = vresult < array[i] ? vresult : array[i];
|
||||
|
||||
if (result != vresult)
|
||||
abort ();
|
||||
|
||||
result = 5;
|
||||
vresult = 5;
|
||||
|
||||
lresult = false;
|
||||
lvresult = false;
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (result > array[i]);
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
lvresult = lresult && (result > array[i]);
|
||||
|
||||
if (lresult != lvresult)
|
||||
abort ();
|
||||
|
||||
result = 5;
|
||||
vresult = 5;
|
||||
|
||||
lresult = false;
|
||||
lvresult = false;
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (result > array[i]);
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
lvresult = lresult || (result > array[i]);
|
||||
|
||||
if (lresult != lvresult)
|
||||
abort ();
|
||||
/* Combined reductions. */
|
||||
check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
check_reduction_macro (float, max, -1, array[i], num_gangs (ng)
|
||||
num_workers (nw)vector_length (vl), gang worker
|
||||
vector);
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
test_reductions ();
|
||||
test_reductions_minmax ();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,93 +1,84 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
|
||||
/* { dg-additional-options "-foffload=-w" } */
|
||||
|
||||
/* double reductions. */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
#include "reduction.h"
|
||||
|
||||
#define vl 32
|
||||
const int ng = 8;
|
||||
const int nw = 4;
|
||||
const int vl = 32;
|
||||
|
||||
int
|
||||
main(void)
|
||||
static void
|
||||
test_reductions (void)
|
||||
{
|
||||
const int n = 10;
|
||||
int i;
|
||||
double array[n];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i+1;
|
||||
|
||||
/* Gang reductions. */
|
||||
check_reduction_op (double, +, 0, array[i], num_gangs (ng), gang);
|
||||
check_reduction_op (double, *, 1, array[i], num_gangs (ng), gang);
|
||||
|
||||
/* Worker reductions. */
|
||||
check_reduction_op (double, +, 0, array[i], num_workers (nw), worker);
|
||||
check_reduction_op (double, *, 1, array[i], num_workers (nw), worker);
|
||||
|
||||
/* Vector reductions. */
|
||||
check_reduction_op (double, +, 0, array[i], vector_length (vl), vector);
|
||||
check_reduction_op (double, *, 1, array[i], vector_length (vl), vector);
|
||||
|
||||
/* Combined reductions. */
|
||||
check_reduction_op (double, +, 0, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
check_reduction_op (double, *, 1, array[i], num_gangs (ng) num_workers (nw)
|
||||
vector_length (vl), gang worker vector);
|
||||
}
|
||||
|
||||
static void
|
||||
test_reductions_minmax (void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
double vresult, result, array[n];
|
||||
bool lvresult, lresult;
|
||||
double array[n];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i;
|
||||
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
/* Gang reductions. */
|
||||
check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng), gang);
|
||||
check_reduction_macro (double, max, -1, array[i], num_gangs (ng), gang);
|
||||
|
||||
/* 'max' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(result)
|
||||
#pragma acc loop reduction (max:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result > array[i] ? result : array[i];
|
||||
/* Worker reductions. */
|
||||
check_reduction_macro (double, min, n + 1, array[i], num_workers (nw),
|
||||
worker);
|
||||
check_reduction_macro (double, max, -1, array[i], num_workers (nw), worker);
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
vresult = vresult > array[i] ? vresult : array[i];
|
||||
/* Vector reductions. */
|
||||
check_reduction_macro (double, min, n + 1, array[i], vector_length (vl),
|
||||
vector);
|
||||
check_reduction_macro (double, max, -1, array[i], vector_length (vl),
|
||||
vector);
|
||||
|
||||
if (result != vresult)
|
||||
abort ();
|
||||
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
|
||||
/* 'min' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(result)
|
||||
#pragma acc loop reduction (min:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result < array[i] ? result : array[i];
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
vresult = vresult < array[i] ? vresult : array[i];
|
||||
|
||||
if (result != vresult)
|
||||
abort ();
|
||||
|
||||
result = 5;
|
||||
vresult = 5;
|
||||
|
||||
lresult = false;
|
||||
lvresult = false;
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (result > array[i]);
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
lvresult = lresult && (result > array[i]);
|
||||
|
||||
if (lresult != lvresult)
|
||||
abort ();
|
||||
|
||||
result = 5;
|
||||
vresult = 5;
|
||||
|
||||
lresult = false;
|
||||
lvresult = false;
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (result > array[i]);
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
lvresult = lresult || (result > array[i]);
|
||||
|
||||
if (lresult != lvresult)
|
||||
abort ();
|
||||
/* Combined reductions. */
|
||||
check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
check_reduction_macro (double, max, -1, array[i], num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
test_reductions ();
|
||||
test_reductions_minmax ();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,59 +1,56 @@
|
|||
/* { dg-do run { target { ! { hppa*-*-hpux* } } } } */
|
||||
|
||||
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
|
||||
/* { dg-additional-options "-foffload=-w" } */
|
||||
|
||||
/* complex reductions. */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
#include <complex.h>
|
||||
#include "reduction.h"
|
||||
|
||||
#define vl 32
|
||||
const int ng = 8;
|
||||
const int nw = 4;
|
||||
const int vl = 32;
|
||||
|
||||
static void
|
||||
test_reductions (void)
|
||||
{
|
||||
const int n = 10;
|
||||
int i;
|
||||
double _Complex array[n];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i+1;
|
||||
|
||||
/* Gang reductions. */
|
||||
check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng), gang);
|
||||
check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng), gang);
|
||||
|
||||
/* Worker reductions. */
|
||||
check_reduction_op (double, +, 0, creal (array[i]), num_workers (nw),
|
||||
worker);
|
||||
check_reduction_op (double, *, 1, creal (array[i]), num_workers (nw),
|
||||
worker);
|
||||
|
||||
/* Vector reductions. */
|
||||
check_reduction_op (double, +, 0, creal (array[i]), vector_length (vl),
|
||||
vector);
|
||||
check_reduction_op (double, *, 1, creal (array[i]), vector_length (vl),
|
||||
vector);
|
||||
|
||||
/* Combined reductions. */
|
||||
check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng)
|
||||
num_workers (nw) vector_length (vl), gang worker
|
||||
vector);
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
main (void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
double _Complex vresult, result, array[n];
|
||||
bool lvresult, lresult;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
array[i] = i;
|
||||
|
||||
result = 0;
|
||||
vresult = 0;
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (creal(result) > creal(array[i]));
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
lvresult = lresult && (creal(result) > creal(array[i]));
|
||||
|
||||
if (lresult != lvresult)
|
||||
abort ();
|
||||
|
||||
result = 5;
|
||||
vresult = 5;
|
||||
|
||||
lresult = false;
|
||||
lvresult = false;
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (creal(result) > creal(array[i]));
|
||||
|
||||
/* Verify the reduction. */
|
||||
for (i = 0; i < n; i++)
|
||||
lvresult = lresult || (creal(result) > creal(array[i]));
|
||||
|
||||
if (lresult != lvresult)
|
||||
abort ();
|
||||
|
||||
test_reductions ();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,32 +1,57 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-w" } */
|
||||
|
||||
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
|
||||
/* { dg-additional-options "-foffload=-w" } */
|
||||
|
||||
/* Multiple reductions. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
const int ng = 8;
|
||||
const int nw = 4;
|
||||
const int vl = 32;
|
||||
|
||||
const int n = 100;
|
||||
|
||||
#define DO_PRAGMA(x) _Pragma (#x)
|
||||
|
||||
#define check_reduction(gwv_par, gwv_loop) \
|
||||
{ \
|
||||
s1 = 2; s2 = 5; \
|
||||
DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) \
|
||||
DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2)) \
|
||||
for (i = 0; i < n; i++) \
|
||||
{ \
|
||||
s1 = s1 + 3; \
|
||||
s2 = s2 + 5; \
|
||||
} \
|
||||
\
|
||||
if (s1 != v1 && s2 != v2) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int s1 = 2, s2 = 5, v1 = 2, v2 = 5;
|
||||
int n = 100;
|
||||
int i;
|
||||
|
||||
#pragma acc parallel vector_length (32) copy(s1,s2)
|
||||
#pragma acc loop reduction (+:s1, s2)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
s1 = s1 + 3;
|
||||
s2 = s2 + 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
v1 = v1 + 3;
|
||||
v2 = v2 + 2;
|
||||
}
|
||||
|
||||
if (s1 != v1)
|
||||
abort ();
|
||||
|
||||
if (s2 != v2)
|
||||
abort ();
|
||||
|
||||
|
||||
check_reduction (num_gangs (ng), gang);
|
||||
|
||||
/* Nvptx targets require a vector_length or 32 in to allow spinlocks with
|
||||
gangs. */
|
||||
check_reduction (num_workers (nw) vector_length (vl), worker);
|
||||
check_reduction (vector_length (vl), vector);
|
||||
check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang
|
||||
worker vector);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
36
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c
Normal file
36
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-w" } */
|
||||
|
||||
/* Test reductions on explicitly private variables. */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i, j, red[10];
|
||||
int v;
|
||||
|
||||
for (i = 0; i < 10; i++)
|
||||
red[i] = -1;
|
||||
|
||||
#pragma acc parallel copyout(red)
|
||||
{
|
||||
#pragma acc loop gang private(v)
|
||||
for (j = 0; j < 10; j++)
|
||||
{
|
||||
v = j;
|
||||
|
||||
#pragma acc loop vector reduction (+:v)
|
||||
for (i = 0; i < 100; i++)
|
||||
v++;
|
||||
|
||||
red[j] = v;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 10; i++)
|
||||
assert (red[i] == i + 100);
|
||||
|
||||
return 0;
|
||||
}
|
43
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h
Normal file
43
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
#ifndef REDUCTION_H
|
||||
#define REDUCTION_H
|
||||
|
||||
#define DO_PRAGMA(x) _Pragma (#x)
|
||||
|
||||
#define check_reduction_op(type, op, init, b, gwv_par, gwv_loop) \
|
||||
{ \
|
||||
type res, vres; \
|
||||
res = (init); \
|
||||
DO_PRAGMA (acc parallel gwv_par copy (res)) \
|
||||
DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
|
||||
for (i = 0; i < n; i++) \
|
||||
res = res op (b); \
|
||||
\
|
||||
vres = (init); \
|
||||
for (i = 0; i < n; i++) \
|
||||
vres = vres op (b); \
|
||||
\
|
||||
if (res != vres) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
#define check_reduction_macro(type, op, init, b, gwv_par, gwv_loop) \
|
||||
{ \
|
||||
type res, vres; \
|
||||
res = (init); \
|
||||
DO_PRAGMA (acc parallel gwv_par copy(res)) \
|
||||
DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
|
||||
for (i = 0; i < n; i++) \
|
||||
res = op (res, (b)); \
|
||||
\
|
||||
vres = (init); \
|
||||
for (i = 0; i < n; i++) \
|
||||
vres = op (vres, (b)); \
|
||||
\
|
||||
if (res != vres) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
#define max(a, b) (((a) > (b)) ? (a) : (b))
|
||||
#define min(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
#endif
|
|
@ -0,0 +1,47 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-w" }
|
||||
|
||||
program reduction
|
||||
implicit none
|
||||
integer, parameter :: n = 10
|
||||
integer s1, s2
|
||||
include "openacc_lib.h"
|
||||
|
||||
s1 = 0
|
||||
s2 = 0
|
||||
|
||||
!$acc parallel reduction(+:s1,s2) num_gangs (n) copy(s1)
|
||||
s1 = s1 + 1
|
||||
s2 = s2 + 1
|
||||
!$acc end parallel
|
||||
|
||||
if (acc_get_device_type () .eq. acc_device_nvidia) then
|
||||
if (s1 .ne. n) call abort
|
||||
if (s2 .ne. n) call abort
|
||||
else
|
||||
if (s1 .ne. 1) call abort
|
||||
if (s2 .ne. 1) call abort
|
||||
end if
|
||||
|
||||
! Test reductions inside subroutines
|
||||
|
||||
s1 = 0
|
||||
s2 = 0
|
||||
call redsub (s1, s2, n)
|
||||
|
||||
if (acc_get_device_type () .eq. acc_device_nvidia) then
|
||||
if (s1 .ne. n) call abort
|
||||
else
|
||||
if (s2 .ne. 1) call abort
|
||||
end if
|
||||
end program reduction
|
||||
|
||||
subroutine redsub(s1, s2, n)
|
||||
implicit none
|
||||
integer :: s1, s2, n
|
||||
|
||||
!$acc parallel reduction(+:s1,s2) num_gangs (10) copy(s1)
|
||||
s1 = s1 + 1
|
||||
s2 = s2 + 1
|
||||
!$acc end parallel
|
||||
end subroutine redsub
|
20
libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90
Normal file
20
libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90
Normal file
|
@ -0,0 +1,20 @@
|
|||
program foo
|
||||
implicit none
|
||||
integer :: i
|
||||
integer :: temp = 0
|
||||
integer :: temp2 = 0
|
||||
|
||||
!$acc parallel
|
||||
!$acc loop gang private(temp)
|
||||
do i=1, 10000
|
||||
temp = 0
|
||||
enddo
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel reduction(+:temp2)
|
||||
!$acc loop gang reduction(+:temp2)
|
||||
do i=1, 10000
|
||||
temp2 = 0
|
||||
enddo
|
||||
!$acc end parallel
|
||||
end program foo
|
|
@ -1,28 +1,55 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-w" }
|
||||
|
||||
! Integer reductions
|
||||
|
||||
program reduction_1
|
||||
implicit none
|
||||
|
||||
integer, parameter :: n = 10, vl = 32
|
||||
integer :: i, vresult, result
|
||||
logical :: lresult, lvresult
|
||||
integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
|
||||
integer :: i, vresult, rg, rw, rv, rc
|
||||
logical :: lrg, lrw, lrv, lrc, lvresult
|
||||
integer, dimension (n) :: array
|
||||
|
||||
do i = 1, n
|
||||
array(i) = i
|
||||
end do
|
||||
|
||||
result = 0
|
||||
!
|
||||
! '+' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! '+' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(+:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(+:rg) gang
|
||||
do i = 1, n
|
||||
result = result + array(i)
|
||||
rg = rg + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(+:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(+:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(+:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -31,17 +58,46 @@ program reduction_1
|
|||
vresult = vresult + array(i)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
result = 0
|
||||
vresult = 0
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
!
|
||||
! '*' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(*:result)
|
||||
rg = 1
|
||||
rw = 1
|
||||
rv = 1
|
||||
rc = 1
|
||||
vresult = 1
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(*:rg) gang
|
||||
do i = 1, n
|
||||
result = result * array(i)
|
||||
rg = rg * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(*:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(*:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(*:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -50,17 +106,46 @@ program reduction_1
|
|||
vresult = vresult * array(i)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
result = 0
|
||||
!
|
||||
! 'max' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! 'max' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(max:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(max:rg) gang
|
||||
do i = 1, n
|
||||
result = max (result, array(i))
|
||||
rg = max (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(max:rw) worker
|
||||
do i = 1, n
|
||||
rw = max (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(max:rv) vector
|
||||
do i = 1, n
|
||||
rv = max (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) Num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(max:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = max (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -69,17 +154,46 @@ program reduction_1
|
|||
vresult = max (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
result = 1
|
||||
vresult = 1
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
!
|
||||
! 'min' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(min:result)
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(min:rg) gang
|
||||
do i = 1, n
|
||||
result = min (result, array(i))
|
||||
rg = min (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(min:rw) worker
|
||||
do i = 1, n
|
||||
rw = min (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(min:rv) vector
|
||||
do i = 1, n
|
||||
rv = min (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(min:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = min (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -88,17 +202,46 @@ program reduction_1
|
|||
vresult = min (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
result = 1
|
||||
!
|
||||
! 'iand' reductions
|
||||
!
|
||||
|
||||
rg = 1
|
||||
rw = 1
|
||||
rv = 1
|
||||
rc = 1
|
||||
vresult = 1
|
||||
|
||||
! 'iand' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(iand:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(iand:rg) gang
|
||||
do i = 1, n
|
||||
result = iand (result, array(i))
|
||||
rg = iand (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(iand:rw) worker
|
||||
do i = 1, n
|
||||
rw = iand (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(iand:rv) vector
|
||||
do i = 1, n
|
||||
rv = iand (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(iand:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = iand (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -107,17 +250,46 @@ program reduction_1
|
|||
vresult = iand (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
result = 1
|
||||
vresult = 1
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
!
|
||||
! 'ior' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(ior:result)
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(ior:rg) gang
|
||||
do i = 1, n
|
||||
result = ior (result, array(i))
|
||||
rg = ior (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(ior:rw) worker
|
||||
do i = 1, n
|
||||
rw = ior (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(ior:rv) gang
|
||||
do i = 1, n
|
||||
rv = ior (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(ior:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = ior (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -126,17 +298,46 @@ program reduction_1
|
|||
vresult = ior (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
result = 0
|
||||
!
|
||||
! 'ieor' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! 'ieor' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(ieor:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(ieor:rg) gang
|
||||
do i = 1, n
|
||||
result = ieor (result, array(i))
|
||||
rg = ieor (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(ieor:rw) worker
|
||||
do i = 1, n
|
||||
rw = ieor (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(ieor:rv) vector
|
||||
do i = 1, n
|
||||
rv = ieor (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(ieor:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = ieor (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -145,17 +346,46 @@ program reduction_1
|
|||
vresult = ieor (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
!
|
||||
! '.and.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.and.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.and.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .and. (array(i) .ge. 5)
|
||||
lrg = lrg .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.and.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.and.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.and.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -164,17 +394,46 @@ program reduction_1
|
|||
lvresult = lvresult .and. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
!
|
||||
! '.or.' reductions
|
||||
!
|
||||
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .false.
|
||||
|
||||
! '.or.' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.or.:lresult)
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.or.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .or. (array(i) .ge. 5)
|
||||
lrg = lrg .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.or.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.or.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.or.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -183,17 +442,46 @@ program reduction_1
|
|||
lvresult = lvresult .or. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
!
|
||||
! '.eqv.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.eqv.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.eqv.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .eqv. (array(i) .ge. 5)
|
||||
lrg = lrg .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.eqv.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.eqv.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.eqv.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -202,17 +490,46 @@ program reduction_1
|
|||
lvresult = lvresult .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
!
|
||||
! '.neqv.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.neqv.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.neqv.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .neqv. (array(i) .ge. 5)
|
||||
lrg = lrg .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.neqv.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.neqv.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.neqv.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -221,5 +538,8 @@ program reduction_1
|
|||
lvresult = lvresult .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
end program reduction_1
|
||||
|
|
|
@ -5,26 +5,52 @@
|
|||
program reduction_2
|
||||
implicit none
|
||||
|
||||
integer, parameter :: n = 10, vl = 32
|
||||
integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
|
||||
integer :: i
|
||||
real, parameter :: e = .001
|
||||
real :: vresult, result
|
||||
logical :: lresult, lvresult
|
||||
real, dimension (n) :: array
|
||||
real :: vresult, rg, rw, rv, rc
|
||||
real, parameter :: e = 0.001
|
||||
logical :: lrg, lrw, lrv, lrc, lvresult
|
||||
real, dimension (n) :: array
|
||||
|
||||
do i = 1, n
|
||||
array(i) = i
|
||||
end do
|
||||
|
||||
result = 0
|
||||
!
|
||||
! '+' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! '+' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(+:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(+:rg) gang
|
||||
do i = 1, n
|
||||
result = result + array(i)
|
||||
rg = rg + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(+:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(+:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(+:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -33,17 +59,46 @@ program reduction_2
|
|||
vresult = vresult + array(i)
|
||||
end do
|
||||
|
||||
if (abs (result - vresult) .ge. e) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
result = 1
|
||||
!
|
||||
! '*' reductions
|
||||
!
|
||||
|
||||
rg = 1
|
||||
rw = 1
|
||||
rv = 1
|
||||
rc = 1
|
||||
vresult = 1
|
||||
|
||||
! '*' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(*:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(*:rg) gang
|
||||
do i = 1, n
|
||||
result = result * array(i)
|
||||
rg = rg * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(*:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(*:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(*:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -52,17 +107,46 @@ program reduction_2
|
|||
vresult = vresult * array(i)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (abs (rg - vresult) .ge. e) call abort
|
||||
if (abs (rw - vresult) .ge. e) call abort
|
||||
if (abs (rv - vresult) .ge. e) call abort
|
||||
if (abs (rc - vresult) .ge. e) call abort
|
||||
|
||||
result = 0
|
||||
!
|
||||
! 'max' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rg = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! 'max' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(max:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(max:rg) gang
|
||||
do i = 1, n
|
||||
result = max (result, array(i))
|
||||
rg = max (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(max:rw) worker
|
||||
do i = 1, n
|
||||
rw = max (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(max:rv) vector
|
||||
do i = 1, n
|
||||
rv = max (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(max:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = max (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -71,17 +155,46 @@ program reduction_2
|
|||
vresult = max (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
result = 1
|
||||
vresult = 1
|
||||
if (abs (rg - vresult) .ge. e) call abort
|
||||
if (abs (rw - vresult) .ge. e) call abort
|
||||
if (abs (rg - vresult) .ge. e) call abort
|
||||
if (abs (rc - vresult) .ge. e) call abort
|
||||
|
||||
!
|
||||
! 'min' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(min:result)
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(min:rg) gang
|
||||
do i = 1, n
|
||||
result = min (result, array(i))
|
||||
rg = min (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(min:rw) worker
|
||||
do i = 1, n
|
||||
rw = min (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(min:rv) vector
|
||||
do i = 1, n
|
||||
rv = min (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(min:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = min (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -90,17 +203,46 @@ program reduction_2
|
|||
vresult = min (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
lresult = .true.
|
||||
!
|
||||
! '.and.' reductions
|
||||
!
|
||||
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
! '.and.' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.and.:lresult)
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.and.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .and. (array(i) .ge. 5)
|
||||
lrg = lrg .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.and.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.and.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.and.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -109,17 +251,46 @@ program reduction_2
|
|||
lvresult = lvresult .and. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
!
|
||||
! '.or.' reductions
|
||||
!
|
||||
|
||||
lrg = .false.
|
||||
lrw = .false.
|
||||
lrv = .false.
|
||||
lrc = .false.
|
||||
lvresult = .false.
|
||||
|
||||
! '.or.' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.or.:lresult)
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.or.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .or. (array(i) .ge. 5)
|
||||
lrg = lrg .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.or.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.or.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.or.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -128,17 +299,46 @@ program reduction_2
|
|||
lvresult = lvresult .or. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
!
|
||||
! '.eqv.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.eqv.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.eqv.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .eqv. (array(i) .ge. 5)
|
||||
lrg = lrg .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.eqv.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.eqv.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.eqv.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -147,17 +347,46 @@ program reduction_2
|
|||
lvresult = lvresult .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
!
|
||||
! '.neqv.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.neqv.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.neqv.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .neqv. (array(i) .ge. 5)
|
||||
lrg = lrg .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.neqv.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.neqv.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.neqv.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -166,5 +395,8 @@ program reduction_2
|
|||
lvresult = lvresult .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
end program reduction_2
|
||||
|
|
|
@ -5,26 +5,52 @@
|
|||
program reduction_3
|
||||
implicit none
|
||||
|
||||
integer, parameter :: n = 10, vl = 32
|
||||
integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
|
||||
integer :: i
|
||||
double precision, parameter :: e = .001
|
||||
double precision :: vresult, result
|
||||
logical :: lresult, lvresult
|
||||
double precision :: vresult, rg, rw, rv, rc
|
||||
double precision, parameter :: e = 0.001
|
||||
logical :: lrg, lrw, lrv, lrc, lvresult
|
||||
double precision, dimension (n) :: array
|
||||
|
||||
do i = 1, n
|
||||
array(i) = i
|
||||
end do
|
||||
|
||||
result = 0
|
||||
!
|
||||
! '+' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! '+' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(+:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(+:rg) gang
|
||||
do i = 1, n
|
||||
result = result + array(i)
|
||||
rg = rg + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(+:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(+:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(+:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc + array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -33,17 +59,46 @@ program reduction_3
|
|||
vresult = vresult + array(i)
|
||||
end do
|
||||
|
||||
if (abs (result - vresult) .ge. e) call abort
|
||||
if (abs (rg - vresult) .ge. e) call abort
|
||||
if (abs (rw - vresult) .ge. e) call abort
|
||||
if (abs (rv - vresult) .ge. e) call abort
|
||||
if (abs (rc - vresult) .ge. e) call abort
|
||||
|
||||
result = 1
|
||||
!
|
||||
! '*' reductions
|
||||
!
|
||||
|
||||
rg = 1
|
||||
rw = 1
|
||||
rv = 1
|
||||
rc = 1
|
||||
vresult = 1
|
||||
|
||||
! '*' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(*:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(*:rg) gang
|
||||
do i = 1, n
|
||||
result = result * array(i)
|
||||
rg = rg * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(*:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(*:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(*:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc * array(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -52,17 +107,46 @@ program reduction_3
|
|||
vresult = vresult * array(i)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (abs (rg - vresult) .ge. e) call abort
|
||||
if (abs (rw - vresult) .ge. e) call abort
|
||||
if (abs (rv - vresult) .ge. e) call abort
|
||||
if (abs (rc - vresult) .ge. e) call abort
|
||||
|
||||
result = 0
|
||||
!
|
||||
! 'max' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! 'max' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(max:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(max:rg) gang
|
||||
do i = 1, n
|
||||
result = max (result, array(i))
|
||||
rg = max (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(max:rw) worker
|
||||
do i = 1, n
|
||||
rw = max (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(max:rv) vector
|
||||
do i = 1, n
|
||||
rv = max (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(max:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = max (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -71,17 +155,46 @@ program reduction_3
|
|||
vresult = max (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
result = 1
|
||||
vresult = 1
|
||||
if (abs (rg - vresult) .ge. e) call abort
|
||||
if (abs (rw - vresult) .ge. e) call abort
|
||||
if (abs (rv - vresult) .ge. e) call abort
|
||||
if (abs (rc - vresult) .ge. e) call abort
|
||||
|
||||
!
|
||||
! 'min' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(min:result)
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(min:rg) gang
|
||||
do i = 1, n
|
||||
result = min (result, array(i))
|
||||
rg = min (rg, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(min:rw) worker
|
||||
do i = 1, n
|
||||
rw = min (rw, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(min:rv) vector
|
||||
do i = 1, n
|
||||
rv = min (rv, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(min:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = min (rc, array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -90,17 +203,46 @@ program reduction_3
|
|||
vresult = min (vresult, array(i))
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
lresult = .true.
|
||||
!
|
||||
! '.and.' reductions
|
||||
!
|
||||
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
! '.and.' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.and.:lresult)
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.and.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .and. (array(i) .ge. 5)
|
||||
lrg = lrg .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.and.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.and.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.and.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .and. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -109,17 +251,46 @@ program reduction_3
|
|||
lvresult = lvresult .and. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
!
|
||||
! '.or.' reductions
|
||||
!
|
||||
|
||||
lrg = .false.
|
||||
lrw = .false.
|
||||
lrv = .false.
|
||||
lrc = .false.
|
||||
lvresult = .false.
|
||||
|
||||
! '.or.' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.or.:lresult)
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.or.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .or. (array(i) .ge. 5)
|
||||
lrg = lrg .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.or.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.or.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.or.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .or. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -128,17 +299,46 @@ program reduction_3
|
|||
lvresult = lvresult .or. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
!
|
||||
! '.eqv.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.eqv.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.eqv.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .eqv. (array(i) .ge. 5)
|
||||
lrg = lrg .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.eqv.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.eqv.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.eqv.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -147,17 +347,46 @@ program reduction_3
|
|||
lvresult = lvresult .eqv. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
|
||||
lresult = .false.
|
||||
lvresult = .false.
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
|
||||
!
|
||||
! '.neqv.' reductions
|
||||
!
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
|
||||
!$acc loop reduction(.neqv.:lresult)
|
||||
lrg = .true.
|
||||
lrw = .true.
|
||||
lrv = .true.
|
||||
lrc = .true.
|
||||
lvresult = .true.
|
||||
|
||||
!$acc parallel num_gangs(ng) copy(lrg)
|
||||
!$acc loop reduction(.neqv.:lrg) gang
|
||||
do i = 1, n
|
||||
lresult = lresult .neqv. (array(i) .ge. 5)
|
||||
lrg = lrg .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(lrw)
|
||||
!$acc loop reduction(.neqv.:lrw) worker
|
||||
do i = 1, n
|
||||
lrw = lrw .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(lrv)
|
||||
!$acc loop reduction(.neqv.:lrv) vector
|
||||
do i = 1, n
|
||||
lrv = lrv .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
|
||||
!$acc loop reduction(.neqv.:lrc) gang worker vector
|
||||
do i = 1, n
|
||||
lrc = lrc .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
|
@ -166,5 +395,8 @@ program reduction_3
|
|||
lvresult = lvresult .neqv. (array(i) .ge. 5)
|
||||
end do
|
||||
|
||||
if (result.ne.vresult) call abort
|
||||
if (lrg .neqv. lvresult) call abort
|
||||
if (lrw .neqv. lvresult) call abort
|
||||
if (lrv .neqv. lvresult) call abort
|
||||
if (lrc .neqv. lvresult) call abort
|
||||
end program reduction_3
|
||||
|
|
|
@ -5,50 +5,108 @@
|
|||
program reduction_4
|
||||
implicit none
|
||||
|
||||
integer, parameter :: n = 10, vl = 32
|
||||
integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
|
||||
integer :: i
|
||||
complex :: vresult, result
|
||||
real :: vresult, rg, rw, rv, rc
|
||||
complex, dimension (n) :: array
|
||||
|
||||
do i = 1, n
|
||||
array(i) = i
|
||||
end do
|
||||
|
||||
result = 0
|
||||
!
|
||||
! '+' reductions
|
||||
!
|
||||
|
||||
rg = 0
|
||||
rw = 0
|
||||
rv = 0
|
||||
rc = 0
|
||||
vresult = 0
|
||||
|
||||
! '+' reductions
|
||||
|
||||
!$acc parallel vector_length(vl) num_gangs(1) copy(result)
|
||||
!$acc loop reduction(+:result)
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(+:rg) gang
|
||||
do i = 1, n
|
||||
result = result + array(i)
|
||||
rg = rg + REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(+:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw + REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(+:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv + REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(+:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc + REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
! Verify the results
|
||||
do i = 1, n
|
||||
vresult = vresult + array(i)
|
||||
vresult = vresult + REAL(array(i))
|
||||
end do
|
||||
|
||||
if (result .ne. vresult) call abort
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
|
||||
result = 1
|
||||
!
|
||||
! '*' reductions
|
||||
!
|
||||
|
||||
rg = 1
|
||||
rw = 1
|
||||
rv = 1
|
||||
rc = 1
|
||||
vresult = 1
|
||||
|
||||
! ! '*' reductions
|
||||
!
|
||||
! !$acc parallel vector_length(vl)
|
||||
! !$acc loop reduction(*:result)
|
||||
! do i = 1, n
|
||||
! result = result * array(i)
|
||||
! end do
|
||||
! !$acc end parallel
|
||||
!
|
||||
! ! Verify the results
|
||||
! do i = 1, n
|
||||
! vresult = vresult * array(i)
|
||||
! end do
|
||||
!
|
||||
! if (result.ne.vresult) call abort
|
||||
!$acc parallel num_gangs(ng) copy(rg)
|
||||
!$acc loop reduction(*:rg) gang
|
||||
do i = 1, n
|
||||
rg = rg * REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_workers(nw) copy(rw)
|
||||
!$acc loop reduction(*:rw) worker
|
||||
do i = 1, n
|
||||
rw = rw * REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length(vl) copy(rv)
|
||||
!$acc loop reduction(*:rv) vector
|
||||
do i = 1, n
|
||||
rv = rv * REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
|
||||
!$acc loop reduction(*:rc) gang worker vector
|
||||
do i = 1, n
|
||||
rc = rc * REAL(array(i))
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
! Verify the results
|
||||
do i = 1, n
|
||||
vresult = vresult * REAL(array(i))
|
||||
end do
|
||||
|
||||
if (rg .ne. vresult) call abort
|
||||
if (rw .ne. vresult) call abort
|
||||
if (rv .ne. vresult) call abort
|
||||
if (rc .ne. vresult) call abort
|
||||
end program reduction_4
|
||||
|
|
|
@ -1,12 +1,17 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-w" }
|
||||
|
||||
! subroutine reduction
|
||||
|
||||
program reduction
|
||||
integer, parameter :: n = 40, c = 10
|
||||
integer :: i, vsum, sum
|
||||
integer :: i, vsum, gs, ws, vs, cs, ns
|
||||
|
||||
call redsub (sum, n, c)
|
||||
call redsub_gang (gs, n, c)
|
||||
call redsub_worker (ws, n, c)
|
||||
call redsub_vector (vs, n, c)
|
||||
call redsub_combined (cs, n, c)
|
||||
call redsub_nested (ns, n, c)
|
||||
|
||||
vsum = 0
|
||||
|
||||
|
@ -15,21 +20,80 @@ program reduction
|
|||
vsum = vsum + c
|
||||
end do
|
||||
|
||||
if (sum.ne.vsum) call abort ()
|
||||
if (gs .ne. vsum) call abort ()
|
||||
if (ws .ne. vsum) call abort ()
|
||||
if (vs .ne. vsum) call abort ()
|
||||
if (cs .ne. vsum) call abort ()
|
||||
if (ns .ne. vsum) call abort ()
|
||||
end program reduction
|
||||
|
||||
subroutine redsub(sum, n, c)
|
||||
subroutine redsub_gang(sum, n, c)
|
||||
integer :: sum, n, c
|
||||
|
||||
integer :: s
|
||||
s = 0
|
||||
sum = 0
|
||||
|
||||
!$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1)
|
||||
!$acc loop reduction(+:s)
|
||||
!$acc parallel copyin (n, c) num_gangs(n) copy(sum)
|
||||
!$acc loop reduction(+:sum) gang
|
||||
do i = 1, n
|
||||
s = s + c
|
||||
sum = sum + c
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_gang
|
||||
|
||||
sum = s
|
||||
end subroutine redsub
|
||||
subroutine redsub_worker(sum, n, c)
|
||||
integer :: sum, n, c
|
||||
|
||||
sum = 0
|
||||
|
||||
!$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum)
|
||||
!$acc loop reduction(+:sum) worker
|
||||
do i = 1, n
|
||||
sum = sum + c
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_worker
|
||||
|
||||
subroutine redsub_vector(sum, n, c)
|
||||
integer :: sum, n, c
|
||||
|
||||
sum = 0
|
||||
|
||||
!$acc parallel copyin (n, c) vector_length(32) copy(sum)
|
||||
!$acc loop reduction(+:sum) vector
|
||||
do i = 1, n
|
||||
sum = sum + c
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_vector
|
||||
|
||||
subroutine redsub_combined(sum, n, c)
|
||||
integer :: sum, n, c
|
||||
|
||||
sum = 0
|
||||
|
||||
!$acc parallel num_gangs (8) num_workers (4) vector_length(32) copy(sum)
|
||||
!$acc loop reduction(+:sum) gang worker vector
|
||||
do i = 1, n
|
||||
sum = sum + c
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_combined
|
||||
|
||||
subroutine redsub_nested(sum, n, c)
|
||||
integer :: sum, n, c
|
||||
integer :: ii, jj
|
||||
|
||||
ii = n / 10;
|
||||
jj = 10;
|
||||
sum = 0
|
||||
|
||||
!$acc parallel num_gangs (8) copy(sum)
|
||||
!$acc loop reduction(+:sum) gang
|
||||
do i = 1, ii
|
||||
!$acc loop reduction(+:sum) vector
|
||||
do j = 1, jj
|
||||
sum = sum + c
|
||||
end do
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_nested
|
||||
|
|
|
@ -1,30 +1,94 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-cpp -w" }
|
||||
|
||||
program reduction
|
||||
implicit none
|
||||
|
||||
integer, parameter :: n = 100
|
||||
integer :: i, s1, s2, vs1, vs2
|
||||
integer, parameter :: n = 100, n2 = 1000, chunksize = 10
|
||||
integer :: i, gs1, gs2, ws1, ws2, vs1, vs2, cs1, cs2, hs1, hs2
|
||||
integer :: j, red, vred
|
||||
|
||||
s1 = 0
|
||||
s2 = 0
|
||||
gs1 = 0
|
||||
gs2 = 0
|
||||
ws1 = 0
|
||||
ws2 = 0
|
||||
vs1 = 0
|
||||
vs2 = 0
|
||||
cs1 = 0
|
||||
cs2 = 0
|
||||
hs1 = 0
|
||||
hs2 = 0
|
||||
|
||||
!$acc parallel vector_length (32) copy(s1, s2)
|
||||
!$acc loop reduction(+:s1, s2)
|
||||
!$acc parallel num_gangs (1000) copy(gs1, gs2)
|
||||
!$acc loop reduction(+:gs1, gs2) gang
|
||||
do i = 1, n
|
||||
s1 = s1 + 1
|
||||
s2 = s2 + 2
|
||||
gs1 = gs1 + 1
|
||||
gs2 = gs2 + 2
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
! Verify the results
|
||||
!$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
|
||||
!$acc loop reduction(+:ws1, ws2) worker
|
||||
do i = 1, n
|
||||
ws1 = ws1 + 1
|
||||
ws2 = ws2 + 2
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel vector_length (32) copy(vs1, vs2)
|
||||
!$acc loop reduction(+:vs1, vs2) vector
|
||||
do i = 1, n
|
||||
vs1 = vs1 + 1
|
||||
vs2 = vs2 + 2
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
if (s1.ne.vs1) call abort ()
|
||||
if (s2.ne.vs2) call abort ()
|
||||
!$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
|
||||
!$acc loop reduction(+:cs1, cs2) gang worker vector
|
||||
do i = 1, n
|
||||
cs1 = cs1 + 1
|
||||
cs2 = cs2 + 2
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
! Verify the results on the host
|
||||
do i = 1, n
|
||||
hs1 = hs1 + 1
|
||||
hs2 = hs2 + 2
|
||||
end do
|
||||
|
||||
if (gs1 .ne. hs1) call abort ()
|
||||
if (gs2 .ne. hs2) call abort ()
|
||||
|
||||
if (ws1 .ne. hs1) call abort ()
|
||||
if (ws2 .ne. hs2) call abort ()
|
||||
|
||||
if (vs1 .ne. hs1) call abort ()
|
||||
if (vs2 .ne. hs2) call abort ()
|
||||
|
||||
if (cs1 .ne. hs1) call abort ()
|
||||
if (cs2 .ne. hs2) call abort ()
|
||||
|
||||
! Nested reductions.
|
||||
|
||||
red = 0
|
||||
vred = 0
|
||||
|
||||
!$acc parallel num_gangs(10) vector_length(32) copy(red)
|
||||
!$acc loop reduction(+:red) gang
|
||||
do i = 1, n/chunksize
|
||||
!$acc loop reduction(+:red) vector
|
||||
do j = 1, chunksize
|
||||
red = red + chunksize
|
||||
end do
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, n/chunksize
|
||||
do j = 1, chunksize
|
||||
vred = vred + chunksize
|
||||
end do
|
||||
end do
|
||||
|
||||
if (red .ne. vred) call abort ()
|
||||
end program reduction
|
||||
|
|
88
libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90
Normal file
88
libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90
Normal file
|
@ -0,0 +1,88 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-w" }
|
||||
|
||||
! subroutine reduction with private and firstprivate variables
|
||||
|
||||
program reduction
|
||||
integer, parameter :: n = 100
|
||||
integer :: i, j, vsum, cs, arr(n)
|
||||
|
||||
call redsub_private (cs, n, arr)
|
||||
call redsub_bogus (cs, n)
|
||||
call redsub_combined (cs, n, arr)
|
||||
|
||||
vsum = 0
|
||||
|
||||
! Verify the results
|
||||
do i = 1, n
|
||||
vsum = i
|
||||
do j = 1, n
|
||||
vsum = vsum + 1;
|
||||
end do
|
||||
if (vsum .ne. arr(i)) call abort ()
|
||||
end do
|
||||
end program reduction
|
||||
|
||||
! This subroutine tests a reduction with an explicit private variable.
|
||||
|
||||
subroutine redsub_private(sum, n, arr)
|
||||
integer :: sum, n, arr(n)
|
||||
integer :: i, j, v
|
||||
|
||||
!$acc parallel copyout (arr)
|
||||
!$acc loop gang private (v)
|
||||
do j = 1, n
|
||||
v = j
|
||||
|
||||
!$acc loop vector reduction (+:v)
|
||||
do i = 1, 100
|
||||
v = v + 1
|
||||
end do
|
||||
|
||||
arr(j) = v
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
! verify the results
|
||||
do i = 1, 10
|
||||
if (arr(i) .ne. 100+i) call abort ()
|
||||
end do
|
||||
end subroutine redsub_private
|
||||
|
||||
|
||||
! Bogus reduction on an impliclitly firstprivate variable. The results do
|
||||
! survive the parallel region. The goal here is to ensure that gfortran
|
||||
! doesn't ICE.
|
||||
|
||||
subroutine redsub_bogus(sum, n)
|
||||
integer :: sum, n, arr(n)
|
||||
integer :: i
|
||||
|
||||
!$acc parallel
|
||||
!$acc loop gang worker vector reduction (+:sum)
|
||||
do i = 1, n
|
||||
sum = sum + 1
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_bogus
|
||||
|
||||
! This reduction involving a firstprivate variable yields legitimate results.
|
||||
|
||||
subroutine redsub_combined(sum, n, arr)
|
||||
integer :: sum, n, arr(n)
|
||||
integer :: i, j
|
||||
|
||||
!$acc parallel copy (arr)
|
||||
!$acc loop gang
|
||||
do i = 1, n
|
||||
sum = i;
|
||||
|
||||
!$acc loop reduction(+:sum)
|
||||
do j = 1, n
|
||||
sum = sum + 1
|
||||
end do
|
||||
|
||||
arr(i) = sum
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine redsub_combined
|
Loading…
Add table
Reference in a new issue