Decompose OpenACC 'kernels' constructs into parts, a sequence of compute constructs
Not yet enabled by default: for now, the current mode of OpenACC 'kernels' constructs handling still remains '-fopenacc-kernels=parloops', but that is to change later. gcc/ * omp-oacc-kernels-decompose.cc: New. * Makefile.in (OBJS): Add it. * passes.def: Instantiate it. * tree-pass.h (make_pass_omp_oacc_kernels_decompose): Declare. * flag-types.h (enum openacc_kernels): Add. * doc/invoke.texi (-fopenacc-kernels): Document. * gimple.h (enum gf_mask): Add 'GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED', 'GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE', 'GF_OMP_TARGET_KIND_OACC_DATA_KERNELS'. (is_gimple_omp_oacc, is_gimple_omp_offloaded): Handle these. * gimple-pretty-print.c (dump_gimple_omp_target): Likewise. * omp-expand.c (expand_omp_target, build_omp_regions_1) (omp_make_gimple_edges): Likewise. * omp-low.c (scan_sharing_clauses, scan_omp_for) (check_omp_nesting_restrictions, lower_oacc_reductions) (lower_oacc_head_mark, lower_omp_target): Likewise. * omp-offload.c (execute_oacc_device_lower): Likewise. gcc/c-family/ * c.opt (fopenacc-kernels): Add. gcc/fortran/ * lang.opt (fopenacc-kernels): Add. gcc/testsuite/ * c-c++-common/goacc/kernels-decompose-1.c: New. * c-c++-common/goacc/kernels-decompose-2.c: New. * c-c++-common/goacc/kernels-decompose-ice-1.c: New. * c-c++-common/goacc/kernels-decompose-ice-2.c: New. * gfortran.dg/goacc/kernels-decompose-1.f95: New. * gfortran.dg/goacc/kernels-decompose-2.f95: New. * c-c++-common/goacc/if-clause-2.c: Adjust. * gfortran.dg/goacc/kernels-tree.f95: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c: New. * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Adjust. * testsuite/libgomp.oacc-fortran/pr94358-1.f90: Likewise. Co-authored-by: Thomas Schwinge <thomas@codesourcery.com>
This commit is contained in:
parent
bd78857554
commit
e898ce7997
26 changed files with 2355 additions and 16 deletions
|
@ -1480,6 +1480,7 @@ OBJS = \
|
|||
omp-expand.o \
|
||||
omp-general.o \
|
||||
omp-low.o \
|
||||
omp-oacc-kernels-decompose.o \
|
||||
omp-simd-clone.o \
|
||||
opt-problem.o \
|
||||
optabs.o \
|
||||
|
|
|
@ -1796,6 +1796,19 @@ fopenacc-dim=
|
|||
C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims)
|
||||
Specify default OpenACC compute dimensions.
|
||||
|
||||
fopenacc-kernels=
|
||||
C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS)
|
||||
-fopenacc-kernels=[decompose|parloops] Specify mode of OpenACC 'kernels' constructs handling.
|
||||
|
||||
Enum
|
||||
Name(openacc_kernels) Type(enum openacc_kernels)
|
||||
|
||||
EnumValue
|
||||
Enum(openacc_kernels) String(decompose) Value(OPENACC_KERNELS_DECOMPOSE)
|
||||
|
||||
EnumValue
|
||||
Enum(openacc_kernels) String(parloops) Value(OPENACC_KERNELS_PARLOOPS)
|
||||
|
||||
fopenmp
|
||||
C ObjC C++ ObjC++ LTO Var(flag_openmp)
|
||||
Enable OpenMP (implies -frecursive in Fortran).
|
||||
|
|
|
@ -201,7 +201,7 @@ in the following sections.
|
|||
-aux-info @var{filename} -fallow-parameterless-variadic-functions @gol
|
||||
-fno-asm -fno-builtin -fno-builtin-@var{function} -fgimple@gol
|
||||
-fhosted -ffreestanding @gol
|
||||
-fopenacc -fopenacc-dim=@var{geom} @gol
|
||||
-fopenacc -fopenacc-dim=@var{geom} -fopenacc-kernels=@var{mode} @gol
|
||||
-fopenmp -fopenmp-simd @gol
|
||||
-fms-extensions -fplan9-extensions -fsso-struct=@var{endianness} @gol
|
||||
-fallow-single-precision -fcond-mismatch -flax-vector-conversions @gol
|
||||
|
@ -2589,6 +2589,18 @@ not explicitly specify. The @var{geom} value is a triple of
|
|||
':'-separated sizes, in order 'gang', 'worker' and, 'vector'. A size
|
||||
can be omitted, to use a target-specific default value.
|
||||
|
||||
@item -fopenacc-kernels=@var{mode}
|
||||
@opindex fopenacc-kernels
|
||||
@cindex OpenACC accelerator programming
|
||||
Specify mode of OpenACC `kernels' constructs handling.
|
||||
With @option{-fopenacc-kernels=decompose}, OpenACC `kernels'
|
||||
constructs are decomposed into parts, a sequence of compute
|
||||
constructs, each then handled individually.
|
||||
This is work in progress.
|
||||
With @option{-fopenacc-kernels=parloops}, OpenACC `kernels' constructs
|
||||
are handled by the @samp{parloops} pass, en bloc.
|
||||
This is the current default.
|
||||
|
||||
@item -fopenmp
|
||||
@opindex fopenmp
|
||||
@cindex OpenMP parallel
|
||||
|
|
|
@ -415,6 +415,13 @@ enum evrp_mode
|
|||
EVRP_MODE_RVRP_DEBUG = EVRP_MODE_RVRP_ONLY | EVRP_MODE_DEBUG
|
||||
};
|
||||
|
||||
/* Modes of OpenACC 'kernels' constructs handling. */
|
||||
enum openacc_kernels
|
||||
{
|
||||
OPENACC_KERNELS_DECOMPOSE,
|
||||
OPENACC_KERNELS_PARLOOPS
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* ! GCC_FLAG_TYPES_H */
|
||||
|
|
|
@ -687,6 +687,10 @@ fopenacc-dim=
|
|||
Fortran LTO Joined Var(flag_openacc_dims)
|
||||
; Documented in C
|
||||
|
||||
fopenacc-kernels=
|
||||
Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS)
|
||||
; Documented in C
|
||||
|
||||
fopenmp
|
||||
Fortran LTO
|
||||
; Documented in C
|
||||
|
|
|
@ -1700,6 +1700,15 @@ dump_gimple_omp_target (pretty_printer *buffer, const gomp_target *gs,
|
|||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
kind = " oacc_host_data";
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
kind = " oacc_parallel_kernels_parallelized";
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
kind = " oacc_parallel_kernels_gang_single";
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
kind = " oacc_data_kernels";
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
|
14
gcc/gimple.h
14
gcc/gimple.h
|
@ -175,6 +175,15 @@ enum gf_mask {
|
|||
GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 10,
|
||||
GF_OMP_TARGET_KIND_OACC_DECLARE = 11,
|
||||
GF_OMP_TARGET_KIND_OACC_HOST_DATA = 12,
|
||||
/* A 'GF_OMP_TARGET_KIND_OACC_PARALLEL' representing an OpenACC 'kernels'
|
||||
decomposed part, parallelized. */
|
||||
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED = 13,
|
||||
/* A 'GF_OMP_TARGET_KIND_OACC_PARALLEL' representing an OpenACC 'kernels'
|
||||
decomposed part, "gang-single". */
|
||||
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE = 14,
|
||||
/* A 'GF_OMP_TARGET_KIND_OACC_DATA' representing an OpenACC 'kernels'
|
||||
decomposed parts' 'data' construct. */
|
||||
GF_OMP_TARGET_KIND_OACC_DATA_KERNELS = 15,
|
||||
GF_OMP_TEAMS_HOST = 1 << 0,
|
||||
|
||||
/* True on an GIMPLE_OMP_RETURN statement if the return does not require
|
||||
|
@ -6511,6 +6520,9 @@ is_gimple_omp_oacc (const gimple *stmt)
|
|||
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DECLARE:
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -6536,6 +6548,8 @@ is_gimple_omp_offloaded (const gimple *stmt)
|
|||
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
|
||||
case GF_OMP_TARGET_KIND_OACC_KERNELS:
|
||||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -9257,11 +9257,14 @@ expand_omp_target (struct omp_region *region)
|
|||
case GF_OMP_TARGET_KIND_OACC_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DECLARE:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
data_region = false;
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
data_region = true;
|
||||
break;
|
||||
default:
|
||||
|
@ -9307,6 +9310,16 @@ expand_omp_target (struct omp_region *region)
|
|||
= tree_cons (get_identifier ("oacc serial"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
break;
|
||||
default:
|
||||
/* Make sure we don't miss any. */
|
||||
gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
|
||||
|
@ -9517,10 +9530,13 @@ expand_omp_target (struct omp_region *region)
|
|||
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
|
||||
case GF_OMP_TARGET_KIND_OACC_KERNELS:
|
||||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
start_ix = BUILT_IN_GOACC_PARALLEL;
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
start_ix = BUILT_IN_GOACC_DATA_START;
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_UPDATE:
|
||||
|
@ -9993,6 +10009,9 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
|
|||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_ENTER_DATA:
|
||||
|
@ -10247,6 +10266,9 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region,
|
|||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_ENTER_DATA:
|
||||
|
|
|
@ -193,8 +193,8 @@ static tree scan_omp_1_op (tree *, int *, void *);
|
|||
*handled_ops_p = false; \
|
||||
break;
|
||||
|
||||
/* Return true if CTX corresponds to an OpenACC 'parallel' or 'serial'
|
||||
region. */
|
||||
/* Return whether CTX represents an OpenACC 'parallel' or 'serial' construct.
|
||||
(This doesn't include OpenACC 'kernels' decomposed parts.) */
|
||||
|
||||
static bool
|
||||
is_oacc_parallel_or_serial (omp_context *ctx)
|
||||
|
@ -207,7 +207,8 @@ is_oacc_parallel_or_serial (omp_context *ctx)
|
|||
== GF_OMP_TARGET_KIND_OACC_SERIAL)));
|
||||
}
|
||||
|
||||
/* Return true if CTX corresponds to an oacc kernels region. */
|
||||
/* Return whether CTX represents an OpenACC 'kernels' construct.
|
||||
(This doesn't include OpenACC 'kernels' decomposed parts.) */
|
||||
|
||||
static bool
|
||||
is_oacc_kernels (omp_context *ctx)
|
||||
|
@ -218,6 +219,21 @@ is_oacc_kernels (omp_context *ctx)
|
|||
== GF_OMP_TARGET_KIND_OACC_KERNELS));
|
||||
}
|
||||
|
||||
/* Return whether CTX represents an OpenACC 'kernels' decomposed part. */
|
||||
|
||||
static bool
|
||||
is_oacc_kernels_decomposed_part (omp_context *ctx)
|
||||
{
|
||||
enum gimple_code outer_type = gimple_code (ctx->stmt);
|
||||
return ((outer_type == GIMPLE_OMP_TARGET)
|
||||
&& ((gimple_omp_target_kind (ctx->stmt)
|
||||
== GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
|
||||
|| (gimple_omp_target_kind (ctx->stmt)
|
||||
== GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
|
||||
|| (gimple_omp_target_kind (ctx->stmt)
|
||||
== GF_OMP_TARGET_KIND_OACC_DATA_KERNELS)));
|
||||
}
|
||||
|
||||
/* Return true if STMT corresponds to an OpenMP target region. */
|
||||
static bool
|
||||
is_omp_target (gimple *stmt)
|
||||
|
@ -1200,6 +1216,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
|
|||
{
|
||||
/* No 'reduction' clauses on OpenACC 'kernels'. */
|
||||
gcc_checking_assert (!is_oacc_kernels (ctx));
|
||||
/* Likewise, on OpenACC 'kernels' decomposed parts. */
|
||||
gcc_checking_assert (!is_oacc_kernels_decomposed_part (ctx));
|
||||
|
||||
ctx->local_reduction_clauses
|
||||
= tree_cons (NULL, c, ctx->local_reduction_clauses);
|
||||
|
@ -2415,7 +2433,9 @@ enclosing_target_ctx (omp_context *ctx)
|
|||
return ctx;
|
||||
}
|
||||
|
||||
/* Return true if ctx is part of an oacc kernels region. */
|
||||
/* Return whether CTX's parent compute construct is an OpenACC 'kernels'
|
||||
construct.
|
||||
(This doesn't include OpenACC 'kernels' decomposed parts.) */
|
||||
|
||||
static bool
|
||||
ctx_in_oacc_kernels_region (omp_context *ctx)
|
||||
|
@ -2431,7 +2451,8 @@ ctx_in_oacc_kernels_region (omp_context *ctx)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Check the parallelism clauses inside a kernels regions.
|
||||
/* Check the parallelism clauses inside a OpenACC 'kernels' region.
|
||||
(This doesn't include OpenACC 'kernels' decomposed parts.)
|
||||
Until kernels handling moves to use the same loop indirection
|
||||
scheme as parallel, we need to do this checking early. */
|
||||
|
||||
|
@ -2533,6 +2554,10 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
|
|||
|
||||
if (c_op0)
|
||||
{
|
||||
/* By construction, this is impossible for OpenACC 'kernels'
|
||||
decomposed parts. */
|
||||
gcc_assert (!(tgt && is_oacc_kernels_decomposed_part (tgt)));
|
||||
|
||||
error_at (OMP_CLAUSE_LOCATION (c),
|
||||
"argument not permitted on %qs clause",
|
||||
omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
|
||||
|
@ -3070,6 +3095,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
|
|||
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
|
||||
case GF_OMP_TARGET_KIND_OACC_KERNELS:
|
||||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
ok = true;
|
||||
break;
|
||||
|
||||
|
@ -3526,6 +3553,11 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
|
|||
case GF_OMP_TARGET_KIND_OACC_DECLARE: stmt_name = "declare"; break;
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
/* OpenACC 'kernels' decomposed parts. */
|
||||
stmt_name = "kernels"; break;
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
switch (gimple_omp_target_kind (ctx->stmt))
|
||||
|
@ -3541,6 +3573,11 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
|
|||
case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
ctx_stmt_name = "host_data"; break;
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
/* OpenACC 'kernels' decomposed parts. */
|
||||
ctx_stmt_name = "kernels"; break;
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
|
||||
|
@ -6930,6 +6967,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|
|||
{
|
||||
/* No 'reduction' clauses on OpenACC 'kernels'. */
|
||||
gcc_checking_assert (!is_oacc_kernels (ctx));
|
||||
/* Likewise, on OpenACC 'kernels' decomposed parts. */
|
||||
gcc_checking_assert (!is_oacc_kernels_decomposed_part (ctx));
|
||||
|
||||
tree orig = OMP_CLAUSE_DECL (c);
|
||||
tree var = maybe_lookup_decl (orig, ctx);
|
||||
|
@ -7785,6 +7824,8 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
|
|||
else if (is_oacc_kernels (tgt))
|
||||
/* Not using this loops handling inside OpenACC 'kernels' regions. */
|
||||
gcc_unreachable ();
|
||||
else if (is_oacc_kernels_decomposed_part (tgt))
|
||||
;
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
|
@ -7792,6 +7833,14 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
|
|||
if (!tgt || is_oacc_parallel_or_serial (tgt))
|
||||
tag |= OLF_INDEPENDENT;
|
||||
|
||||
/* Loops inside OpenACC 'kernels' decomposed parts' regions are expected to
|
||||
have an explicit 'seq' or 'independent' clause, and no 'auto' clause. */
|
||||
if (tgt && is_oacc_kernels_decomposed_part (tgt))
|
||||
{
|
||||
gcc_assert (tag & (OLF_SEQ | OLF_INDEPENDENT));
|
||||
gcc_assert (!(tag & OLF_AUTO));
|
||||
}
|
||||
|
||||
if (tag & OLF_TILE)
|
||||
/* Tiling could use all 3 levels. */
|
||||
levels = 3;
|
||||
|
@ -11639,11 +11688,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
case GF_OMP_TARGET_KIND_OACC_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DECLARE:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||
data_region = false;
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
|
||||
case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
|
||||
data_region = true;
|
||||
break;
|
||||
default:
|
||||
|
@ -11829,6 +11881,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
{
|
||||
/* No 'firstprivate' clauses on OpenACC 'kernels'. */
|
||||
gcc_checking_assert (!is_oacc_kernels (ctx));
|
||||
/* Likewise, on OpenACC 'kernels' decomposed parts. */
|
||||
gcc_checking_assert (!is_oacc_kernels_decomposed_part (ctx));
|
||||
|
||||
goto oacc_firstprivate;
|
||||
}
|
||||
|
@ -11861,6 +11915,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
{
|
||||
/* No 'private' clauses on OpenACC 'kernels'. */
|
||||
gcc_checking_assert (!is_oacc_kernels (ctx));
|
||||
/* Likewise, on OpenACC 'kernels' decomposed parts. */
|
||||
gcc_checking_assert (!is_oacc_kernels_decomposed_part (ctx));
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
1531
gcc/omp-oacc-kernels-decompose.cc
Normal file
1531
gcc/omp-oacc-kernels-decompose.cc
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1771,11 +1771,19 @@ execute_oacc_device_lower ()
|
|||
bool is_oacc_serial
|
||||
= (lookup_attribute ("oacc serial",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
bool is_oacc_parallel_kernels_parallelized
|
||||
= (lookup_attribute ("oacc parallel_kernels_parallelized",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
bool is_oacc_parallel_kernels_gang_single
|
||||
= (lookup_attribute ("oacc parallel_kernels_gang_single",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
int fn_level = oacc_fn_attrib_level (attrs);
|
||||
bool is_oacc_routine = (fn_level >= 0);
|
||||
gcc_checking_assert (is_oacc_parallel
|
||||
+ is_oacc_kernels
|
||||
+ is_oacc_serial
|
||||
+ is_oacc_parallel_kernels_parallelized
|
||||
+ is_oacc_parallel_kernels_gang_single
|
||||
+ is_oacc_routine
|
||||
== 1);
|
||||
|
||||
|
@ -1795,6 +1803,12 @@ execute_oacc_device_lower ()
|
|||
? "parallelized" : "unparallelized"));
|
||||
else if (is_oacc_serial)
|
||||
fprintf (dump_file, "Function is OpenACC serial offload\n");
|
||||
else if (is_oacc_parallel_kernels_parallelized)
|
||||
fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
|
||||
"parallel_kernels_parallelized");
|
||||
else if (is_oacc_parallel_kernels_gang_single)
|
||||
fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
|
||||
"parallel_kernels_gang_single");
|
||||
else if (is_oacc_routine)
|
||||
fprintf (dump_file, "Function is OpenACC routine level %d\n",
|
||||
fn_level);
|
||||
|
@ -1838,6 +1852,11 @@ execute_oacc_device_lower ()
|
|||
fprintf (dump_file, "]\n");
|
||||
}
|
||||
|
||||
/* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
|
||||
a single gang only. */
|
||||
if (is_oacc_parallel_kernels_gang_single)
|
||||
gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);
|
||||
|
||||
oacc_loop_process (loops);
|
||||
if (dump_file)
|
||||
{
|
||||
|
|
|
@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
NEXT_PASS (pass_warn_unused_result);
|
||||
NEXT_PASS (pass_diagnose_omp_blocks);
|
||||
NEXT_PASS (pass_diagnose_tm_blocks);
|
||||
NEXT_PASS (pass_omp_oacc_kernels_decompose);
|
||||
NEXT_PASS (pass_lower_omp);
|
||||
NEXT_PASS (pass_lower_cf);
|
||||
NEXT_PASS (pass_lower_tm);
|
||||
|
|
|
@ -1,11 +1,21 @@
|
|||
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||
/* { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
{ dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" } */
|
||||
|
||||
void
|
||||
f (short c)
|
||||
{
|
||||
#pragma acc parallel if(c)
|
||||
;
|
||||
#pragma acc kernels if(c)
|
||||
;
|
||||
#pragma acc data if(c)
|
||||
;
|
||||
#pragma acc update device(c) if(c)
|
||||
#pragma acc parallel if(c) copy(c)
|
||||
++c;
|
||||
|
||||
#pragma acc kernels if(c) copy(c)
|
||||
/* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:c \[len: [0-9]+\]\) if\(_[0-9]+\)$} 1 "gimple" } } */
|
||||
/* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:c \[len: [0-9]+\]\) if\(_[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:c \[len: [0-9]+\]\) if\(_[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } } */
|
||||
++c;
|
||||
|
||||
#pragma acc data if(c) copy(c)
|
||||
++c;
|
||||
|
||||
#pragma acc update if(c) device(c)
|
||||
}
|
||||
|
|
83
gcc/testsuite/c-c++-common/goacc/kernels-decompose-1.c
Normal file
83
gcc/testsuite/c-c++-common/goacc/kernels-decompose-1.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* Test OpenACC 'kernels' construct decomposition. */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-omp-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||
/* { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
{ dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" } */
|
||||
|
||||
/* See also '../../gfortran.dg/goacc/kernels-decompose-1.f95'. */
|
||||
|
||||
#define N 1024
|
||||
|
||||
unsigned int a[N];
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int i;
|
||||
unsigned int sum = 1;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copy(sum)
|
||||
/* { dg-bogus "optimized: assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } .-1 }
|
||||
TODO Is this maybe the report that belongs to the XFAILed report further down? */
|
||||
{
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (i = 0; i < N; ++i)
|
||||
sum += a[i];
|
||||
|
||||
sum++; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
a[0]++;
|
||||
|
||||
#pragma acc loop independent /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (i = 0; i < N; ++i)
|
||||
sum += a[i];
|
||||
|
||||
if (sum > 10) /* { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" } */
|
||||
{
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-missed "unparallelized loop nest in OpenACC 'kernels' region: it's executed conditionally" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/*TODO { dg-optimized "assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } l_loop_i$c_loop_i } */
|
||||
for (i = 0; i < N; ++i)
|
||||
sum += a[i];
|
||||
}
|
||||
|
||||
#pragma acc loop auto /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (i = 0; i < N; ++i)
|
||||
sum += a[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:sum \[len: [0-9]+\]\) map\(to:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 1 "gimple" } }
|
||||
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "gimple" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop independent private\(i\)$} 1 "gimple" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop auto private\(i\)$} 1 "gimple" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "gimple" } } */
|
||||
|
||||
/* Check that the OpenACC 'kernels' got decomposed into 'data' and an enclosed
|
||||
sequence of compute constructs.
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:sum \[len: [0-9]+\]\) map\(to:a\[0\] \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
As noted above, we get three "old-style" kernel regions, one gang-single region, and one parallelized loop region.
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels async\(-1\) map\(force_present:sum \[len: [0-9]+\]\) map\(force_present:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 3 "omp_oacc_kernels_decompose" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_parallelized async\(-1\) map\(force_present:sum \[len: [0-9]+\]\) map\(force_present:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:sum \[len: [0-9]+\]\) map\(force_present:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
|
||||
'data' plus five CCs.
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target } 6 "omp_oacc_kernels_decompose" } }
|
||||
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "omp_oacc_kernels_decompose" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop independent private\(i\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop auto private\(i\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
{ dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "omp_oacc_kernels_decompose" } }
|
||||
|
||||
Each of the parallel regions is async, and there is a final call to
|
||||
__builtin_GOACC_wait.
|
||||
{ dg-final { scan-tree-dump-times "__builtin_GOACC_wait" 1 "omp_oacc_kernels_decompose" } } */
|
141
gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c
Normal file
141
gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c
Normal file
|
@ -0,0 +1,141 @@
|
|||
/* Test OpenACC 'kernels' construct decomposition. */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-omp-all" } */
|
||||
/* { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
/* { dg-additional-options "-O2" } for 'parloops'. */
|
||||
|
||||
/* See also '../../gfortran.dg/goacc/kernels-decompose-2.f95'. */
|
||||
|
||||
#pragma acc routine gang
|
||||
extern int
|
||||
f_g (int);
|
||||
|
||||
#pragma acc routine worker
|
||||
extern int
|
||||
f_w (int);
|
||||
|
||||
#pragma acc routine vector
|
||||
extern int
|
||||
f_v (int);
|
||||
|
||||
#pragma acc routine seq
|
||||
extern int
|
||||
f_s (int);
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int x, y, z;
|
||||
#define N 10
|
||||
int a[N], b[N], c[N];
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
x = 0; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
y = x < 10;
|
||||
z = x++;
|
||||
;
|
||||
}
|
||||
|
||||
{ /*TODO Instead of using 'for (int i = 0; [...])', move 'int i' outside, to work around for ICE detailed in 'kernels-decompose-ice-1.c'. */
|
||||
int i;
|
||||
#pragma acc kernels /* { dg-optimized "assigned OpenACC gang loop parallelism" } */
|
||||
for (i = 0; i < N; i++) /* { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" } */
|
||||
a[i] = 0;
|
||||
}
|
||||
|
||||
#pragma acc kernels loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0; i < N; i++)
|
||||
b[i] = a[N - i - 1];
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0; i < N; i++)
|
||||
b[i] = a[N - i - 1];
|
||||
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0; i < N; i++)
|
||||
c[i] = a[i] * b[i];
|
||||
|
||||
a[z] = 0; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0; i < N; i++)
|
||||
c[i] += a[i];
|
||||
|
||||
#pragma acc loop seq /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0 + 1; i < N; i++)
|
||||
c[i] += c[i - 1];
|
||||
}
|
||||
|
||||
#pragma acc kernels
|
||||
/*TODO What does this mean?
|
||||
TODO { dg-optimized "assigned OpenACC worker vector loop parallelism" "" { target *-*-* } .-2 } */
|
||||
{
|
||||
#pragma acc loop independent /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0; i < N; ++i)
|
||||
#pragma acc loop independent /* { dg-line l_loop_j[incr c_loop_j] } */
|
||||
/* { dg-optimized "assigned OpenACC worker loop parallelism" "" { target *-*-* } l_loop_j$c_loop_j } */
|
||||
for (int j = 0; j < N; ++j)
|
||||
#pragma acc loop independent /* { dg-line l_loop_k[incr c_loop_k] } */
|
||||
/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } l_loop_k$c_loop_k } */
|
||||
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_k$c_loop_k } */
|
||||
for (int k = 0; k < N; ++k)
|
||||
a[(i + j + k) % N]
|
||||
= b[j]
|
||||
+ f_v (c[k]); /* { dg-optimized "assigned OpenACC vector loop parallelism" } */
|
||||
|
||||
/*TODO Should the following turn into "gang-single" instead of "parloops"?
|
||||
TODO The problem is that the first STMT is 'if (y <= 4) goto <D.2547>; else goto <D.2548>;', thus "parloops". */
|
||||
if (y < 5) /* { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" } */
|
||||
#pragma acc loop independent /* { dg-line l_loop_j[incr c_loop_j] } */
|
||||
/* { dg-missed "unparallelized loop nest in OpenACC 'kernels' region: it's executed conditionally" "" { target *-*-* } l_loop_j$c_loop_j } */
|
||||
for (int j = 0; j < N; ++j)
|
||||
b[j] = f_w (c[j]);
|
||||
}
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
y = f_g (a[5]); /* { dg-line l_part[incr c_part] } */
|
||||
/*TODO If such a construct is placed in its own part (like it is, here), can't this actually use gang paralelism, instead of "gang-single"?
|
||||
{ dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" "" { target *-*-* } l_part$c_part } */
|
||||
/* { dg-optimized "assigned OpenACC gang worker vector loop parallelism" "" { target *-*-* } l_part$c_part } */
|
||||
|
||||
#pragma acc loop independent /* { dg-line l_loop_j[incr c_loop_j] } */
|
||||
/* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_j$c_loop_j } */
|
||||
/* { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_j$c_loop_j } */
|
||||
for (int j = 0; j < N; ++j)
|
||||
b[j] = y + f_w (c[j]); /* { dg-optimized "assigned OpenACC worker vector loop parallelism" } */
|
||||
}
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
y = 3; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
|
||||
#pragma acc loop independent /* { dg-line l_loop_j[incr c_loop_j] } */
|
||||
/* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_j$c_loop_j } */
|
||||
/* { dg-optimized "assigned OpenACC gang worker loop parallelism" "" { target *-*-* } l_loop_j$c_loop_j } */
|
||||
for (int j = 0; j < N; ++j)
|
||||
b[j] = y + f_v (c[j]); /* { dg-optimized "assigned OpenACC vector loop parallelism" } */
|
||||
|
||||
z = 2; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
}
|
||||
|
||||
#pragma acc kernels /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
108
gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c
Normal file
108
gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c
Normal file
|
@ -0,0 +1,108 @@
|
|||
/* Test OpenACC 'kernels' construct decomposition. */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-omp-all" } */
|
||||
/* { dg-additional-options "-fopenacc-kernels=decompose" } */
|
||||
/* { dg-ice "TODO" }
|
||||
{ dg-prune-output "during GIMPLE pass: omplower" } */
|
||||
|
||||
/* Reduced from 'kernels-decompose-2.c'.
|
||||
(Hopefully) similar instances:
|
||||
- 'libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c'
|
||||
- 'libgomp.oacc-c-c++-common/kernels-decompose-1.c'
|
||||
*/
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
#define N 10
|
||||
|
||||
#pragma acc kernels
|
||||
for (int i = 0; i < N; i++) /* { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" } */
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
In 'gimple' we've got:
|
||||
|
||||
main ()
|
||||
{
|
||||
int D.2087;
|
||||
|
||||
{
|
||||
int a[10];
|
||||
|
||||
try
|
||||
{
|
||||
#pragma omp target oacc_kernels map(tofrom:a [len: 40])
|
||||
{
|
||||
{
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
goto <D.2085>;
|
||||
[...]
|
||||
|
||||
..., which in 'omp_oacc_kernels_decompose' we turn into:
|
||||
|
||||
main ()
|
||||
{
|
||||
int D.2087;
|
||||
|
||||
{
|
||||
int a[10];
|
||||
|
||||
try
|
||||
{
|
||||
#pragma omp target oacc_data_kernels map(tofrom:a [len: 40])
|
||||
{
|
||||
try
|
||||
{
|
||||
{
|
||||
int i;
|
||||
|
||||
#pragma omp target oacc_data_kernels map(alloc:i [len: 4])
|
||||
{
|
||||
try
|
||||
{
|
||||
{
|
||||
#pragma omp target oacc_kernels async(-1) map(force_present:i [len: 4]) map(force_present:a [len: 40])
|
||||
{
|
||||
i = 0;
|
||||
goto <D.2085>;
|
||||
[...]
|
||||
|
||||
..., which results in ICE in:
|
||||
|
||||
#1 0x0000000000d2247b in lower_omp_target (gsi_p=gsi_p@entry=0x7fffffffbc90, ctx=ctx@entry=0x2c994c0) at [...]/gcc/omp-low.c:11981
|
||||
11981 gcc_assert (offloaded);
|
||||
(gdb) list
|
||||
11976 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
|
||||
11977 gimplify_assign (x, var, &ilist);
|
||||
11978 }
|
||||
11979 else if (is_gimple_reg (var))
|
||||
11980 {
|
||||
11981 gcc_assert (offloaded);
|
||||
11982 tree avar = create_tmp_var (TREE_TYPE (var));
|
||||
11983 mark_addressable (avar);
|
||||
11984 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c);
|
||||
11985 if (GOMP_MAP_COPY_TO_P (map_kind)
|
||||
(gdb) call debug_tree(var)
|
||||
<var_decl 0x7ffff7feebd0 i
|
||||
type <integer_type 0x7ffff67be5e8 int sizes-gimplified public SI
|
||||
size <integer_cst 0x7ffff67a5f18 constant 32>
|
||||
unit-size <integer_cst 0x7ffff67a5f30 constant 4>
|
||||
align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff67be5e8 precision:32 min <integer_cst 0x7ffff67a5ed0 -2147483648> max <integer_cst 0x7ffff67a5ee8 2147483647>
|
||||
pointer_to_this <pointer_type 0x7ffff67c69d8>>
|
||||
used read SI [...]:15:12 size <integer_cst 0x7ffff67a5f18 32> unit-size <integer_cst 0x7ffff67a5f30 4>
|
||||
align:32 warn_if_not_align:0 context <function_decl 0x7ffff68eea00 main>>
|
||||
|
||||
Just defusing the 'assert' is not sufficient:
|
||||
|
||||
libgomp: present clause: !acc_is_present (0x7ffe29cba3ec, 4 (0x4))
|
||||
|
||||
TODO Can't the 'omp_oacc_kernels_decompose' transformation be much simpler, such that we avoid the intermediate 'data' if we've got just one compute construct inside it?
|
||||
TODO But it's not clear if that'd just resolve one simple instance of the general problem?
|
||||
|
||||
*/
|
16
gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c
Normal file
16
gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* Test OpenACC 'kernels' construct decomposition. */
|
||||
|
||||
/* { dg-additional-options "-fopenacc-kernels=decompose" } */
|
||||
/* { dg-ice "TODO" }
|
||||
{ dg-prune-output "during GIMPLE pass: omp_oacc_kernels_decompose" } */
|
||||
|
||||
/* Reduced from 'kernels-decompose-ice-1.c'. */
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
int i;
|
||||
}
|
||||
}
|
81
gcc/testsuite/gfortran.dg/goacc/kernels-decompose-1.f95
Normal file
81
gcc/testsuite/gfortran.dg/goacc/kernels-decompose-1.f95
Normal file
|
@ -0,0 +1,81 @@
|
|||
! Test OpenACC 'kernels' construct decomposition.
|
||||
|
||||
! { dg-additional-options "-fopt-info-omp-all" }
|
||||
! { dg-additional-options "-fdump-tree-gimple" }
|
||||
! { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
! { dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" }
|
||||
|
||||
! See also '../../c-c++-common/goacc/kernels-decompose-1.c'.
|
||||
|
||||
program main
|
||||
implicit none
|
||||
integer, parameter :: N = 1024
|
||||
integer, dimension (1:N) :: a
|
||||
integer :: i, sum
|
||||
|
||||
!$acc kernels copyin(a(1:N)) copy(sum)
|
||||
! { dg-bogus "optimized: assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } .-1 }
|
||||
!TODO Is this maybe the report that belongs to the XFAILed report further down? */
|
||||
|
||||
!$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
sum = sum + a(i)
|
||||
end do
|
||||
|
||||
sum = sum + 1 ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
|
||||
a(1) = a(1) + 1
|
||||
|
||||
!$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
sum = sum + a(i)
|
||||
end do
|
||||
|
||||
if (sum .gt. 10) then ! { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" }
|
||||
!$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-missed "unparallelized loop nest in OpenACC 'kernels' region: it's executed conditionally" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
!TODO { dg-optimized "assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
sum = sum + a(i)
|
||||
end do
|
||||
end if
|
||||
|
||||
!$acc loop auto ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
sum = sum + a(i)
|
||||
end do
|
||||
|
||||
!$acc end kernels
|
||||
end program main
|
||||
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(to:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(tofrom:sum \[len: [0-9]+\]\)$} 1 "gimple" } }
|
||||
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) independent$} 1 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) auto$} 1 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "gimple" } }
|
||||
|
||||
! Check that the OpenACC 'kernels' got decomposed into 'data' and an enclosed
|
||||
! sequence of compute constructs.
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(to:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(tofrom:sum \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
! As noted above, we get three "old-style" kernel regions, one gang-single region, and one parallelized loop region.
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels async\(-1\) map\(force_present:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(force_present:sum \[len: [0-9]+\]\)$} 3 "omp_oacc_kernels_decompose" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_parallelized async\(-1\) map\(force_present:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(force_present:sum \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(force_present:sum \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
!
|
||||
! 'data' plus five CCs.
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target } 6 "omp_oacc_kernels_decompose" } }
|
||||
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "omp_oacc_kernels_decompose" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) independent$} 1 "omp_oacc_kernels_decompose" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) auto} 1 "omp_oacc_kernels_decompose" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "omp_oacc_kernels_decompose" } }
|
||||
|
||||
! Each of the parallel regions is async, and there is a final call to
|
||||
! __builtin_GOACC_wait.
|
||||
! { dg-final { scan-tree-dump-times "__builtin_GOACC_wait" 1 "omp_oacc_kernels_decompose" } }
|
142
gcc/testsuite/gfortran.dg/goacc/kernels-decompose-2.f95
Normal file
142
gcc/testsuite/gfortran.dg/goacc/kernels-decompose-2.f95
Normal file
|
@ -0,0 +1,142 @@
|
|||
! Test OpenACC 'kernels' construct decomposition.
|
||||
|
||||
! { dg-additional-options "-fopt-info-omp-all" }
|
||||
! { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
! { dg-additional-options "-O2" } for 'parloops'.
|
||||
|
||||
! See also '../../c-c++-common/goacc/kernels-decompose-2.c'.
|
||||
|
||||
program main
|
||||
implicit none
|
||||
|
||||
integer, external :: f_g
|
||||
!$acc routine (f_g) gang
|
||||
integer, external :: f_w
|
||||
!$acc routine (f_w) worker
|
||||
integer, external :: f_v
|
||||
!$acc routine (f_v) vector
|
||||
integer, external :: f_s
|
||||
!$acc routine (f_s) seq
|
||||
|
||||
integer :: i, j, k
|
||||
integer :: x, y, z
|
||||
logical :: y_l
|
||||
integer, parameter :: N = 10
|
||||
integer :: a(N), b(N), c(N)
|
||||
|
||||
!$acc kernels
|
||||
x = 0 ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
|
||||
y = 0
|
||||
y_l = x < 10
|
||||
z = x
|
||||
x = x + 1
|
||||
;
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels ! { dg-optimized "assigned OpenACC gang loop parallelism" }
|
||||
do i = 1, N ! { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" }
|
||||
a(i) = 0
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels loop ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
b(i) = a(N - i + 1)
|
||||
end do
|
||||
|
||||
!$acc kernels
|
||||
!$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
b(i) = a(N - i + 1)
|
||||
end do
|
||||
|
||||
!$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
c(i) = a(i) * b(i)
|
||||
end do
|
||||
|
||||
a(z) = 0 ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
|
||||
|
||||
!$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
c(i) = c(i) + a(i)
|
||||
end do
|
||||
|
||||
!$acc loop seq ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1 + 1, N
|
||||
c(i) = c(i) + c(i - 1)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels
|
||||
!TODO What does this mean?
|
||||
!TODO { dg-optimized "assigned OpenACC worker vector loop parallelism" "" { target *-*-* } .-2 }
|
||||
!$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = 1, N
|
||||
!$acc loop independent ! { dg-line l_loop_j[incr c_loop_j] }
|
||||
! { dg-optimized "assigned OpenACC worker loop parallelism" "" { target *-*-* } l_loop_j$c_loop_j }
|
||||
do j = 1, N
|
||||
!$acc loop independent ! { dg-line l_loop_k[incr c_loop_k] }
|
||||
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } l_loop_k$c_loop_k }
|
||||
! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_k$c_loop_k }
|
||||
do k = 1, N
|
||||
a(1 + mod(i + j + k, N)) &
|
||||
= b(j) &
|
||||
+ f_v (c(k)) ! { dg-optimized "assigned OpenACC vector loop parallelism" }
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
!TODO Should the following turn into "gang-single" instead of "parloops"?
|
||||
!TODO The problem is that the first STMT is 'if (y <= 4) goto <D.2547>; else goto <D.2548>;', thus "parloops".
|
||||
if (y < 5) then ! { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" }
|
||||
!$acc loop independent ! { dg-line l_loop_j[incr c_loop_j] }
|
||||
! { dg-missed "unparallelized loop nest in OpenACC 'kernels' region: it's executed conditionally" "" { target *-*-* } l_loop_j$c_loop_j }
|
||||
do j = 1, N
|
||||
b(j) = f_w (c(j))
|
||||
end do
|
||||
end if
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels
|
||||
y = f_g (a(5)) ! { dg-line l_part[incr c_part] }
|
||||
!TODO If such a construct is placed in its own part (like it is, here), can't this actually use gang paralelism, instead of "gang-single"?
|
||||
! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" "" { target *-*-* } l_part$c_part }
|
||||
! { dg-optimized "assigned OpenACC gang worker vector loop parallelism" "" { target *-*-* } l_part$c_part }
|
||||
|
||||
!$acc loop independent ! { dg-line l_loop_j[incr c_loop_j] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_j$c_loop_j }
|
||||
! { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_j$c_loop_j }
|
||||
do j = 1, N
|
||||
b(j) = y + f_w (c(j)) ! { dg-optimized "assigned OpenACC worker vector loop parallelism" }
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels
|
||||
y = 3 ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
|
||||
|
||||
!$acc loop independent ! { dg-line l_loop_j[incr c_loop_j] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_j$c_loop_j }
|
||||
! { dg-optimized "assigned OpenACC gang worker loop parallelism" "" { target *-*-* } l_loop_j$c_loop_j }
|
||||
do j = 1, N
|
||||
b(j) = y + f_v (c(j)) ! { dg-optimized "assigned OpenACC vector loop parallelism" }
|
||||
end do
|
||||
|
||||
z = 2 ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
|
||||
!$acc end kernels
|
||||
end program main
|
|
@ -1,5 +1,7 @@
|
|||
! { dg-do compile }
|
||||
! { dg-additional-options "-fdump-tree-original" }
|
||||
! { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
! { dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" }
|
||||
|
||||
program test
|
||||
implicit none
|
||||
|
@ -34,3 +36,6 @@ end program test
|
|||
! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } }
|
||||
|
||||
! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } }
|
||||
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels if\(D\.[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single num_gangs\(1\) if\(D\.[0-9]+\) async\(-1\)$} 1 "omp_oacc_kernels_decompose" } }
|
||||
|
|
|
@ -416,6 +416,7 @@ extern gimple_opt_pass *make_pass_lower_switch (gcc::context *ctxt);
|
|||
extern gimple_opt_pass *make_pass_lower_switch_O0 (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_vector (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
/* { dg-additional-options "-fopenacc-kernels=decompose" } */
|
||||
/* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'.
|
||||
{ dg-ice "TODO" }
|
||||
TODO { dg-prune-output "during GIMPLE pass: omplower" }
|
||||
TODO { dg-do link } */
|
||||
|
||||
#undef KERNELS_DECOMPOSE_ICE_HACK
|
||||
#include "declare-vla.c"
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-additional-options "-fopenacc-kernels=decompose" } */
|
||||
|
||||
/* See also 'declare-vla-kernels-decompose-ice-1.c'. */
|
||||
|
||||
#define KERNELS_DECOMPOSE_ICE_HACK
|
||||
#include "declare-vla.c"
|
|
@ -38,6 +38,12 @@ f_data (void)
|
|||
for (i = 0; i < N; i++)
|
||||
A[i] = -i;
|
||||
|
||||
/* See 'declare-vla-kernels-decompose.c'. */
|
||||
#ifdef KERNELS_DECOMPOSE_ICE_HACK
|
||||
(volatile int *) &i;
|
||||
(volatile int *) &N;
|
||||
#endif
|
||||
|
||||
# pragma acc kernels
|
||||
for (i = 0; i < N; i++)
|
||||
A[i] = i;
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
/* Test OpenACC 'kernels' construct decomposition. */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-omp-all" } */
|
||||
/* { dg-additional-options "-fopenacc-kernels=decompose" } */
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
int a = 0;
|
||||
/*TODO Without making 'a' addressable, for GCN offloading we will not see the expected value copied out. (But it does work for nvptx offloading, strange...) */
|
||||
(volatile int *) &a;
|
||||
#define N 123
|
||||
int b[N] = { 0 };
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
int c = 234; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
|
||||
/*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. */
|
||||
(volatile int *) &c;
|
||||
|
||||
#pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 0; i < N; ++i)
|
||||
b[i] = c;
|
||||
|
||||
a = c; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */
|
||||
}
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
assert (b[i] == 234);
|
||||
assert (a == 234);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,17 +1,22 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-fopt-info-omp-all" }
|
||||
! { dg-additional-options "-fopenacc-kernels=decompose" }
|
||||
|
||||
subroutine kernel(lo, hi, a, b, c)
|
||||
implicit none
|
||||
integer :: lo, hi, i
|
||||
real, dimension(lo:hi) :: a, b, c
|
||||
|
||||
!$acc kernels copyin(lo, hi) ! { dg-optimized "assigned OpenACC seq loop parallelism" }
|
||||
!$acc loop independent
|
||||
!$acc kernels copyin(lo, hi)
|
||||
!$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = lo, hi
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc loop independent
|
||||
!$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] }
|
||||
! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
|
||||
do i = lo, hi
|
||||
c(i) = b(i)
|
||||
end do
|
||||
|
|
Loading…
Add table
Reference in a new issue