OpenMP: Support reverse offload (middle end part)
gcc/ChangeLog: * internal-fn.cc (expand_GOMP_TARGET_REV): New. * internal-fn.def (GOMP_TARGET_REV): New. * lto-cgraph.cc (lto_output_node, verify_node_partition): Mark 'omp target device_ancestor_host' as in_other_partition and don't error if absent. * omp-low.cc (create_omp_child_function): Mark as 'noclone'. * omp-expand.cc (expand_omp_target): For reverse offload, remove sorry, use device = GOMP_DEVICE_HOST_FALLBACK and create empty-body nohost function. * omp-offload.cc (execute_omp_device_lower): Handle IFN_GOMP_TARGET_REV. (pass_omp_target_link::execute): For ACCEL_COMPILER, don't nullify fn argument for reverse offload libgomp/ChangeLog: * libgomp.texi (OpenMP 5.0): Mark 'ancestor' as implemented but refer to 'requires'. * testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c: New test. * testsuite/libgomp.c-c++-common/reverse-offload-1.c: New test. * testsuite/libgomp.fortran/reverse-offload-1-aux.f90: New test. * testsuite/libgomp.fortran/reverse-offload-1.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/reverse-offload-1.c: Remove dg-sorry. * c-c++-common/gomp/target-device-ancestor-4.c: Likewise. * gfortran.dg/gomp/target-device-ancestor-4.f90: Likewise. * gfortran.dg/gomp/target-device-ancestor-5.f90: Likewise. * c-c++-common/goacc/classify-kernels-parloops.c: Add 'noclone' to scan-tree-dump-times. * c-c++-common/goacc/classify-kernels-unparallelized-parloops.c: Likewise. * c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise. * c-c++-common/goacc/classify-kernels.c: Likewise. * c-c++-common/goacc/classify-parallel.c: Likewise. * c-c++-common/goacc/classify-serial.c: Likewise. * c-c++-common/goacc/kernels-counter-vars-function-scope.c: Likewise. * c-c++-common/goacc/kernels-loop-2.c: Likewise. * c-c++-common/goacc/kernels-loop-3.c: Likewise. * c-c++-common/goacc/kernels-loop-data-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/kernels-loop-data-update.c: Likewise. * c-c++-common/goacc/kernels-loop-data.c: Likewise. * c-c++-common/goacc/kernels-loop-g.c: Likewise. * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise. * c-c++-common/goacc/kernels-loop-n.c: Likewise. * c-c++-common/goacc/kernels-loop-nest.c: Likewise. * c-c++-common/goacc/kernels-loop.c: Likewise. * c-c++-common/goacc/kernels-one-counter-var.c: Likewise. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise. * gfortran.dg/goacc/classify-kernels-parloops.f95: Likewise. * gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95: Likewise. * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise. * gfortran.dg/goacc/classify-kernels.f95: Likewise. * gfortran.dg/goacc/classify-parallel.f95: Likewise. * gfortran.dg/goacc/classify-serial.f95: Likewise. * gfortran.dg/goacc/kernels-loop-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data.f95: Likewise. * gfortran.dg/goacc/kernels-loop-n.f95: Likewise. * gfortran.dg/goacc/kernels-loop.f95: Likewise. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise.
This commit is contained in:
parent
0c2d6aa1be
commit
d6621a2f31
51 changed files with 433 additions and 73 deletions
|
@ -368,6 +368,14 @@ expand_GOMP_SIMT_VF (internal_fn, gcall *)
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* This should get expanded in omp_device_lower pass. */
|
||||
|
||||
static void
|
||||
expand_GOMP_TARGET_REV (internal_fn, gcall *)
|
||||
{
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Lane index of the first SIMT lane that supplies a non-zero argument.
|
||||
This is a SIMT counterpart to GOMP_SIMD_LAST_LANE, used to represent the
|
||||
lane that executed the last iteration for handling OpenMP lastprivate. */
|
||||
|
|
|
@ -336,6 +336,7 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
|
|||
DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
|
||||
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
|
||||
|
||||
DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
|
|
|
@ -430,6 +430,13 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
|
|||
after reading back. */
|
||||
in_other_partition = 1;
|
||||
}
|
||||
else if (UNLIKELY (lto_stream_offload_p
|
||||
&& lookup_attribute ("omp target device_ancestor_host",
|
||||
DECL_ATTRIBUTES (node->decl))))
|
||||
/* This symbol is only used as argument to IFN_GOMP_TARGET_REV; this IFN
|
||||
is ignored on ACCEL_COMPILER. Thus, mark it as in_other_partition to silence
|
||||
verify_node_partition diagnostic. */
|
||||
in_other_partition = 1;
|
||||
|
||||
clone_of = node->clone_of;
|
||||
while (clone_of
|
||||
|
@ -1140,10 +1147,15 @@ verify_node_partition (symtab_node *node)
|
|||
if (node->in_other_partition)
|
||||
{
|
||||
if (TREE_CODE (node->decl) == FUNCTION_DECL)
|
||||
error_at (DECL_SOURCE_LOCATION (node->decl),
|
||||
"function %qs has been referenced in offloaded code but"
|
||||
" hasn%'t been marked to be included in the offloaded code",
|
||||
node->name ());
|
||||
{
|
||||
if (lookup_attribute ("omp target device_ancestor_host",
|
||||
DECL_ATTRIBUTES (node->decl)) != NULL)
|
||||
return;
|
||||
error_at (DECL_SOURCE_LOCATION (node->decl),
|
||||
"function %qs has been referenced in offloaded code but"
|
||||
" hasn%'t been marked to be included in the offloaded code",
|
||||
node->name ());
|
||||
}
|
||||
else if (VAR_P (node->decl))
|
||||
error_at (DECL_SOURCE_LOCATION (node->decl),
|
||||
"variable %qs has been referenced in offloaded code but"
|
||||
|
|
|
@ -9663,7 +9663,7 @@ expand_omp_target (struct omp_region *region)
|
|||
{
|
||||
basic_block entry_bb, exit_bb, new_bb;
|
||||
struct function *child_cfun;
|
||||
tree child_fn, block, t;
|
||||
tree child_fn, child_fn2, block, t, c;
|
||||
gimple_stmt_iterator gsi;
|
||||
gomp_target *entry_stmt;
|
||||
gimple *stmt;
|
||||
|
@ -9700,10 +9700,16 @@ expand_omp_target (struct omp_region *region)
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
child_fn = NULL_TREE;
|
||||
tree clauses = gimple_omp_target_clauses (entry_stmt);
|
||||
|
||||
bool is_ancestor = false;
|
||||
child_fn = child_fn2 = NULL_TREE;
|
||||
child_cfun = NULL;
|
||||
if (offloaded)
|
||||
{
|
||||
c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
|
||||
if (ENABLE_OFFLOADING && c)
|
||||
is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
|
||||
child_fn = gimple_omp_target_child_fn (entry_stmt);
|
||||
child_cfun = DECL_STRUCT_FUNCTION (child_fn);
|
||||
}
|
||||
|
@ -9891,7 +9897,8 @@ expand_omp_target (struct omp_region *region)
|
|||
{
|
||||
if (in_lto_p)
|
||||
DECL_PRESERVE_P (child_fn) = 1;
|
||||
vec_safe_push (offload_funcs, child_fn);
|
||||
if (!is_ancestor)
|
||||
vec_safe_push (offload_funcs, child_fn);
|
||||
}
|
||||
|
||||
bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
|
||||
|
@ -9930,11 +9937,88 @@ expand_omp_target (struct omp_region *region)
|
|||
}
|
||||
|
||||
adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
|
||||
|
||||
/* Handle the case that an inner ancestor:1 target is called by an outer
|
||||
target region. */
|
||||
if (!is_ancestor)
|
||||
cgraph_node::get (child_fn)->calls_declare_variant_alt
|
||||
|= cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
|
||||
else /* Duplicate function to create empty nonhost variant. */
|
||||
{
|
||||
/* Enable pass_omp_device_lower pass. */
|
||||
cgraph_node::get (cfun->decl)->calls_declare_variant_alt = 1;
|
||||
cgraph_node *fn2_node;
|
||||
child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
|
||||
FUNCTION_DECL,
|
||||
clone_function_name (child_fn, "nohost"),
|
||||
TREE_TYPE (child_fn));
|
||||
if (in_lto_p)
|
||||
DECL_PRESERVE_P (child_fn2) = 1;
|
||||
TREE_STATIC (child_fn2) = 1;
|
||||
DECL_ARTIFICIAL (child_fn2) = 1;
|
||||
DECL_IGNORED_P (child_fn2) = 0;
|
||||
TREE_PUBLIC (child_fn2) = 0;
|
||||
DECL_UNINLINABLE (child_fn2) = 1;
|
||||
DECL_EXTERNAL (child_fn2) = 0;
|
||||
DECL_CONTEXT (child_fn2) = NULL_TREE;
|
||||
DECL_INITIAL (child_fn2) = make_node (BLOCK);
|
||||
BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= remove_attribute ("omp target entrypoint",
|
||||
DECL_ATTRIBUTES (child_fn));
|
||||
DECL_ATTRIBUTES (child_fn2)
|
||||
= tree_cons (get_identifier ("omp target device_ancestor_nohost"),
|
||||
NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= tree_cons (get_identifier ("omp target device_ancestor_host"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
|
||||
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
|
||||
DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
|
||||
= DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
|
||||
DECL_FUNCTION_VERSIONED (child_fn2)
|
||||
= DECL_FUNCTION_VERSIONED (current_function_decl);
|
||||
|
||||
fn2_node = cgraph_node::get_create (child_fn2);
|
||||
fn2_node->offloadable = 1;
|
||||
fn2_node->force_output = 1;
|
||||
node->offloadable = 0;
|
||||
|
||||
t = build_decl (DECL_SOURCE_LOCATION (child_fn),
|
||||
RESULT_DECL, NULL_TREE, void_type_node);
|
||||
DECL_ARTIFICIAL (t) = 1;
|
||||
DECL_IGNORED_P (t) = 1;
|
||||
DECL_CONTEXT (t) = child_fn2;
|
||||
DECL_RESULT (child_fn2) = t;
|
||||
DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
|
||||
void_type_node, NULL);
|
||||
tree tmp = DECL_ARGUMENTS (child_fn);
|
||||
t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
|
||||
DECL_NAME (tmp), TREE_TYPE (tmp));
|
||||
DECL_ARTIFICIAL (t) = 1;
|
||||
DECL_NAMELESS (t) = 1;
|
||||
DECL_ARG_TYPE (t) = ptr_type_node;
|
||||
DECL_CONTEXT (t) = current_function_decl;
|
||||
TREE_USED (t) = 1;
|
||||
TREE_READONLY (t) = 1;
|
||||
DECL_ARGUMENTS (child_fn2) = t;
|
||||
gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);
|
||||
|
||||
gimplify_function_tree (child_fn2);
|
||||
cgraph_node::add_new_function (child_fn2, true);
|
||||
|
||||
vec_safe_push (offload_funcs, child_fn2);
|
||||
if (dump_file && !gimple_in_ssa_p (cfun))
|
||||
{
|
||||
dump_function_header (dump_file, child_fn2, dump_flags);
|
||||
dump_function_to_file (child_fn2, dump_file, dump_flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit a library call to launch the offloading region, or do data
|
||||
transfers. */
|
||||
tree t1, t2, t3, t4, depend, c, clauses;
|
||||
tree t1, t2, t3, t4, depend;
|
||||
enum built_in_function start_ix;
|
||||
unsigned int flags_i = 0;
|
||||
|
||||
|
@ -9984,8 +10068,6 @@ expand_omp_target (struct omp_region *region)
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
clauses = gimple_omp_target_clauses (entry_stmt);
|
||||
|
||||
tree device = NULL_TREE;
|
||||
location_t device_loc = UNKNOWN_LOCATION;
|
||||
tree goacc_flags = NULL_TREE;
|
||||
|
@ -10017,7 +10099,8 @@ expand_omp_target (struct omp_region *region)
|
|||
need_device_adjustment = true;
|
||||
device_loc = OMP_CLAUSE_LOCATION (c);
|
||||
if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
|
||||
sorry_at (device_loc, "%<ancestor%> not yet supported");
|
||||
device = build_int_cst (integer_type_node,
|
||||
GOMP_DEVICE_HOST_FALLBACK);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -10194,7 +10277,7 @@ expand_omp_target (struct omp_region *region)
|
|||
else
|
||||
args.quick_push (device);
|
||||
if (offloaded)
|
||||
args.quick_push (build_fold_addr_expr (child_fn));
|
||||
args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
|
||||
args.quick_push (t1);
|
||||
args.quick_push (t2);
|
||||
args.quick_push (t3);
|
||||
|
@ -10316,6 +10399,14 @@ expand_omp_target (struct omp_region *region)
|
|||
/* Push terminal marker - zero. */
|
||||
args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
|
||||
|
||||
if (child_fn2)
|
||||
{
|
||||
g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
|
||||
build_fold_addr_expr (child_fn));
|
||||
gimple_set_location (g, gimple_location (entry_stmt));
|
||||
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
|
||||
gimple_set_location (g, gimple_location (entry_stmt));
|
||||
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
||||
|
|
|
@ -2101,6 +2101,11 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
|
|||
else
|
||||
target_attr = NULL;
|
||||
}
|
||||
if (target_attr
|
||||
&& is_gimple_omp_offloaded (ctx->stmt)
|
||||
&& lookup_attribute ("noclone", DECL_ATTRIBUTES (decl)) == NULL_TREE)
|
||||
DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("noclone"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (decl));
|
||||
if (target_attr)
|
||||
DECL_ATTRIBUTES (decl)
|
||||
= tree_cons (get_identifier (target_attr),
|
||||
|
|
|
@ -2627,6 +2627,47 @@ execute_omp_device_lower ()
|
|||
tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
|
||||
switch (gimple_call_internal_fn (stmt))
|
||||
{
|
||||
case IFN_GOMP_TARGET_REV:
|
||||
{
|
||||
#ifndef ACCEL_COMPILER
|
||||
gimple_stmt_iterator gsi2 = gsi;
|
||||
gsi_next (&gsi2);
|
||||
gcc_assert (!gsi_end_p (gsi2));
|
||||
gcc_assert (gimple_call_builtin_p (gsi_stmt (gsi2),
|
||||
BUILT_IN_GOMP_TARGET));
|
||||
tree old_decl
|
||||
= TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi2), 1), 0);
|
||||
tree new_decl = gimple_call_arg (gsi_stmt (gsi), 0);
|
||||
gimple_call_set_arg (gsi_stmt (gsi2), 1, new_decl);
|
||||
update_stmt (gsi_stmt (gsi2));
|
||||
new_decl = TREE_OPERAND (new_decl, 0);
|
||||
unsigned i;
|
||||
unsigned num_funcs = vec_safe_length (offload_funcs);
|
||||
for (i = 0; i < num_funcs; i++)
|
||||
{
|
||||
if ((*offload_funcs)[i] == old_decl)
|
||||
{
|
||||
(*offload_funcs)[i] = new_decl;
|
||||
break;
|
||||
}
|
||||
else if ((*offload_funcs)[i] == new_decl)
|
||||
break; /* This can happen due to inlining. */
|
||||
}
|
||||
gcc_assert (i < num_funcs);
|
||||
#else
|
||||
tree old_decl = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi), 0),
|
||||
0);
|
||||
#endif
|
||||
/* FIXME: Find a way to actually prevent outputting the empty-body
|
||||
old_decl as debug symbol + function in the assembly file. */
|
||||
cgraph_node *node = cgraph_node::get (old_decl);
|
||||
node->address_taken = false;
|
||||
node->need_lto_streaming = false;
|
||||
node->offloadable = false;
|
||||
|
||||
unlink_stmt_vdef (stmt);
|
||||
}
|
||||
break;
|
||||
case IFN_GOMP_USE_SIMT:
|
||||
rhs = vf == 1 ? integer_zero_node : integer_one_node;
|
||||
break;
|
||||
|
@ -2803,6 +2844,15 @@ pass_omp_target_link::execute (function *fun)
|
|||
{
|
||||
if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
|
||||
{
|
||||
tree dev = gimple_call_arg (gsi_stmt (gsi), 0);
|
||||
tree fn = gimple_call_arg (gsi_stmt (gsi), 1);
|
||||
if (POINTER_TYPE_P (TREE_TYPE (fn)))
|
||||
fn = TREE_OPERAND (fn, 0);
|
||||
if (TREE_CODE (dev) == INTEGER_CST
|
||||
&& wi::to_wide (dev) == GOMP_DEVICE_HOST_FALLBACK
|
||||
&& lookup_attribute ("omp target device_ancestor_nohost",
|
||||
DECL_ATTRIBUTES (fn)) != NULL_TREE)
|
||||
continue; /* ancestor:1 */
|
||||
/* Nullify the second argument of __builtin_GOMP_target_ext. */
|
||||
gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
|
||||
update_stmt (gsi_stmt (gsi));
|
||||
|
|
|
@ -27,16 +27,16 @@ void KERNELS ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
can be parallelized.
|
||||
{ dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
|
||||
|
|
|
@ -31,16 +31,16 @@ void KERNELS ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
can't be parallelized.
|
||||
{ dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
|
||||
|
|
|
@ -40,16 +40,16 @@ void KERNELS ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
can't be parallelized.
|
||||
{ dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
|
||||
|
|
|
@ -36,16 +36,16 @@ void KERNELS ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
can be parallelized.
|
||||
{ dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
{ dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
|
||||
|
|
|
@ -24,10 +24,10 @@ void PARALLEL ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
|
||||
|
|
|
@ -29,10 +29,10 @@ void SERIAL ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
|
||||
|
|
|
@ -45,7 +45,7 @@ main (void)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -59,7 +59,7 @@ main (void)
|
|||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -39,7 +39,7 @@ main (void)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -59,7 +59,7 @@ main (void)
|
|||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -57,7 +57,7 @@ main (void)
|
|||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -54,7 +54,7 @@ main (void)
|
|||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -55,7 +55,7 @@ main (void)
|
|||
/* Check that only two loops are analyzed, and that both can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -53,7 +53,7 @@ main (void)
|
|||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -43,7 +43,7 @@ main (void)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -46,7 +46,7 @@ foo (COUNTERTYPE n)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -30,7 +30,7 @@ main (void)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -46,7 +46,7 @@ main (void)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -44,7 +44,7 @@ main (void)
|
|||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -57,7 +57,7 @@ main (void)
|
|||
// FIXME: OpenACC kernels stopped working with the firstprivate subarray
|
||||
// changes.
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
|
|
|
@ -43,7 +43,7 @@ tg_fn (int *x, int *y)
|
|||
x2 = x2 + 2 + called_in_target1 ();
|
||||
y2 = y2 + 7;
|
||||
|
||||
#pragma omp target device(ancestor : 1) map(tofrom: x2) /* { dg-message "sorry, unimplemented: 'ancestor' not yet supported" } */
|
||||
#pragma omp target device(ancestor : 1) map(tofrom: x2)
|
||||
check_offload(&x2, &y2);
|
||||
|
||||
if (x2 != 2+2+3+42 || y2 != 3 + 7)
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
void
|
||||
foo (void)
|
||||
{
|
||||
#pragma omp target device (ancestor: 1) /* { dg-message "sorry, unimplemented: 'ancestor' not yet supported" } */
|
||||
#pragma omp target device (ancestor: 1)
|
||||
;
|
||||
|
||||
}
|
||||
|
|
|
@ -29,16 +29,16 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
! can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
|
||||
|
|
|
@ -33,16 +33,16 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
! can't be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
|
||||
|
|
|
@ -34,16 +34,16 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
! can't be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
|
||||
|
|
|
@ -32,16 +32,16 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check that exactly one OpenACC kernels construct is analyzed, and that it
|
||||
! can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
|
||||
|
|
|
@ -26,10 +26,10 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
|
||||
|
|
|
@ -29,10 +29,10 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccloops" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
|
||||
|
|
|
@ -34,7 +34,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -40,7 +40,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -40,7 +40,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -38,7 +38,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -38,7 +38,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -38,7 +38,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -33,7 +33,7 @@ end module test
|
|||
! Check that only one loop is analyzed, and that it can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
|
||||
! TODO, PR70545.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -30,7 +30,7 @@ end program main
|
|||
|
||||
! Check that only one loop is analyzed, and that it can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -39,7 +39,7 @@ end program main
|
|||
|
||||
! Check that only three loops are analyzed, and that all can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
!$omp requires reverse_offload
|
||||
|
||||
!$omp target device (ancestor : 1) ! { dg-message "sorry, unimplemented: 'ancestor' not yet supported" }
|
||||
!$omp target device (ancestor : 1)
|
||||
!$omp end target
|
||||
|
||||
end
|
||||
|
|
|
@ -17,7 +17,7 @@ contains
|
|||
block
|
||||
block
|
||||
block
|
||||
!$omp target device(ancestor:1) ! { dg-message "sorry, unimplemented: 'ancestor' not yet supported" }
|
||||
!$omp target device(ancestor:1)
|
||||
!$omp end target
|
||||
end block
|
||||
end block
|
||||
|
|
|
@ -225,7 +225,7 @@ The OpenMP 4.5 specification is fully supported.
|
|||
@item @code{allocate} clause @tab P @tab Initial support
|
||||
@item @code{use_device_addr} clause on @code{target data} @tab Y @tab
|
||||
@item @code{ancestor} modifier on @code{device} clause
|
||||
@tab P @tab Reverse offload unsupported
|
||||
@tab Y @tab See comment for @code{requires}
|
||||
@item Implicit declare target directive @tab Y @tab
|
||||
@item Discontiguous array section with @code{target update} construct
|
||||
@tab N @tab
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
/* { dg-do compile { target skip-all-targets } } */
|
||||
|
||||
/* Declare the following function in a separare translation unit
|
||||
to ensure it won't have a device version. */
|
||||
|
||||
int
|
||||
add_3 (int x)
|
||||
{
|
||||
return x + 3;
|
||||
}
|
83
libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c
Normal file
83
libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-sources reverse-offload-1-aux.c } */
|
||||
|
||||
/* Check that reverse offload works in particular:
|
||||
- no code is generated on the device side (i.e. no
|
||||
implicit declare target of called functions and no
|
||||
code gen for the target-region body)
|
||||
-> would otherwise fail due to 'add_3' symbol
|
||||
- Plus the usual (compiles, runs, produces correct result)
|
||||
|
||||
Note: Running also the non-reverse-offload target regions
|
||||
on the host (host fallback) is valid and will pass. */
|
||||
|
||||
#pragma omp requires reverse_offload
|
||||
|
||||
extern int add_3 (int);
|
||||
|
||||
static int global_var = 5;
|
||||
|
||||
void
|
||||
check_offload (int *x, int *y)
|
||||
{
|
||||
*x = add_3 (*x);
|
||||
*y = add_3 (*y);
|
||||
}
|
||||
|
||||
#pragma omp declare target
|
||||
void
|
||||
tg_fn (int *x, int *y)
|
||||
{
|
||||
int x2 = *x, y2 = *y;
|
||||
if (x2 != 2 || y2 != 3)
|
||||
__builtin_abort ();
|
||||
x2 = x2 + 2;
|
||||
y2 = y2 + 7;
|
||||
|
||||
#pragma omp target device(ancestor : 1) map(tofrom: x2)
|
||||
check_offload(&x2, &y2);
|
||||
|
||||
if (x2 != 2+2+3 || y2 != 3 + 7)
|
||||
__builtin_abort ();
|
||||
*x = x2, *y = y2;
|
||||
}
|
||||
#pragma omp end declare target
|
||||
|
||||
void
|
||||
my_func (int *x, int *y)
|
||||
{
|
||||
if (global_var != 5)
|
||||
__builtin_abort ();
|
||||
global_var = 242;
|
||||
*x = 2*add_3(*x);
|
||||
*y = 3*add_3(*y);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
#pragma omp target
|
||||
{
|
||||
int x = 2, y = 3;
|
||||
tg_fn (&x, &y);
|
||||
}
|
||||
|
||||
#pragma omp target
|
||||
{
|
||||
int x = -2, y = -1;
|
||||
#pragma omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x)
|
||||
{
|
||||
if (x != -2 || y != -1)
|
||||
__builtin_abort ();
|
||||
my_func (&x, &y);
|
||||
if (x != 2*(3-2) || y != 3*(3-1))
|
||||
__builtin_abort ();
|
||||
}
|
||||
if (x != 2*(3-2) || y != -1)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
if (global_var != 242)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
12
libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90
Normal file
12
libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90
Normal file
|
@ -0,0 +1,12 @@
|
|||
! { dg-do compile { target skip-all-targets } }
|
||||
|
||||
! Declare the following function in a separare translation unit
|
||||
! to ensure it won't have a device version.
|
||||
|
||||
|
||||
integer function add_3 (x)
|
||||
implicit none
|
||||
integer, value :: x
|
||||
|
||||
add_3 = x + 3
|
||||
end function
|
88
libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90
Normal file
88
libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90
Normal file
|
@ -0,0 +1,88 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-sources reverse-offload-1-aux.f90 }
|
||||
|
||||
! Check that reverse offload works in particular:
|
||||
! - no code is generated on the device side (i.e. no
|
||||
! implicit declare target of called functions and no
|
||||
! code gen for the target-region body)
|
||||
! -> would otherwise fail due to 'add_3' symbol
|
||||
! - Plus the usual (compiles, runs, produces correct result)
|
||||
|
||||
! Note: Running also the non-reverse-offload target regions
|
||||
! on the host (host fallback) is valid and will pass.
|
||||
|
||||
module m
|
||||
interface
|
||||
integer function add_3 (x)
|
||||
implicit none
|
||||
integer, value :: x
|
||||
end function
|
||||
end interface
|
||||
integer :: global_var = 5
|
||||
end module m
|
||||
|
||||
module m2
|
||||
use m
|
||||
!$omp requires reverse_offload
|
||||
implicit none (type, external)
|
||||
contains
|
||||
subroutine check_offload (x, y)
|
||||
integer :: x, y
|
||||
x = add_3(x)
|
||||
y = add_3(y)
|
||||
end subroutine check_offload
|
||||
subroutine m2_tg_fn(x, y)
|
||||
integer :: x, y
|
||||
!$omp declare target
|
||||
if (x /= 2 .or. y /= 3) stop 1
|
||||
x = x + 2
|
||||
y = y + 7
|
||||
!$omp target device(ancestor : 1) map(tofrom: x)
|
||||
call check_offload(x, y)
|
||||
!$omp end target
|
||||
if (x /= 2+2+3 .or. y /= 3 + 7) stop 2
|
||||
end subroutine
|
||||
end module m2
|
||||
|
||||
program main
|
||||
use m
|
||||
!$omp requires reverse_offload
|
||||
implicit none (type, external)
|
||||
|
||||
integer :: prog_var = 99
|
||||
|
||||
!$omp target
|
||||
block
|
||||
use m2
|
||||
integer :: x, y
|
||||
x = 2; y = 3
|
||||
call m2_tg_fn (x, y)
|
||||
end block
|
||||
|
||||
!$omp target
|
||||
block
|
||||
use m2
|
||||
integer :: x, y
|
||||
x = -2; y = -1
|
||||
!$omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x)
|
||||
if (x /= -2 .or. y /= -1) stop 3
|
||||
call my_func (x, y)
|
||||
if (x /= 2*(3-2) .or. y /= 3*(3-1)) stop 5
|
||||
!$omp end target
|
||||
if (x /= 2*(3-2) .or. y /= -1) stop 6
|
||||
end block
|
||||
|
||||
if (prog_var /= 41 .or. global_var /= 242) stop 7
|
||||
|
||||
contains
|
||||
|
||||
subroutine my_func(x, y)
|
||||
integer :: x, y
|
||||
if (prog_var /= 99) stop 8
|
||||
if (global_var /= 5) stop 9
|
||||
prog_var = 41
|
||||
global_var = 242
|
||||
x = 2*add_3(x)
|
||||
y = 3*add_3(y)
|
||||
end subroutine my_func
|
||||
end
|
Loading…
Add table
Reference in a new issue