Attach an attribute to all outlined OpenACC compute regions
This allows for making some things more explicit, later on.

gcc/
    * omp-expand.c (expand_omp_target): Attach an attribute to all
    outlined OpenACC compute regions.
    * omp-offload.c (execute_oacc_device_lower): Adjust.
gcc/testsuite/
    * c-c++-common/goacc/classify-parallel.c: Adjust.
    * gfortran.dg/goacc/classify-parallel.f95: Likewise.
    * c-c++-common/goacc/classify-serial.c: New.
    * gfortran.dg/goacc/classify-serial.f95: Likewise.
This commit is contained in:
parent
d1ba078d9b
commit
703e4f8649
6 changed files with 114 additions and 27 deletions
|
@ -9284,27 +9284,33 @@ expand_omp_target (struct omp_region *region)
|
|||
entry_bb = region->entry;
|
||||
exit_bb = region->exit;
|
||||
|
||||
if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
|
||||
mark_loops_in_oacc_kernels_region (region->entry, region->exit);
|
||||
|
||||
/* Going on, all OpenACC compute constructs are mapped to
|
||||
'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
|
||||
To distinguish between them, we attach attributes. */
|
||||
switch (target_kind)
|
||||
{
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= tree_cons (get_identifier ("oacc parallel"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_KERNELS:
|
||||
mark_loops_in_oacc_kernels_region (region->entry, region->exit);
|
||||
|
||||
/* Further down, all OpenACC compute constructs will be mapped to
|
||||
BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
|
||||
is an "oacc kernels" attribute set for OpenACC kernels. */
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= tree_cons (get_identifier ("oacc kernels"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
break;
|
||||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||
/* Further down, all OpenACC compute constructs will be mapped to
|
||||
BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
|
||||
is an "oacc serial" attribute set for OpenACC serial. */
|
||||
DECL_ATTRIBUTES (child_fn)
|
||||
= tree_cons (get_identifier ("oacc serial"),
|
||||
NULL_TREE, DECL_ATTRIBUTES (child_fn));
|
||||
break;
|
||||
default:
|
||||
/* Make sure we don't miss any. */
|
||||
gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
|
||||
&& is_gimple_omp_offloaded (entry_stmt)));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -1762,12 +1762,45 @@ execute_oacc_device_lower ()
|
|||
flag_openacc_dims = (char *)&flag_openacc_dims;
|
||||
}
|
||||
|
||||
bool is_oacc_parallel
|
||||
= (lookup_attribute ("oacc parallel",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
bool is_oacc_kernels
|
||||
= (lookup_attribute ("oacc kernels",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
bool is_oacc_serial
|
||||
= (lookup_attribute ("oacc serial",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
int fn_level = oacc_fn_attrib_level (attrs);
|
||||
bool is_oacc_routine = (fn_level >= 0);
|
||||
gcc_checking_assert (is_oacc_parallel
|
||||
+ is_oacc_kernels
|
||||
+ is_oacc_serial
|
||||
+ is_oacc_routine
|
||||
== 1);
|
||||
|
||||
bool is_oacc_kernels_parallelized
|
||||
= (lookup_attribute ("oacc kernels parallelized",
|
||||
DECL_ATTRIBUTES (current_function_decl)) != NULL);
|
||||
if (is_oacc_kernels_parallelized)
|
||||
gcc_checking_assert (is_oacc_kernels);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
if (is_oacc_parallel)
|
||||
fprintf (dump_file, "Function is OpenACC parallel offload\n");
|
||||
else if (is_oacc_kernels)
|
||||
fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
|
||||
(is_oacc_kernels_parallelized
|
||||
? "parallelized" : "unparallelized"));
|
||||
else if (is_oacc_serial)
|
||||
fprintf (dump_file, "Function is OpenACC serial offload\n");
|
||||
else if (is_oacc_routine)
|
||||
fprintf (dump_file, "Function is OpenACC routine level %d\n",
|
||||
fn_level);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
|
||||
kernels, so remove the parallelism dimensions function attributes
|
||||
|
@ -1780,22 +1813,10 @@ execute_oacc_device_lower ()
|
|||
|
||||
/* Discover, partition and process the loops. */
|
||||
oacc_loop *loops = oacc_loop_discovery ();
|
||||
int fn_level = oacc_fn_attrib_level (attrs);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
if (fn_level >= 0)
|
||||
fprintf (dump_file, "Function is OpenACC routine level %d\n",
|
||||
fn_level);
|
||||
else if (is_oacc_kernels)
|
||||
fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
|
||||
(is_oacc_kernels_parallelized
|
||||
? "parallelized" : "unparallelized"));
|
||||
else
|
||||
fprintf (dump_file, "Function is OpenACC parallel offload\n");
|
||||
}
|
||||
|
||||
unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
|
||||
unsigned outer_mask = 0;
|
||||
if (is_oacc_routine)
|
||||
outer_mask = GOMP_DIM_MASK (fn_level) - 1;
|
||||
unsigned used_mask = oacc_loop_partition (loops, outer_mask);
|
||||
/* OpenACC kernels constructs are special: they currently don't use the
|
||||
generic oacc_loop infrastructure and attribute/dimension processing. */
|
||||
|
|
|
@ -20,10 +20,10 @@ void PARALLEL ()
|
|||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
|
||||
|
|
29
gcc/testsuite/c-c++-common/goacc/classify-serial.c
Normal file
29
gcc/testsuite/c-c++-common/goacc/classify-serial.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* Check offloaded function's attributes and classification for OpenACC
|
||||
serial. */
|
||||
|
||||
/* { dg-additional-options "-O2" }
|
||||
{ dg-additional-options "-fopt-info-optimized-omp" }
|
||||
{ dg-additional-options "-fdump-tree-ompexp" }
|
||||
{ dg-additional-options "-fdump-tree-oaccdevlow" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
extern unsigned int *__restrict a;
|
||||
extern unsigned int *__restrict b;
|
||||
extern unsigned int *__restrict c;
|
||||
|
||||
void SERIAL ()
|
||||
{
|
||||
#pragma acc serial loop copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang vector loop parallelism" } */
|
||||
for (unsigned int i = 0; i < N; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
/* Check the offloaded function's attributes.
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } } */
|
||||
|
||||
/* Check the offloaded function's classification and compute dimensions (will
|
||||
always be 1 x 1 x 1 for non-offloading compilation).
|
||||
{ dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccdevlow" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
|
||||
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
|
|
@ -22,10 +22,10 @@ program main
|
|||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
|
||||
|
|
31
gcc/testsuite/gfortran.dg/goacc/classify-serial.f95
Normal file
31
gcc/testsuite/gfortran.dg/goacc/classify-serial.f95
Normal file
|
@ -0,0 +1,31 @@
|
|||
! Check offloaded function's attributes and classification for OpenACC
|
||||
! serial.
|
||||
|
||||
! { dg-additional-options "-O2" }
|
||||
! { dg-additional-options "-fopt-info-optimized-omp" }
|
||||
! { dg-additional-options "-fdump-tree-ompexp" }
|
||||
! { dg-additional-options "-fdump-tree-oaccdevlow" }
|
||||
|
||||
program main
|
||||
implicit none
|
||||
integer, parameter :: n = 1024
|
||||
integer, dimension (0:n-1) :: a, b, c
|
||||
integer :: i
|
||||
|
||||
call setup(a, b)
|
||||
|
||||
!$acc serial loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang vector loop parallelism" }
|
||||
do i = 0, n - 1
|
||||
c(i) = a(i) + b(i)
|
||||
end do
|
||||
!$acc end serial loop
|
||||
end program main
|
||||
|
||||
! Check the offloaded function's attributes.
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } }
|
||||
|
||||
! Check the offloaded function's classification and compute dimensions (will
|
||||
! always be 1 x 1 x 1 for non-offloading compilation).
|
||||
! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccdevlow" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
|
Loading…
Add table
Reference in a new issue