OpenACC 'kernels' decomposition: wrong-code cases unless manually making certain variables addressable [PR104892]
Document a few examples of the status quo. PR middle-end/104892 libgomp/ * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Point to PR104892. * testsuite/libgomp.oacc-c-c++-common/default-1.c: Likewise, enable '--param=openacc-kernels=decompose' and adjust. * testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. * testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise.
This commit is contained in:
parent
2e53fa7bb2
commit
535afbd959
5 changed files with 59 additions and 16 deletions
|
@ -1,3 +1,5 @@
|
|||
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-all-omp" }
|
||||
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||
|
||||
|
@ -63,6 +65,8 @@ int test_parallel ()
|
|||
int test_kernels ()
|
||||
{
|
||||
int val = 2;
|
||||
/*TODO <https://gcc.gnu.org/PR104892> */
|
||||
(volatile int *) &val;
|
||||
int ary[32];
|
||||
int ondev = 0;
|
||||
|
||||
|
@ -71,12 +75,18 @@ int test_kernels ()
|
|||
|
||||
/* val defaults to copy, ary defaults to copy. */
|
||||
#pragma acc kernels copy(ondev) /* { dg-line l_compute[incr c_compute] } */
|
||||
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'ondev\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'val\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
{
|
||||
/* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
|
||||
ondev = acc_on_device (acc_device_not_host);
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (unsigned i = 0; i < 32; i++)
|
||||
{
|
||||
ary[i] = val;
|
||||
|
|
|
@ -29,12 +29,12 @@ static int g2;
|
|||
static void f1 ()
|
||||
{
|
||||
int a = 0;
|
||||
/*TODO Without making 'a' addressable, for GCN offloading we will not see the expected value copied out. (But it does work for nvptx offloading, strange...) */
|
||||
/*TODO <https://gcc.gnu.org/PR104892> */
|
||||
(volatile int *) &a;
|
||||
#define N 123
|
||||
int b[N] = { 0 };
|
||||
unsigned long long f1;
|
||||
/*TODO See above. */
|
||||
/*TODO <https://gcc.gnu.org/PR104892> */
|
||||
(volatile void *) &f1;
|
||||
|
||||
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
/* Verify that a simple, explicit acc loop reduction works inside
|
||||
a kernels region. */
|
||||
|
||||
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-all-omp" }
|
||||
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||
|
||||
|
@ -17,12 +19,16 @@ int
|
|||
main ()
|
||||
{
|
||||
int i, red = 0;
|
||||
/*TODO <https://gcc.gnu.org/PR104892> */
|
||||
(volatile int *) &red;
|
||||
|
||||
#pragma acc kernels /* { dg-line l_compute1 } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */
|
||||
/* { dg-note {variable 'red\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute1 } */
|
||||
{
|
||||
#pragma acc loop reduction (+:red) /* { dg-line l_loop_i1 } */
|
||||
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i1 } */
|
||||
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i1 } */
|
||||
for (i = 0; i < N; i++)
|
||||
red++;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
|
||||
vector_length. */
|
||||
|
||||
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
|
||||
|
||||
/* { dg-additional-options "-fopt-info-all-omp" }
|
||||
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||
|
||||
|
@ -640,20 +642,26 @@ int main ()
|
|||
kernels. */
|
||||
{
|
||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||
/*TODO <https://gcc.gnu.org/PR104892> */
|
||||
(volatile int *) &gangs_min, &gangs_max, &workers_min, &workers_max, &vectors_min, &vectors_max;
|
||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
|
||||
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'gangs_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'workers_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'vectors_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
{
|
||||
/* This is to make the OpenACC kernels construct unparallelizable. */
|
||||
asm volatile ("" : : : "memory");
|
||||
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 100; i > -100; --i)
|
||||
{
|
||||
/* This is to make the loop unparallelizable. */
|
||||
asm volatile ("" : : : "memory");
|
||||
|
||||
gangs_min = gangs_max = acc_gang ();
|
||||
workers_min = workers_max = acc_worker ();
|
||||
vectors_min = vectors_max = acc_vector ();
|
||||
|
@ -674,23 +682,29 @@ int main ()
|
|||
#define WORKERS 5
|
||||
#define VECTORS 13
|
||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||
/*TODO <https://gcc.gnu.org/PR104892> */
|
||||
(volatile int *) &gangs_min, &gangs_max, &workers_min, &workers_max, &vectors_min, &vectors_max;
|
||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
|
||||
num_gangs (gangs) \
|
||||
num_workers (WORKERS) \
|
||||
vector_length (VECTORS)
|
||||
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'gangs_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'workers_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
/* { dg-note {variable 'vectors_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
{
|
||||
/* This is to make the OpenACC kernels construct unparallelizable. */
|
||||
asm volatile ("" : : : "memory");
|
||||
|
||||
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||
for (int i = 100; i > -100; --i)
|
||||
{
|
||||
/* This is to make the loop unparallelizable. */
|
||||
asm volatile ("" : : : "memory");
|
||||
|
||||
gangs_min = gangs_max = acc_gang ();
|
||||
workers_min = workers_max = acc_worker ();
|
||||
vectors_min = vectors_max = acc_vector ();
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
! { dg-do run }
|
||||
|
||||
! { dg-additional-options "--param=openacc-kernels=decompose" }
|
||||
|
||||
! { dg-additional-options "-fopt-info-all-omp" }
|
||||
! { dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||
|
||||
|
@ -13,17 +15,28 @@
|
|||
program reduction
|
||||
integer, parameter :: n = 20
|
||||
integer :: i, red
|
||||
!TODO <https://gcc.gnu.org/PR104892>
|
||||
call make_addressable (red)
|
||||
|
||||
red = 0
|
||||
|
||||
!$acc kernels ! { dg-line l_compute1 } */
|
||||
! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 }
|
||||
! { dg-note {variable 'red\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute1 }
|
||||
!$acc loop reduction (+:red) ! { dg-line l_loop_i1 }
|
||||
! { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i1 }
|
||||
! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 }
|
||||
! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i1 }
|
||||
do i = 1, n
|
||||
red = red + 1
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
if (red .ne. n) stop 1
|
||||
|
||||
contains
|
||||
|
||||
subroutine make_addressable (v)
|
||||
integer :: v ! by reference
|
||||
end subroutine make_addressable
|
||||
|
||||
end program reduction
|
||||
|
|
Loading…
Add table
Reference in a new issue