diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index becfc701ad7..5d13262c398 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,9 @@ +2019-01-23 Tom de Vries + + PR target/87835 + * plugin/plugin-nvptx.c (map_push): Fix adding of allocated element. + * testsuite/libgomp.oacc-c-c++-common/pr87835.c: New test. + 2019-01-15 Tom de Vries PR target/80547 diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index dd2bcf3083f..a220560b189 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -296,35 +296,46 @@ map_pop (struct ptx_stream *s) static CUdeviceptr map_push (struct ptx_stream *s, size_t size) { - struct cuda_map *map = NULL, *t = NULL; + struct cuda_map *map = NULL; + struct cuda_map **t; assert (s); assert (s->map); - /* Each PTX stream requires a separate data region to store the - launch arguments for cuLaunchKernel. Allocate a new - cuda_map and push it to the end of the list. */ + /* Select an element to push. */ if (s->map->active) - { - map = cuda_map_create (size); - - for (t = s->map; t->next != NULL; t = t->next) - ; - - t->next = map; - } - else if (s->map->size < size) - { - cuda_map_destroy (s->map); - map = cuda_map_create (size); - } + map = cuda_map_create (size); else - map = s->map; + { + /* Pop the inactive front element. */ + struct cuda_map *pop = s->map; + s->map = pop->next; + pop->next = NULL; - s->map = map; - s->map->active = true; + if (pop->size < size) + { + cuda_map_destroy (pop); - return s->map->d; + map = cuda_map_create (size); + } + else + map = pop; + } + + /* Check that the element is as expected. */ + assert (map->next == NULL); + assert (!map->active); + + /* Mark the element active. */ + map->active = true; + + /* Push the element to the back of the list. */ + for (t = &s->map; (*t) != NULL; t = &(*t)->next) + ; + assert (t != NULL && *t == NULL); + *t = map; + + return map->d; } /* Target data function launch information. */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c new file mode 100644 index 00000000000..310a485e74f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c @@ -0,0 +1,62 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include "cuda.h" + +#include + +#define n 128 + +int +main (void) +{ + CUresult r; + CUstream stream1; + int N = n; + int a[n]; + int b[n]; + int c[n]; + + acc_init (acc_device_nvidia); + + r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream1); + + for (int i = 0; i < n; i++) + { + a[i] = 3; + c[i] = 0; + } + +#pragma acc data copy (a, b, c) copyin (N) + { +#pragma acc parallel async (1) + ; + +#pragma acc parallel async (1) num_gangs (320) + #pragma loop gang + for (int ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[N - ii - 1]); + +#pragma acc parallel async (1) + #pragma acc loop seq + for (int ii = 0; ii < n; ii++) + a[ii] = 6; + +#pragma acc wait (1) + } + + for (int i = 0; i < n; i++) + if (c[i] != 6) + abort (); + + return 0; +}