gcc/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
Tobias Burnus f4aa6b5a8d libgomp: Add OpenACC's acc_memcpy_device{,_async} routines [PR93226]
libgomp/ChangeLog:

	PR libgomp/93226
	* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New
	prototype.
	* libgomp.h (struct acc_dispatch_t): Add dev2dev_func.
	(gomp_copy_dev2dev): New prototype.
	* libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}.
	* libgomp.texi (acc_memcpy_device): New.
	* oacc-mem.c (memcpy_tofrom_device): Change to take from/to
	device boolean; use memcpy not memmove; add early return if
	size == 0 or same device + same ptr.
	(acc_memcpy_to_device, acc_memcpy_to_device_async,
	acc_memcpy_from_device, acc_memcpy_from_device_async): Update.
	(acc_memcpy_device, acc_memcpy_device_async): New.
	* openacc.f90 (acc_memcpy_device, acc_memcpy_device_async):
	Add interface.
	* openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async):
	Likewise.
	* openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add
	prototype.
	* plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev):
	Update comment.
	(GOMP_OFFLOAD_openacc_async_dev2host): Update call.
	(GOMP_OFFLOAD_openacc_async_dev2dev): New.
	* plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New.
	(GOMP_OFFLOAD_dev2dev): Call it.
	(GOMP_OFFLOAD_openacc_async_dev2dev): New.
	* target.c (gomp_copy_dev2dev): New.
	(gomp_load_plugin_for_device): Load dev2dev and async_dev2dev.
	* testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test.
	* testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test.
2025-05-29 22:47:06 +02:00

113 lines
2.5 KiB
Fortran

! { dg-prune-output "using .vector_length \\(32\\)" }
! PR libgomp/93226 */
module m
use iso_c_binding
use openacc
implicit none (external, type)
integer, parameter :: N = 1024
integer :: D(N)
!$acc declare device_resident(D)
contains
integer(c_intptr_t) function init_d()
!$acc routine
integer :: i
do i = 1, N
D(i) = 27*i
end do
init_d = loc(D)
end
end module
program main
use m
implicit none (external, type)
integer, allocatable, target :: a(:), b(:), e(:)
type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f
integer(c_intptr_t) intptr
integer :: i
logical fail
fail = .false.
allocate(a(N), b(N), e(N))
d_c = acc_malloc (N*c_sizeof (i))
d_f = acc_malloc (N*c_sizeof (i))
e = huge(e)
call acc_copyin (e, N*c_sizeof (i));
d_e = acc_deviceptr (e);
!$acc serial copyout(intptr)
intptr = init_d ()
!$acc end serial
d_d = transfer(intptr, d_d)
call acc_memcpy_device (d_c, d_d, N*c_sizeof (i))
!$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr)
block
integer, pointer :: cc(:), dd(:)
call c_f_pointer (d_c, cc, [N])
call c_f_pointer (d_d, dd, [N])
a = cc
do i = 1, N
if (dd(i) /= 27*i .or. cc(i) /= 27*i) then
fail = .true.
stop 1
end if
end do
end block
!$acc end serial
if (fail) error stop 1
do i = 1, N
a(i) = 11*i
b(i) = 31*i
end do
call acc_copyin (a, N*c_sizeof (i))
d_a = acc_deviceptr (a)
call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval)
!$acc parallel deviceptr(d_c) private(i) async
block
integer, pointer :: cc(:)
call c_f_pointer (d_c, cc, [N])
!$acc loop
do i = 1, N
cc(i) = -17*i
end do
end block
!$acc end parallel
call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval)
call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval)
call acc_wait (acc_async_noval)
d_b = acc_deviceptr (b)
call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval)
call acc_wait (acc_async_noval)
!$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail)
block
integer, pointer :: dd(:), ee(:), ff(:)
call c_f_pointer (d_d, dd, [N])
call c_f_pointer (d_e, ee, [N])
call c_f_pointer (d_f, ff, [N])
do i = 1, N
if (dd(i) /= 11*i &
.or. ee(i) /= 31*i &
.or. ff(i) /= -17*i) then
fail = .true.
stop 2
end if
end do
end block
!$acc end serial
if (fail) error stop 2
end