gcc/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90
Tobias Burnus 4e47e2f833 libgomp: Add OpenMP's omp_target_memset/omp_target_memset_async
PR libgomp/120444

include/ChangeLog:

	* cuda/cuda.h (cuMemsetD8, cuMemsetD8Async): Declare.

libgomp/ChangeLog:

	* libgomp-plugin.h (GOMP_OFFLOAD_memset): Declare.
	* libgomp.h (struct gomp_device_descr): Add memset_func.
	* libgomp.map (GOMP_6.0.1): Add omp_target_memset{,_async}.
	* libgomp.texi (Device Memory Routines): Document them.
	* omp.h.in (omp_target_memset, omp_target_memset_async): Declare.
	* omp_lib.f90.in (omp_target_memset, omp_target_memset_async):
	Add interfaces.
	* omp_lib.h.in (omp_target_memset, omp_target_memset_async): Likewise.
	* plugin/cuda-lib.def: Add cuMemsetD8.
	* plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add
	hsa_amd_memory_fill_fn.
	(init_hsa_runtime_functions): DLSYM_OPT_FN load it.
	(GOMP_OFFLOAD_memset): New.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_memset): New.
	* target.c (omp_target_memset_int, omp_target_memset,
	omp_target_memset_async_helper, omp_target_memset_async): New.
	(gomp_load_plugin_for_device): Add DLSYM (memset).
	* testsuite/libgomp.c-c++-common/omp_target_memset.c: New test.
	* testsuite/libgomp.c-c++-common/omp_target_memset-2.c: New test.
	* testsuite/libgomp.c-c++-common/omp_target_memset-3.c: New test.
	* testsuite/libgomp.fortran/omp_target_memset.f90: New test.
	* testsuite/libgomp.fortran/omp_target_memset-2.f90: New test.
2025-06-02 17:43:57 +02:00

39 lines
1.1 KiB
Fortran

! PR libgomp/120444
use omp_lib
use iso_c_binding
implicit none (type, external)
integer(c_int) :: dev, i, val, start, tail
type(c_ptr) :: ptr, ptr2, tmpptr
integer(c_int8_t), pointer, contiguous :: fptr(:)
integer(c_intptr_t) :: intptr
integer(c_size_t), parameter :: count = 1024
do dev = omp_initial_device, omp_get_num_devices ()
ptr = omp_target_alloc (count, dev)
! Play also around with the alignment - as hsa_amd_memory_fill operates
! on multiples of 4 bytes (c_int32_t)
do start = 0, 31
do tail = 0, 31
val = iachar('0') + start + tail
tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr)
ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev)
if (.not. c_associated (tmpptr, ptr2)) stop 1
!$omp target device(dev) is_device_ptr(ptr)
do i = 1 + start, int(count, c_int) - start - tail
call c_f_pointer (ptr, fptr, [count])
if (fptr(i) /= int (val, c_int8_t)) stop 2
end do
!$omp end target
end do
end do
call omp_target_free (ptr, dev);
end do
end