
Add checks for nowait/depend and for checks that the returned CUDA, CUDA_DRIVER and HIP interop objects actually work. While the CUDA/CUDA_DRIVER ones are only for Nvidia GPUs, HIP works on both AMD and Nvidia GPUs; on Nvidia GPUs, it is a very thin wrapper around CUDA. For Fortran, only a HIP test has been added - using hipfort. While libgomp.c-c++-common/interop-2.c always works - even without GPU - and checks for depend / nowait, all others require that runtime libraries are found at link (and execution) time: For Nvidia GPUs, libcuda + libcudart or libcublas, For AMD GPUs, libamdhip64 or libhipblas. The header files and hipfort modules do not need to be present as a fallback has been implemented, but if they are, they get used. Due to the combinations, the basic 1x C/C++, 4x C and 1x Fortran tests yield 1x C/C++, 14x C and 4 Fortran run-test files. libgomp/ChangeLog: * testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas, check_effective_target_openacc_cudart): Update description as the check requires more. (check_effective_target_openacc_libcuda, check_effective_target_openacc_libcublas, check_effective_target_openacc_libcudart, check_effective_target_gomp_hip_header_amd, check_effective_target_gomp_hip_header_nvidia, check_effective_target_gomp_hipfort_module, check_effective_target_gomp_libamdhip64, check_effective_target_gomp_libhipblas): New. * testsuite/libgomp.c-c++-common/interop-2.c: New test. * testsuite/libgomp.c/interop-cublas-full.c: New test. * testsuite/libgomp.c/interop-cublas-libonly.c: New test. * testsuite/libgomp.c/interop-cuda-full.c: New test. * testsuite/libgomp.c/interop-cuda-libonly.c: New test. * testsuite/libgomp.c/interop-hip-amd-full.c: New test. * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test. * testsuite/libgomp.c/interop-hip-nvidia-full.c: New test. * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test. * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test. * testsuite/libgomp.c/interop-hip.h: New test. * testsuite/libgomp.c/interop-hipblas-amd-full.c: New test. * testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test. * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test. * testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test. * testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test. * testsuite/libgomp.c/interop-hipblas.h: New test. * testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test. * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test. * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test. * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test. * testsuite/libgomp.fortran/interop-hip.h: New test.
214 lines
6.3 KiB
C
214 lines
6.3 KiB
C
! Minimal check whether HIP works - by checking whether the API routines
|
|
! seem to work. This includes a fallback if hipfort is not available
|
|
|
|
#ifndef HAVE_HIPFORT
|
|
#ifndef USE_HIP_FALLBACK_MODULE
|
|
#if USE_CUDA_NAMES
|
|
#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDA/CUDA)"
|
|
#else
|
|
#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset"
|
|
#endif
|
|
#endif
|
|
module hipfort ! Minimal implementation for the testsuite
|
|
implicit none
|
|
|
|
enum, bind(c)
|
|
enumerator :: hipSuccess = 0
|
|
enumerator :: hipErrorNotSupported = 801
|
|
end enum
|
|
|
|
enum, bind(c)
|
|
enumerator :: hipDeviceAttributeClockRate = 5
|
|
enumerator :: hipDeviceAttributeMaxGridDimX = 29
|
|
end enum
|
|
|
|
interface
|
|
integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) &
|
|
#if USE_CUDA_NAMES
|
|
bind(c, name="cudaDeviceGetAttribute")
|
|
#else
|
|
bind(c, name="hipDeviceGetAttribute")
|
|
#endif
|
|
use iso_c_binding, only: c_ptr, c_int
|
|
import
|
|
implicit none
|
|
type(c_ptr), value :: ip
|
|
integer(kind(hipDeviceAttributeClockRate)), value :: attr
|
|
integer(c_int), value :: dev
|
|
end
|
|
|
|
integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) &
|
|
#if USE_CUDA_NAMES
|
|
bind(c, name="cudaCtxGetApiVersion")
|
|
#else
|
|
bind(c, name="hipCtxGetApiVersion")
|
|
#endif
|
|
use iso_c_binding, only: c_ptr
|
|
import
|
|
implicit none
|
|
type(c_ptr), value :: ctx, ip
|
|
end
|
|
|
|
integer(kind(hipSuccess)) function hipStreamQuery (stream) &
|
|
#if USE_CUDA_NAMES
|
|
bind(c, name="cudaStreamQuery")
|
|
#else
|
|
bind(c, name="hipStreamQuery")
|
|
#endif
|
|
use iso_c_binding, only: c_ptr
|
|
import
|
|
implicit none
|
|
type(c_ptr), value :: stream
|
|
end
|
|
|
|
integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) &
|
|
#if USE_CUDA_NAMES
|
|
bind(c, name="cudaStreamGetFlags")
|
|
#else
|
|
bind(c, name="hipStreamGetFlags")
|
|
#endif
|
|
use iso_c_binding, only: c_ptr
|
|
import
|
|
implicit none
|
|
type(c_ptr), value :: stream
|
|
type(c_ptr), value :: flags
|
|
end
|
|
end interface
|
|
end module
|
|
#endif
|
|
|
|
program main
|
|
use iso_c_binding, only: c_ptr, c_int, c_loc
|
|
use omp_lib
|
|
use hipfort
|
|
implicit none (type, external)
|
|
|
|
! Only supported since CUDA 12.8 - skip for better compatibility
|
|
! ! Manally implement hipStreamGetDevice as hipfort misses it
|
|
! ! -> https://github.com/ROCm/hipfort/issues/238
|
|
! interface
|
|
! integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) &
|
|
!#if USE_CUDA_NAMES
|
|
! bind(c, name="cudaStreamGetDevice")
|
|
!#else
|
|
! bind(c, name="hipStreamGetDevice")
|
|
!#endif
|
|
! use iso_c_binding, only: c_ptr, c_int
|
|
! import
|
|
! implicit none
|
|
! type(c_ptr), value :: stream
|
|
! integer(c_int) :: dev
|
|
! end
|
|
! end interface
|
|
|
|
integer(c_int), target :: ivar
|
|
integer(omp_interop_rc_kind) :: res
|
|
integer(omp_interop_kind) :: obj
|
|
integer(omp_interop_fr_kind) :: fr
|
|
integer(kind(hipSuccess)) :: hip_err
|
|
integer(c_int) :: hip_dev, dev_stream
|
|
type(c_ptr) :: hip_ctx, hip_sm
|
|
|
|
logical :: vendor_is_amd
|
|
|
|
obj = omp_interop_none
|
|
|
|
!$omp interop init(target, targetsync, prefer_type("hip") : obj)
|
|
|
|
fr = omp_get_interop_int (obj, omp_ipr_fr_id, res)
|
|
if (res /= omp_irc_success) error stop 1
|
|
if (fr /= omp_ifr_hip) error stop 1
|
|
|
|
ivar = omp_get_interop_int (obj, omp_ipr_vendor, res)
|
|
if (ivar == 1) then ! AMD
|
|
vendor_is_amd = .true.
|
|
else if (ivar == 11) then ! Nvidia
|
|
vendor_is_amd = .false.
|
|
else
|
|
error stop 1 ! Unknown
|
|
endif
|
|
#if USE_CUDA_NAMES
|
|
if (vendor_is_amd) error stop 1
|
|
#else
|
|
if (.not. vendor_is_amd) error stop 1
|
|
#endif
|
|
|
|
! Check whether the omp_ipr_device -> hipDevice_t yields a valid device.
|
|
|
|
hip_dev = omp_get_interop_int (obj, omp_ipr_device, res)
|
|
if (res /= omp_irc_success) error stop 1
|
|
|
|
! AMD messed up in Fortran with the attribute handling, missing the
|
|
! translation table it has for C.
|
|
block
|
|
enum, bind(c)
|
|
enumerator :: cudaDevAttrClockRate = 13
|
|
enumerator :: cudaDevAttrMaxGridDimX = 5
|
|
end enum
|
|
|
|
! Assume a clock size is available and > 1 GHz; value is in kHz.
|
|
! c_loc is completely bogus, but as AMD messed up the interface ...
|
|
! Cf. https://github.com/ROCm/hipfort/issues/239
|
|
if (vendor_is_amd) then
|
|
hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev)
|
|
else
|
|
hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev)
|
|
endif
|
|
if (hip_err /= hipSuccess) error stop 1
|
|
if (ivar <= 1000000) error stop 1 ! in kHz
|
|
|
|
! Assume that the MaxGridDimX is available and > 1024
|
|
! c_loc is completely bogus, but as AMD messed up the interface ...
|
|
! Cf. https://github.com/ROCm/hipfort/issues/239
|
|
if (vendor_is_amd) then
|
|
hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev)
|
|
else
|
|
hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev)
|
|
endif
|
|
if (hip_err /= hipSuccess) error stop 1
|
|
if (ivar <= 1024) error stop 1
|
|
end block
|
|
|
|
|
|
! Check whether the omp_ipr_device_context -> hipCtx_t yields a context.
|
|
|
|
hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res)
|
|
if (res /= omp_irc_success) error stop 1
|
|
|
|
! ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */
|
|
! ivar = -99
|
|
! ! AMD deprectated hipCtxGetApiVersion (in C/C++)
|
|
! hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar))
|
|
!
|
|
! if (vendor_is_amd) then
|
|
! if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1
|
|
! else
|
|
! if (hip_err /= hipSuccess) error stop 1
|
|
! if (ivar <= 0) error stop 1
|
|
! end if
|
|
|
|
|
|
! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream.
|
|
|
|
hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res)
|
|
if (res /= omp_irc_success) error stop 1
|
|
|
|
! Skip as this is only in CUDA 12.8
|
|
! dev_stream = 99
|
|
! ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238
|
|
! ! hip_err = hipStreamGetDevice (hip_sm, dev_stream)
|
|
! hip_err = my_hipStreamGetDevice (hip_sm, dev_stream)
|
|
! if (hip_err /= hipSuccess) error stop 1
|
|
! if (dev_stream /= hip_dev) error stop 1
|
|
|
|
! Get flags of the stream
|
|
hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar))
|
|
if (hip_err /= hipSuccess) error stop 1
|
|
! Accept any value
|
|
|
|
! All jobs should have been completed (as there were none none)
|
|
hip_err = hipStreamQuery (hip_sm)
|
|
if (hip_err /= hipSuccess) error stop 1
|
|
|
|
!$omp interop destroy(obj)
|
|
end
|