libgomp/plugin: Add initial interop support to nvptx + gcn

The interop directive operates on an opaque object that represents a
foreign runtime. This commit adds support for
this to the two offloading plugins.

For nvptx, it supports cuda, cuda_driver and hip; the latter is AMD's
version of CUDA which for Nvidia devices boils down to normal CUDA.
Thus, in the end, for this limited use, cuda/cuda_driver/hip are all
the same - and for plugin-nvptx.c, they differ only in terms of
what the fr_id and fr_name properties and get_interop_type_desc return.

For gcn, it supports hip and hsa.

Regarding get-mapped-ptr-1.c: That's actually a fix for the
GOMP_interop commit r15-8654-g99e2906ae255fc that added
GOMP_DEVICE_DEFAULT_OMP_61 alias omp_default_device, which is
a conforming device number. But that test used -5 as check for a
non-conforming device number.

libgomp/ChangeLog:

	* plugin/plugin-gcn.c (_LIBGOMP_PLUGIN_INCLUDE): Define.
	(struct hsa_runtime_fn_info): Add two queue functions.
	(hipError_t, hipCtx_t, hipStream_s, hipStream_t): New types.
	(struct hip_runtime_fn_info): New.
	(hip_runtime_lib, hip_fns): New global vars.
	(init_environment_variables): Handle hip_runtime_lib.
	(init_hsa_runtime_functions): Load the two queue functions.
	(init_hip_runtime_functions, GOMP_OFFLOAD_interop,
	GOMP_OFFLOAD_get_interop_int, GOMP_OFFLOAD_get_interop_ptr,
	GOMP_OFFLOAD_get_interop_str,
	GOMP_OFFLOAD_get_interop_type_desc): New.
	* plugin/plugin-nvptx.c (_LIBGOMP_PLUGIN_INCLUDE): Define.
	(GOMP_OFFLOAD_interop, GOMP_OFFLOAD_get_interop_int,
	GOMP_OFFLOAD_get_interop_ptr, GOMP_OFFLOAD_get_interop_str,
	GOMP_OFFLOAD_get_interop_type_desc): New.
	* testsuite/libgomp.c/interop-fr-1.c: New test.
	* testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c: Use -6
	not -5 as non-conforming device number.
This commit is contained in:
Tobias Burnus 2025-03-21 21:39:42 +01:00
parent 78592fdbdc
commit 41b9c3b848
4 changed files with 1342 additions and 3 deletions

View file

@ -41,7 +41,9 @@
#include <hsa_ext_amd.h>
#include <dlfcn.h>
#include <signal.h>
#define _LIBGOMP_PLUGIN_INCLUDE 1
#include "libgomp-plugin.h"
#undef _LIBGOMP_PLUGIN_INCLUDE
#include "config/gcn/libgomp-gcn.h" /* For struct output. */
#include "gomp-constants.h"
#include <elf.h>
@ -190,6 +192,8 @@ struct hsa_runtime_fn_info
uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
uint64_t value);
uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
uint64_t (*hsa_queue_load_read_index_relaxed_fn) (const hsa_queue_t *queue);
uint64_t (*hsa_queue_load_write_index_relaxed_fn) (const hsa_queue_t *queue);
void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
hsa_signal_value_t value);
void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
@ -216,6 +220,25 @@ struct hsa_runtime_fn_info
const hsa_signal_t *dep_signals, hsa_signal_t completion_signal);
};
/* As the HIP runtime is dlopened, the following structure defines the
function pointers utilized by the interop feature of this plugin.
Add sufficient type declarations to get this to work. */
typedef int hipError_t; /* Actually an enum; 0 == success. */
/* Declared as a plain 'void *' here; only passed through, never
dereferenced by this plugin. */
typedef void* hipCtx_t;
/* Incomplete type; only pointers to it are used. */
struct hipStream_s;
typedef struct hipStream_s* hipStream_t;
/* Function pointers for the HIP entry points; each member FOO_fn is
filled in with the dlsym result for symbol FOO by
init_hip_runtime_functions. */
struct hip_runtime_fn_info
{
hipError_t (*hipStreamCreate_fn) (hipStream_t *);
hipError_t (*hipStreamDestroy_fn) (hipStream_t);
hipError_t (*hipStreamSynchronize_fn) (hipStream_t);
hipError_t (*hipCtxGetCurrent_fn) (hipCtx_t *ctx);
hipError_t (*hipSetDevice_fn) (int deviceId);
hipError_t (*hipGetDevice_fn) (int *deviceId);
};
/* Structure describing the run-time and grid properties of an HSA kernel
launch. This needs to match the format passed to GOMP_OFFLOAD_run. */
@ -553,9 +576,11 @@ struct hsa_context_info
static struct hsa_context_info hsa_context;
/* HSA runtime functions that are initialized in init_hsa_context. */
static struct hsa_runtime_fn_info hsa_fns;
/* HIP runtime functions that are initialized in init_hip_runtime_functions. */
static struct hip_runtime_fn_info hip_fns;
/* Heap space, allocated target-side, provided for use of newlib malloc.
Each module should have its own heap allocated.
Beware that heap usage increases with OpenMP teams. See also arenas. */
@ -578,10 +603,11 @@ static bool debug;
static bool suppress_host_fallback;
/* Flag to locate HSA runtime shared library that is dlopened
/* Flag to locate HSA and HIP runtime shared libraries that are dlopened
by this plug-in. */
static const char *hsa_runtime_lib;
static const char *hip_runtime_lib;
/* Flag to decide if the runtime should support also CPU devices (can be
a simulator). */
@ -1068,6 +1094,10 @@ init_environment_variables (void)
if (hsa_runtime_lib == NULL)
hsa_runtime_lib = "libhsa-runtime64.so.1";
hip_runtime_lib = secure_getenv ("HIP_RUNTIME_LIB");
if (hip_runtime_lib == NULL)
hip_runtime_lib = "libamdhip64.so";
support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES");
const char *x = secure_getenv ("GCN_NUM_TEAMS");
@ -1418,6 +1448,8 @@ init_hsa_runtime_functions (void)
DLSYM_FN (hsa_executable_iterate_symbols)
DLSYM_FN (hsa_queue_add_write_index_release)
DLSYM_FN (hsa_queue_load_read_index_acquire)
DLSYM_FN (hsa_queue_load_read_index_relaxed)
DLSYM_FN (hsa_queue_load_write_index_relaxed)
DLSYM_FN (hsa_signal_wait_acquire)
DLSYM_FN (hsa_signal_store_relaxed)
DLSYM_FN (hsa_signal_store_release)
@ -4365,6 +4397,434 @@ unlock:
return retval;
}
/* Open the HIP runtime library named by hip_runtime_lib and resolve the
   HIP entry points used by the interop support into hip_fns.

   Loading is attempted on the first call only; later calls just report the
   cached outcome.  Returns true if the library could be opened and all
   required symbols were found, false otherwise.  After a failed attempt,
   hip_fns.hipStreamCreate_fn is NULL.

   This function does no locking of its own.  */

static bool
init_hip_runtime_functions (void)
{
  /* This flag must have static storage duration: it records across calls
     that loading has already been attempted.  As an automatic variable it
     would be reset to false on every call, the early return below could
     never trigger and the library would be dlopen'ed again each time.  */
  static bool inited = false;
  if (inited)
    return hip_fns.hipStreamCreate_fn != NULL;
  inited = true;

  void *handle = dlopen (hip_runtime_lib, RTLD_LAZY);
  if (handle == NULL)
    return false;

  /* Store the address of FUNCTION (or NULL if missing) in
     hip_fns.FUNCTION_fn.  */
#define DLSYM_OPT_FN(function) \
  hip_fns.function##_fn = dlsym (handle, #function)

  DLSYM_OPT_FN (hipStreamCreate);
  DLSYM_OPT_FN (hipStreamDestroy);
  DLSYM_OPT_FN (hipStreamSynchronize);
  DLSYM_OPT_FN (hipCtxGetCurrent);
  DLSYM_OPT_FN (hipGetDevice);
  DLSYM_OPT_FN (hipSetDevice);
#undef DLSYM_OPT_FN

  if (!hip_fns.hipStreamCreate_fn
      || !hip_fns.hipStreamDestroy_fn
      || !hip_fns.hipStreamSynchronize_fn
      || !hip_fns.hipCtxGetCurrent_fn
      || !hip_fns.hipGetDevice_fn
      || !hip_fns.hipSetDevice_fn)
    {
      /* hipStreamCreate_fn doubles as the "HIP is usable" marker that is
         tested on later calls, so clear it when any symbol is missing.  */
      hip_fns.hipStreamCreate_fn = NULL;
      return false;
    }

  return true;
}
void
GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
enum gomp_interop_flag action, bool targetsync,
const char *prefer_type)
{
if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use)
&& !obj->stream)
return;
if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use)
&& obj->fr == omp_ifr_hsa)
{
/* Wait until the queue is is empty. */
bool is_empty;
uint64_t read_index, write_index;
hsa_queue_t *queue = (hsa_queue_t *) obj->stream;
do
{
read_index = hsa_fns.hsa_queue_load_read_index_relaxed_fn (queue);
write_index = hsa_fns.hsa_queue_load_write_index_relaxed_fn (queue);
is_empty = (read_index == write_index);
}
while (!is_empty);
if (action == gomp_interop_flag_destroy)
{
hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (queue);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Error destroying interop hsa_queue_t", status);
}
return;
}
if (action == gomp_interop_flag_destroy)
{
hipError_t err = hip_fns.hipStreamDestroy_fn ((hipStream_t) obj->stream);
if (err != 0)
GOMP_PLUGIN_fatal ("Error destroying interop hipStream_t: %d", err);
return;
}
if (action == gomp_interop_flag_use)
{
hipError_t err
= hip_fns.hipStreamSynchronize_fn ((hipStream_t) obj->stream);
if (err != 0)
GOMP_PLUGIN_fatal ("Error synchronizing interop hipStream_t: %d", err);
return;
}
bool fr_set = false;
/* Check for the preferred type; cf. parser in C/C++/Fortran or
dump_omp_init_prefer_type for the format.
Accept the first '{...}' block that specifies a 'fr' that we support.
Currently, no 'attr(...)' are supported. */
if (prefer_type)
while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR)
{
/* '{' item block starts. */
prefer_type++;
/* 'fr(...)' block */
while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR)
{
omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0];
if (fr == omp_ifr_hip)
{
obj->fr = omp_ifr_hip;
fr_set = true;
}
if (fr == omp_ifr_hsa)
{
obj->fr = omp_ifr_hsa;
fr_set = true;
}
prefer_type++;
}
prefer_type++;
/* 'attr(...)' block */
while (prefer_type[0] != '\0')
{
/* const char *attr = prefer_type; */
prefer_type += strlen (prefer_type) + 1;
}
prefer_type++;
/* end of '}'. */
if (fr_set)
break;
}
/* Prefer HIP, use HSA as fallback. The warning is only printed if GCN_DEBUG
is set and does not distinguishes between on prefer_type or hip prefer_type
nor whether a later/lower preference also specifies 'hsa'.
The assumption is that the user code handles HSA gracefully, but likely
just by falling back to the host version. On the other hand, have_hip is
likely true if HSA is available. */
if (!fr_set || obj->fr == omp_ifr_hip)
{
bool have_hip = init_hip_runtime_functions ();
if (have_hip)
obj->fr = omp_ifr_hip;
else
{
GCN_WARNING ("interop object requested, using HSA instead of HIP "
"as %s could not be loaded", hip_runtime_lib);
obj->fr = omp_ifr_hsa;
}
}
_Static_assert (sizeof (uint64_t) == sizeof (hsa_agent_t),
"sizeof (uint64_t) == sizeof (hsa_agent_t)");
struct agent_info *agent = get_agent_info (ord);
obj->device_data = agent;
if (targetsync && obj->fr == omp_ifr_hsa)
{
hsa_status_t status;
/* Queue size must be (for GPUs) a power of 2 >= 40, i.e. at least 64 and
maximally HSA_AGENT_INFO_QUEUE_MAX_SIZE. Arbitrary choice: */
uint32_t queue_size = ASYNC_QUEUE_SIZE;
status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
HSA_QUEUE_TYPE_MULTI,
NULL, NULL, UINT32_MAX, UINT32_MAX,
(hsa_queue_t **) &obj->stream);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Error creating interop hsa_queue_t", status);
}
else if (targetsync)
{
hipError_t err;
int dev_curr;
err = hip_fns.hipGetDevice_fn (&dev_curr);
if (!err && ord != dev_curr)
err = hip_fns.hipSetDevice_fn (ord);
if (!err)
err = hip_fns.hipStreamCreate_fn ((hipStream_t *) &obj->stream);
if (!err && ord != dev_curr)
err = hip_fns.hipSetDevice_fn (dev_curr);
if (err != 0)
GOMP_PLUGIN_fatal ("Error creating interop hipStream_t: %d", err);
}
}
/* Return the integer value of property PROPERTY_ID of the interop object
   OBJ, or 0 if there is none.

   If RET_CODE is non-NULL, it receives omp_irc_success when a value is
   returned, omp_irc_type_str or omp_irc_type_ptr when the property has a
   string or pointer type instead, and omp_irc_no_value when the property
   has no value (including the case that OBJ's foreign runtime is neither
   HIP nor HSA).  */

intptr_t
GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj,
                              omp_interop_property_t property_id,
                              omp_interop_rc_t *ret_code)
{
  /* Outcome for an unsupported foreign runtime; refined below.  */
  omp_interop_rc_t rc = omp_irc_no_value;
  intptr_t value = 0;

  if (obj->fr == omp_ifr_hip || obj->fr == omp_ifr_hsa)
    {
      const bool is_hsa = (obj->fr == omp_ifr_hsa);

      switch (property_id)
        {
        case omp_ipr_fr_id:
          rc = omp_irc_success;
          value = obj->fr;
          break;
        case omp_ipr_fr_name:
        case omp_ipr_vendor_name:
          rc = omp_irc_type_str;
          break;
        case omp_ipr_vendor:
          rc = omp_irc_success;
          value = 1;  /* amd */
          break;
        case omp_ipr_device_num:
          rc = omp_irc_success;
          value = obj->device_num;
          break;
        case omp_ipr_platform:
          rc = omp_irc_no_value;
          break;
        case omp_ipr_device:
          /* For HSA the device is only available as a pointer.  */
          if (is_hsa)
            rc = omp_irc_type_ptr;
          else
            {
              rc = omp_irc_success;
              value = ((struct agent_info *) obj->device_data)->device_id;
            }
          break;
        case omp_ipr_device_context:
          rc = is_hsa ? omp_irc_no_value : omp_irc_type_ptr;
          break;
        case omp_ipr_targetsync:
          rc = obj->stream ? omp_irc_type_ptr : omp_irc_no_value;
          break;
        default:
          __builtin_unreachable ();
        }
    }

  if (ret_code)
    *ret_code = rc;
  return value;
}
/* Return the pointer value of property PROPERTY_ID of the interop object
   OBJ, or NULL if there is none.

   If RET_CODE is non-NULL, it receives omp_irc_success when a pointer is
   returned, omp_irc_type_int or omp_irc_type_str when the property has an
   integer or string type instead, and omp_irc_no_value when the property
   has no value (including the case that OBJ's foreign runtime is neither
   HIP nor HSA).  A HIP error while obtaining the device context is
   fatal.  */

void *
GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj,
                              omp_interop_property_t property_id,
                              omp_interop_rc_t *ret_code)
{
  if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
    {
      if (ret_code)
        *ret_code = omp_irc_no_value;  /* Hmm.  */
      return NULL;
    }
  switch (property_id)
    {
    case omp_ipr_fr_id:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_fr_name:
      if (ret_code)
        *ret_code = omp_irc_type_str;
      return NULL;
    case omp_ipr_vendor:
      /* The vendor is an integer property (the string variant is
         omp_ipr_vendor_name), hence report 'int', matching what
         GOMP_OFFLOAD_get_interop_str reports for it.  */
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_vendor_name:
      if (ret_code)
        *ret_code = omp_irc_type_str;
      return NULL;
    case omp_ipr_device_num:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_platform:
      if (ret_code)
        *ret_code = omp_irc_no_value;
      return NULL;
    case omp_ipr_device:
      if (obj->fr == omp_ifr_hsa)
        {
          if (ret_code)
            *ret_code = omp_irc_success;
          /* hsa_agent_t is a struct containing a single uint64_t.  */
          return &((struct agent_info *) obj->device_data)->id;
        }
      else
        {
          /* For HIP, the device is an integer.  */
          if (ret_code)
            *ret_code = omp_irc_type_int;
          return NULL;
        }
    case omp_ipr_device_context:
      if (obj->fr == omp_ifr_hsa)
        {
          if (ret_code)
            *ret_code = omp_irc_no_value;
          return NULL;
        }
      else
        {
          /* Query the current context with OBJ's device selected and
             afterwards switch back to the previously current device.  Each
             step is only done if the previous ones succeeded.  */
          hipCtx_t ctx;
          int dev_curr;
          int dev = ((struct agent_info *) obj->device_data)->device_id;
          hipError_t err;
          err = hip_fns.hipGetDevice_fn (&dev_curr);
          if (!err && dev != dev_curr)
            err = hip_fns.hipSetDevice_fn (dev);
          if (!err)
            err = hip_fns.hipCtxGetCurrent_fn (&ctx);
          if (!err && dev != dev_curr)
            err = hip_fns.hipSetDevice_fn (dev_curr);
          if (err)
            GOMP_PLUGIN_fatal ("Error obtaining hipCtx_t for device %d: %d",
                               obj->device_num, err);
          if (ret_code)
            *ret_code = omp_irc_success;
          return ctx;
        }
    case omp_ipr_targetsync:
      if (!obj->stream)
        {
          if (ret_code)
            *ret_code = omp_irc_no_value;
          return NULL;
        }
      if (ret_code)
        *ret_code = omp_irc_success;
      return obj->stream;
    default:
      break;
    }
  __builtin_unreachable ();
  return NULL;
}
/* Return the string value of property PROPERTY_ID of the interop object
   OBJ, or NULL if there is none.

   If RET_CODE is non-NULL, it receives omp_irc_success when a string is
   returned, omp_irc_type_int or omp_irc_type_ptr when the property has an
   integer or pointer type instead, and omp_irc_no_value when the property
   has no value (including the case that OBJ's foreign runtime is neither
   HIP nor HSA).  */

const char *
GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj,
                              omp_interop_property_t property_id,
                              omp_interop_rc_t *ret_code)
{
  if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
    {
      if (ret_code)
        *ret_code = omp_irc_no_value;  /* Hmm.  */
      return NULL;
    }
  switch (property_id)
    {
    case omp_ipr_fr_id:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_fr_name:
      if (ret_code)
        *ret_code = omp_irc_success;
      if (obj->fr == omp_ifr_hip)
        return "hip";
      if (obj->fr == omp_ifr_hsa)
        return "hsa";
      /* Not reachable because of the check at the top; the 'break' makes
         sure this case never falls through into the 'vendor' case.  */
      break;
    case omp_ipr_vendor:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_vendor_name:
      if (ret_code)
        *ret_code = omp_irc_success;
      return "amd";
    case omp_ipr_device_num:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_platform:
      if (ret_code)
        *ret_code = omp_irc_no_value;
      return NULL;
    case omp_ipr_device:
      /* The device is a pointer for HSA and an integer for HIP.  */
      if (ret_code && obj->fr == omp_ifr_hsa)
        *ret_code = omp_irc_type_ptr;
      else if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_device_context:
      if (ret_code && obj->fr == omp_ifr_hsa)
        *ret_code = omp_irc_no_value;
      else if (ret_code)
        *ret_code = omp_irc_type_ptr;
      return NULL;
    case omp_ipr_targetsync:
      if (ret_code && !obj->stream)
        *ret_code = omp_irc_no_value;
      else if (ret_code)
        *ret_code = omp_irc_type_ptr;
      return NULL;
    default:
      break;
    }
  __builtin_unreachable ();
  return NULL;
}
/* Return the name of the foreign-runtime type of property PROPERTY_ID for
   the interop object OBJ, or "N/A" if the property is not available for
   OBJ's foreign runtime.  The HIP names are used if OBJ's foreign runtime
   is HIP, the HSA names otherwise.

   Only the properties from omp_ipr_platform down to omp_ipr_targetsync are
   covered by the tables; PROPERTY_ID is not range-checked here.  */

const char *
GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj,
                                    omp_interop_property_t property_id)
{
  /* The tables below are indexed by the distance from omp_ipr_platform and
     rely on exactly four properties in that range.  */
  _Static_assert (omp_ipr_targetsync == omp_ipr_first,
                  "omp_ipr_targetsync == omp_ipr_first");
  _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4,
                  "omp_ipr_platform - omp_ipr_first + 1 == 4");

  static const char *hip_names[] = {"N/A",          /* platform */
                                    "hipDevice_t",  /* device */
                                    "hipCtx_t",     /* device_context */
                                    "hipStream_t"}; /* targetsync */
  static const char *hsa_names[] = {"N/A",            /* platform */
                                    "hsa_agent_t *",  /* device */
                                    "N/A",            /* device_context */
                                    "hsa_queue_t *"}; /* targetsync */

  const char **names = (obj->fr == omp_ifr_hip) ? hip_names : hsa_names;
  return names[omp_ipr_platform - property_id];
}
/* }}} */
/* {{{ OpenMP Plugin API */

View file

@ -35,7 +35,9 @@
#include "openacc.h"
#include "config.h"
#include "symcat.h"
#define _LIBGOMP_PLUGIN_INCLUDE 1
#include "libgomp-plugin.h"
#undef _LIBGOMP_PLUGIN_INCLUDE
#include "oacc-plugin.h"
#include "gomp-constants.h"
#include "oacc-int.h"
@ -2425,6 +2427,306 @@ nvptx_stacks_acquire (struct ptx_device *ptx_dev, size_t size, int num)
return (void *) ptx_dev->omp_stacks.ptr;
}
/* nvptx plugin hook for the OpenMP 'interop' construct.

   OBJ is the interop object to act on and ORD the device ordinal (index
   into ptx_devices).  ACTION selects between destroying OBJ, synchronizing
   on it ('use') and initializing it.  TARGETSYNC and PREFER_TYPE are only
   looked at when initializing: TARGETSYNC requests the creation of a
   stream and PREFER_TYPE is the encoded prefer_type list (may be NULL).

   Supported foreign runtimes are cuda, cuda_driver and hip; 'cuda' is used
   when PREFER_TYPE does not select a supported one.  */

void
GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
                      enum gomp_interop_flag action, bool targetsync,
                      const char *prefer_type)
{
  if (action == gomp_interop_flag_destroy)
    {
      if (obj->stream)
        CUDA_CALL_ASSERT (cuStreamDestroy, obj->stream);
      return;
    }
  if (action == gomp_interop_flag_use)
    {
      if (obj->stream)
        CUDA_CALL_ASSERT (cuStreamSynchronize, obj->stream);
      return;
    }

  /* Default foreign runtime.  This is set on the initialization path only:
     doing so before the 'destroy'/'use' handling above would turn an object
     initialized as 'cuda_driver' or 'hip' into a 'cuda' one as soon as it
     is used.  */
  obj->fr = omp_ifr_cuda;

  /* Check for the preferred type; cf. parser in C/C++/Fortran or
     dump_omp_init_prefer_type for the format.
     Accept the first '{...}' block that specifies a 'fr' that we support.
     Currently, no 'attr(...)' are supported.  */
  if (prefer_type)
    while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR)
      {
        bool found = false;
        /* '{' item block starts.  */
        prefer_type++;
        /* 'fr(...)' block  */
        while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR)
          {
            omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0];
            if (fr == omp_ifr_cuda
                || fr == omp_ifr_cuda_driver
                || fr == omp_ifr_hip)
              {
                obj->fr = fr;
                found = true;
              }
            prefer_type++;
          }
        prefer_type++;
        /* 'attr(...)' block: a run of NUL-terminated strings ended by an
           empty string; skipped as attributes are not supported.  */
        while (prefer_type[0] != '\0')
          {
            /* const char *attr = prefer_type; */
            prefer_type += strlen (prefer_type) + 1;
          }
        prefer_type++;
        /* end of '}'.  */
        if (found)
          break;
      }

  obj->device_data = ptx_devices[ord];

  if (targetsync)
    {
      CUstream stream = NULL;
      CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
      obj->stream = stream;
    }
}
/* Return the integer value of property PROPERTY_ID of the interop object
   OBJ, or 0 if there is none.

   If RET_CODE is non-NULL, it receives omp_irc_success when a value is
   returned, omp_irc_type_str or omp_irc_type_ptr when the property has a
   string or pointer type instead, and omp_irc_no_value when the property
   has no value (including the case that OBJ's foreign runtime is none of
   cuda, cuda_driver and hip).  */

intptr_t
GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj,
                              omp_interop_property_t property_id,
                              omp_interop_rc_t *ret_code)
{
  /* Outcome for an unsupported foreign runtime; refined below.  */
  omp_interop_rc_t rc = omp_irc_no_value;
  intptr_t value = 0;

  if (obj->fr == omp_ifr_cuda
      || obj->fr == omp_ifr_cuda_driver
      || obj->fr == omp_ifr_hip)
    {
      switch (property_id)
        {
        case omp_ipr_fr_id:
          rc = omp_irc_success;
          value = obj->fr;
          break;
        case omp_ipr_fr_name:
        case omp_ipr_vendor_name:
          rc = omp_irc_type_str;
          break;
        case omp_ipr_vendor:
          rc = omp_irc_success;
          value = 11;  /* nvidia */
          break;
        case omp_ipr_device_num:
          rc = omp_irc_success;
          value = obj->device_num;
          break;
        case omp_ipr_platform:
          rc = omp_irc_no_value;
          break;
        case omp_ipr_device:
          rc = omp_irc_success;
          value = ((struct ptx_device *) obj->device_data)->dev;
          break;
        case omp_ipr_device_context:
          rc = (obj->fr == omp_ifr_cuda
                ? omp_irc_no_value : omp_irc_type_ptr);
          break;
        case omp_ipr_targetsync:
          if (obj->stream)
            {
              /* ptr fits into (u)intptr_t */
              rc = omp_irc_success;
              value = (uintptr_t) obj->stream;
            }
          else
            rc = omp_irc_no_value;
          break;
        default:
          __builtin_unreachable ();
        }
    }

  if (ret_code)
    *ret_code = rc;
  return value;
}
/* Return the pointer value of property PROPERTY_ID of the interop object
   OBJ, or NULL if there is none.

   If RET_CODE is non-NULL, it receives omp_irc_success when a pointer is
   returned, omp_irc_type_int or omp_irc_type_str when the property has an
   integer or string type instead, and omp_irc_no_value when the property
   has no value (including the case that OBJ's foreign runtime is none of
   cuda, cuda_driver and hip).  */

void *
GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj,
                              omp_interop_property_t property_id,
                              omp_interop_rc_t *ret_code)
{
  /* Outcome for an unsupported foreign runtime; refined below.  */
  omp_interop_rc_t rc = omp_irc_no_value;
  void *value = NULL;

  if (obj->fr == omp_ifr_cuda
      || obj->fr == omp_ifr_cuda_driver
      || obj->fr == omp_ifr_hip)
    {
      switch (property_id)
        {
        case omp_ipr_fr_id:
        case omp_ipr_vendor:
        case omp_ipr_device_num:
        case omp_ipr_device:
          rc = omp_irc_type_int;
          break;
        case omp_ipr_fr_name:
        case omp_ipr_vendor_name:
          rc = omp_irc_type_str;
          break;
        case omp_ipr_platform:
          rc = omp_irc_no_value;
          break;
        case omp_ipr_device_context:
          /* No context is provided for 'cuda'.  */
          if (obj->fr == omp_ifr_cuda)
            rc = omp_irc_no_value;
          else
            {
              rc = omp_irc_success;
              value = ((struct ptx_device *) obj->device_data)->ctx;
            }
          break;
        case omp_ipr_targetsync:
          if (obj->stream)
            {
              rc = omp_irc_success;
              value = obj->stream;
            }
          else
            rc = omp_irc_no_value;
          break;
        default:
          __builtin_unreachable ();
        }
    }

  if (ret_code)
    *ret_code = rc;
  return value;
}
/* Return the string value of property PROPERTY_ID of the interop object
   OBJ, or NULL if there is none.

   If RET_CODE is non-NULL, it receives omp_irc_success when a string is
   returned, omp_irc_type_int or omp_irc_type_ptr when the property has an
   integer or pointer type instead, and omp_irc_no_value when the property
   has no value (including the case that OBJ's foreign runtime is none of
   cuda, cuda_driver and hip).  */

const char *
GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj,
                              omp_interop_property_t property_id,
                              omp_interop_rc_t *ret_code)
{
  if (obj->fr != omp_ifr_cuda
      && obj->fr != omp_ifr_cuda_driver
      && obj->fr != omp_ifr_hip)
    {
      if (ret_code)
        *ret_code = omp_irc_no_value;  /* Hmm.  */
      return NULL;
    }
  switch (property_id)
    {
    case omp_ipr_fr_id:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_fr_name:
      if (ret_code)
        *ret_code = omp_irc_success;
      if (obj->fr == omp_ifr_cuda)
        return "cuda";
      if (obj->fr == omp_ifr_cuda_driver)
        return "cuda_driver";
      if (obj->fr == omp_ifr_hip)
        return "hip";
      break;
    case omp_ipr_vendor:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_vendor_name:
      if (ret_code)
        *ret_code = omp_irc_success;
      return "nvidia";
    case omp_ipr_device_num:
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_platform:
      if (ret_code)
        *ret_code = omp_irc_no_value;
      return NULL;
    case omp_ipr_device:
      /* The device is an integer here: GOMP_OFFLOAD_get_interop_int
         returns it with omp_irc_success and GOMP_OFFLOAD_get_interop_ptr
         reports omp_irc_type_int, so do not claim a pointer type.  */
      if (ret_code)
        *ret_code = omp_irc_type_int;
      return NULL;
    case omp_ipr_device_context:
      if (ret_code && obj->fr == omp_ifr_cuda)
        *ret_code = omp_irc_no_value;
      else if (ret_code)
        *ret_code = omp_irc_type_ptr;
      return NULL;
    case omp_ipr_targetsync:
      if (ret_code && !obj->stream)
        *ret_code = omp_irc_no_value;
      else if (ret_code)
        *ret_code = omp_irc_type_ptr;
      return NULL;
    default:
      break;
    }
  __builtin_unreachable ();
  return NULL;
}
/* Return the name of the foreign-runtime type of property PROPERTY_ID for
   the interop object OBJ, or "N/A" if the property is not available for
   OBJ's foreign runtime.  The cuda and cuda_driver names are used for those
   two foreign runtimes, the hip names for anything else.

   Only the properties from omp_ipr_platform down to omp_ipr_targetsync are
   covered by the tables; PROPERTY_ID is not range-checked here.  */

const char *
GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj,
                                    omp_interop_property_t property_id)
{
  /* The tables below are indexed by the distance from omp_ipr_platform and
     rely on exactly four properties in that range.  */
  _Static_assert (omp_ipr_targetsync == omp_ipr_first,
                  "omp_ipr_targetsync == omp_ipr_first");
  _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4,
                  "omp_ipr_platform - omp_ipr_first + 1 == 4");

  static const char *cuda_names[] = {"N/A",           /* platform */
                                     "int",           /* device */
                                     "N/A",           /* device_context */
                                     "cudaStream_t"}; /* targetsync */
  static const char *cuda_driver_names[] = {"N/A",       /* platform */
                                            "CUdevice",  /* device */
                                            "CUcontext", /* device_context */
                                            "CUstream"}; /* targetsync */
  static const char *hip_names[] = {"N/A",          /* platform */
                                    "hipDevice_t",  /* device */
                                    "hipCtx_t",     /* device_context */
                                    "hipStream_t"}; /* targetsync */

  const char **names;
  if (obj->fr == omp_ifr_cuda)
    names = cuda_names;
  else if (obj->fr == omp_ifr_cuda_driver)
    names = cuda_driver_names;
  else
    names = hip_names;

  return names[omp_ipr_platform - property_id];
}
void
GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)

View file

@ -21,7 +21,7 @@ main ()
if (omp_target_associate_ptr (q, p, sizeof (int), 0, d) != 0)
return 0;
if (omp_get_mapped_ptr (q, -5) != NULL)
if (omp_get_mapped_ptr (q, -6) != NULL)
abort ();
if (omp_get_mapped_ptr (q, omp_get_num_devices () + 1) != NULL)

View file

@ -0,0 +1,577 @@
/* { dg-do run } */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include "../libgomp.c-c++-common/on_device_arch.h"
#define DEFAULT_DEVICE -99
/* The following assumes that when a nvptx device is available,
cuda/cuda_driver/hip are supported.
And that likewise when a gcn device is available that the
plugin can load not only the HSA but also the HIP library
such that hsa/hip are supported.
For the host, omp_interop_none is expected.
Otherwise, it only does some basic tests without checking
that the returned result really makes sense. */
void check_host (int);
void check_nvptx (int);
void check_gcn (int);
/* Check omp_get_interop_type_desc for OBJ.  For a real interop object, the
   five properties fr_id .. device_num must yield fixed type names; the
   remaining properties are not checked here.  For omp_interop_none, NULL is
   expected for every property.  */
void check_type (omp_interop_t obj)
{
  /* Properties whose type description is the same for all real objects.  */
  static const struct
  {
    omp_interop_property_t prop;
    const char *desc;
  } generic_props[] = {
    { omp_ipr_fr_id, "omp_interop_t" },
    { omp_ipr_fr_name, "const char *" },
    { omp_ipr_vendor, "int" },
    { omp_ipr_vendor_name, "const char *" },
    { omp_ipr_device_num, "int" },
  };
  /* Properties that are only checked for omp_interop_none.  */
  static const omp_interop_property_t foreign_props[] = {
    omp_ipr_platform,
    omp_ipr_device,
    omp_ipr_device_context,
    omp_ipr_targetsync,
  };

  const int is_none = (obj == omp_interop_none);

  for (size_t i = 0; i < sizeof generic_props / sizeof generic_props[0]; i++)
    {
      const char *type = omp_get_interop_type_desc (obj, generic_props[i].prop);
      if (is_none)
        assert (type == NULL);
      else
        assert (strcmp (type, generic_props[i].desc) == 0);
    }

  if (!is_none)
    return;

  for (size_t i = 0; i < sizeof foreign_props / sizeof foreign_props[0]; i++)
    assert (omp_get_interop_type_desc (obj, foreign_props[i]) == NULL);
}
/* Determine whether DEV (or, for DEFAULT_DEVICE, the current default
   device) is the host, an nvptx or a gcn device and run the matching
   check_* function with DEV.  */
void
do_check (int dev)
{
  const int num_dev = omp_get_num_devices ();
  const int use_default = (dev == DEFAULT_DEVICE);
  const char *dev_type;

  /* The on_device_arch_* queries below apply to the default device.  */
  if (!use_default)
    omp_set_default_device (dev);

  int is_nvptx = on_device_arch_nvptx ();
  int is_gcn = on_device_arch_gcn ();

  /* The host is either device number -1 or num_dev.  */
  int queried_dev = use_default ? omp_get_default_device () : dev;
  int is_host = (queried_dev == -1 || queried_dev == num_dev);

  assert (is_nvptx + is_gcn + is_host == 1);

  /* With an explicit DEV, change the default device to something else
     (device 0 if DEV is the host, the host otherwise).  */
  if (num_dev > 0 && !use_default)
    omp_set_default_device (is_host ? 0 : -1);

  if (is_host)
    dev_type = "host";
  else if (is_nvptx)
    dev_type = "nvptx";
  else if (is_gcn)
    dev_type = "gcn";
  printf ("Running on the %s device (%d)\n", dev_type, dev);

  if (is_host)
    check_host (dev);
  else if (is_nvptx)
    check_nvptx (dev);
  else if (is_gcn)
    check_gcn (dev);
}
/* Check 'interop init' for the host device: each of the four init variants
below (target/targetsync, with and without prefer_type) must set OBJ to
omp_interop_none.  With DEV == DEFAULT_DEVICE the directive is used
without a device clause, otherwise with 'device(dev)'.  */
void
check_host (int dev)
{
/* OBJ is preset to -1 before each init such that the assert detects
whether the directive actually stored omp_interop_none.  */
omp_interop_t obj = (omp_interop_t) -1L;
/* 'target' without prefer_type.  */
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(target : obj)
} else {
#pragma omp interop init(target : obj) device(dev)
}
assert (obj == omp_interop_none);
check_type (obj);
obj = (omp_interop_t) -1L;
/* 'target' with prefer_type using the '{fr(...), attr(...)}' syntax.  */
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}, {fr("hsa")}) : obj)
} else {
#pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}, {fr("hsa")}) : obj) device(dev)
}
assert (obj == omp_interop_none);
check_type (obj);
obj = (omp_interop_t) -1L;
/* 'targetsync' without prefer_type.  */
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(targetsync : obj)
} else {
#pragma omp interop init(targetsync : obj) device(dev)
}
assert (obj == omp_interop_none);
check_type (obj);
obj = (omp_interop_t) -1L;
/* 'targetsync' with prefer_type using the plain string-list syntax.  */
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa") : obj)
} else {
#pragma omp interop init(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa") : obj) device(dev)
}
assert (obj == omp_interop_none);
check_type (obj);
}
/* Check the interop object on an nvptx device.  Eight init variants are
run: variants 0-3 expect the 'cuda', 4-5 the 'cuda_driver' and 6-7 the
'hip' foreign runtime; even variants use 'target', odd ones 'targetsync'.
For each variant, the properties of the resulting object are queried and
checked and the object is then used and destroyed.  With
DEV == DEFAULT_DEVICE the directives are used without a device clause,
otherwise with 'device(dev)'.  */
void
check_nvptx (int dev)
{
for (int variant = 0; variant <= 7; variant++)
{
/* Preset to -1 to detect that 'init' really assigned the object.  */
omp_interop_t obj = (omp_interop_t) -1L;
switch (variant)
{
/* Expect 'cuda'. */
case 0:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(target : obj)
} else {
#pragma omp interop init(target : obj) device(dev)
}
break;
}
case 1:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(targetsync : obj)
} else {
#pragma omp interop init(targetsync : obj) device(dev)
}
break;
}
case 2:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}) : obj)
} else {
#pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}) : obj) device(dev)
}
break;
}
case 3:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(targetsync, prefer_type("hsa", "cuda", "cuda_driver", "hip") : obj)
} else {
#pragma omp interop init(targetsync, prefer_type("hsa", "cuda", "cuda_driver", "hip") : obj) device(dev)
}
break;
}
/* Expect 'cuda_driver'. */
case 4:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(target, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj)
} else {
#pragma omp interop init(target, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) device(dev)
}
break;
}
case 5:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(targetsync, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj)
} else {
#pragma omp interop init(targetsync, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) device(dev)
}
break;
}
/* Expect 'hip'. */
case 6:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(target, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj)
} else {
#pragma omp interop init(target, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) device(dev)
}
break;
}
case 7:
{
if (dev == DEFAULT_DEVICE) {
#pragma omp interop init(targetsync, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj)
} else {
#pragma omp interop init(targetsync, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) device(dev)
}
break;
}
default:
abort ();
}
assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
/* Foreign-runtime id: must match the runtime expected for the variant.
RET_CODE is preset to a value different from the expected one before
each query.  */
omp_interop_rc_t ret_code = omp_irc_no_value;
omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
assert (ret_code == omp_irc_success);
if (variant >= 0 && variant <= 3)
assert (fr == omp_ifr_cuda);
else if (variant <= 5)
assert (fr == omp_ifr_cuda_driver);
else if (variant <= 7)
assert (fr == omp_ifr_hip);
else
assert (0);
/* Foreign-runtime name: must agree with the id.  */
ret_code = omp_irc_no_value;
const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
assert (ret_code == omp_irc_success);
if (fr == omp_ifr_cuda)
assert (strcmp (fr_name, "cuda") == 0);
else if (fr == omp_ifr_cuda_driver)
assert (strcmp (fr_name, "cuda_driver") == 0);
else if (fr == omp_ifr_hip)
assert (strcmp (fr_name, "hip") == 0);
else
assert (0);
/* Vendor id and vendor name.  */
ret_code = omp_irc_no_value;
int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
assert (ret_code == omp_irc_success);
assert (vendor == 11); /* Nvidia */
ret_code = omp_irc_no_value;
const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
assert (ret_code == omp_irc_success);
assert (strcmp (vendor_name, "nvidia") == 0);
/* OpenMP device number: the requested device or the default device.  */
ret_code = omp_irc_no_value;
int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
assert (ret_code == omp_irc_success);
if (dev == DEFAULT_DEVICE)
assert (dev_num == omp_get_default_device ());
else
assert (dev_num == dev);
/* Platform: N/A. */
ret_code = omp_irc_success;
(void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
assert (ret_code == omp_irc_no_value);
ret_code = omp_irc_success;
(void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
assert (ret_code == omp_irc_no_value);
ret_code = omp_irc_success;
(void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
assert (ret_code == omp_irc_no_value);
/* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */
ret_code = omp_irc_no_value;
int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
/* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). */
assert (ret_code == omp_irc_success);
assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
/* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */
ret_code = omp_irc_out_of_range;
void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
if (fr == omp_ifr_cuda)
{
assert (ret_code == omp_irc_no_value);
assert (ctx == NULL);
}
else
{
assert (ret_code == omp_irc_success);
assert (ctx != NULL);
}
/* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */
ret_code = omp_irc_out_of_range;
void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
if (variant % 2 == 0) /* no targetsync */
{
assert (ret_code == omp_irc_no_value);
assert (stream == NULL);
}
else
{
assert (ret_code == omp_irc_success);
assert (stream != NULL);
}
/* Type descriptions: first the ones common to all objects, then the
ones specific to the foreign runtime.  */
check_type (obj);
if (fr == omp_ifr_cuda)
{
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0);
}
else if (fr == omp_ifr_cuda_driver)
{
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0);
}
else
{
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
}
/* Finally exercise 'use' and 'destroy' on the object.  */
if (dev == DEFAULT_DEVICE) {
#pragma omp interop use(obj)
#pragma omp interop destroy(obj)
} else {
#pragma omp interop use(obj) device(dev)
#pragma omp interop destroy(obj) device(dev)
}
}
}
/* Verify the interop object created for device DEV, which is expected to be
   an AMD device; DEV == DEFAULT_DEVICE means that no device clause is used.

   Six ways of initializing the object are tried:
     variants 0-3: the foreign runtime is expected to be 'hip',
     variants 4-5: 'hsa' is listed before 'hip' in prefer_type and is
                   therefore expected.
   Odd variants request 'targetsync', even ones only 'target'.

   For each variant all properties are queried and compared against the
   expected values; afterwards the object is used and destroyed.  */
void
check_gcn (int dev)
{
  const int explicit_dev = (dev != DEFAULT_DEVICE);

  for (int variant = 0; variant <= 5; variant++)
    {
      const int want_targetsync = (variant % 2 != 0);
      omp_interop_t obj = (omp_interop_t) -1L;

      /* Create the interop object.  */
      switch (variant)
        {
        /* Expect 'hip'.  */
        case 0:
          if (explicit_dev)
            {
              #pragma omp interop init(target : obj) device(dev)
            }
          else
            {
              #pragma omp interop init(target : obj)
            }
          break;
        case 1:
          if (explicit_dev)
            {
              #pragma omp interop init(targetsync : obj) device(dev)
            }
          else
            {
              #pragma omp interop init(targetsync : obj)
            }
          break;
        case 2:
          if (explicit_dev)
            {
              #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("cuda")}, {fr("cuda_driver")}, {attr("ompx_bar"), fr("hip"), attr("ompx_foobar")},{fr("hsa")}) : obj) device(dev)
            }
          else
            {
              #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("cuda")}, {fr("cuda_driver")}, {attr("ompx_bar"), fr("hip"), attr("ompx_foobar")},{fr("hsa")}) : obj)
            }
          break;
        case 3:
          if (explicit_dev)
            {
              #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hip", "hsa") : obj) device(dev)
            }
          else
            {
              #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hip", "hsa") : obj)
            }
          break;
        /* Expect 'hsa'.  */
        case 4:
          if (explicit_dev)
            {
              #pragma omp interop init(target, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) device(dev)
            }
          else
            {
              #pragma omp interop init(target, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj)
            }
          break;
        case 5:
          if (explicit_dev)
            {
              #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) device(dev)
            }
          else
            {
              #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj)
            }
          break;
        default:
          abort ();
        }

      /* The directive must have replaced the -1 placeholder by a real
         object.  */
      assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);

      /* Foreign-runtime id.  */
      omp_interop_rc_t rc = omp_irc_no_value;
      omp_interop_fr_t fr_id
        = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &rc);
      assert (rc == omp_irc_success);
      switch (variant)
        {
        case 0:
        case 1:
        case 2:
        case 3:
          assert (fr_id == omp_ifr_hip);
          break;
        case 4:
        case 5:
          assert (fr_id == omp_ifr_hsa);
          break;
        default:
          assert (0);
        }

      /* Foreign-runtime name; it has to match the id.  */
      rc = omp_irc_no_value;
      const char *name = omp_get_interop_str (obj, omp_ipr_fr_name, &rc);
      assert (rc == omp_irc_success);
      switch (fr_id)
        {
        case omp_ifr_hip:
          assert (strcmp (name, "hip") == 0);
          break;
        case omp_ifr_hsa:
          assert (strcmp (name, "hsa") == 0);
          break;
        default:
          assert (0);
        }

      /* Vendor id and name.  */
      rc = omp_irc_no_value;
      int vendor_id = (int) omp_get_interop_int (obj, omp_ipr_vendor, &rc);
      assert (rc == omp_irc_success);
      assert (vendor_id == 1);  /* Amd */

      rc = omp_irc_no_value;
      const char *vendor_str
        = omp_get_interop_str (obj, omp_ipr_vendor_name, &rc);
      assert (rc == omp_irc_success);
      assert (strcmp (vendor_str, "amd") == 0);

      /* OpenMP device number.  */
      rc = omp_irc_no_value;
      int omp_dev = (int) omp_get_interop_int (obj, omp_ipr_device_num, &rc);
      assert (rc == omp_irc_success);
      if (explicit_dev)
        assert (omp_dev == dev);
      else
        assert (omp_dev == omp_get_default_device ());

      /* Platform: N/A, whichever getter is used.  */
      rc = omp_irc_success;
      (void) omp_get_interop_int (obj, omp_ipr_platform, &rc);
      assert (rc == omp_irc_no_value);

      rc = omp_irc_success;
      (void) omp_get_interop_ptr (obj, omp_ipr_platform, &rc);
      assert (rc == omp_irc_no_value);

      rc = omp_irc_success;
      (void) omp_get_interop_str (obj, omp_ipr_platform, &rc);
      assert (rc == omp_irc_no_value);

      const int is_hip = (fr_id == omp_ifr_hip);

      /* Device: an integer (hipDevice_t) for hip, a pointer (hsa_agent_t *)
         for hsa.  */
      rc = omp_irc_no_value;
      if (is_hip)
        {
          /* The hip device number is checked to be in the range
             [0, omp_get_num_devices ()).  */
          int hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &rc);
          assert (rc == omp_irc_success);
          assert (hip_dev >= 0 && hip_dev < omp_get_num_devices ());
        }
      else
        {
          void *hsa_agent = omp_get_interop_ptr (obj, omp_ipr_device, &rc);
          assert (rc == omp_irc_success);
          assert (hsa_agent != NULL);
        }

      /* Device context: a pointer (hipCtx_t) for hip, N/A for hsa.  */
      rc = omp_irc_out_of_range;
      void *context = omp_get_interop_ptr (obj, omp_ipr_device_context, &rc);
      if (is_hip)
        {
          assert (rc == omp_irc_success);
          assert (context != NULL);
        }
      else
        {
          assert (rc == omp_irc_no_value);
          assert (context == NULL);
        }

      /* Targetsync: hipStream_t / hsa_queue_t * -- a pointer, which is only
         available when 'targetsync' was requested.  */
      rc = omp_irc_out_of_range;
      void *sync_obj = omp_get_interop_ptr (obj, omp_ipr_targetsync, &rc);
      if (want_targetsync)
        {
          assert (rc == omp_irc_success);
          assert (sync_obj != NULL);
        }
      else
        {
          assert (rc == omp_irc_no_value);
          assert (sync_obj == NULL);
        }

      check_type (obj);

      /* Type descriptions of the properties queried above.  */
      const char *device_desc;
      const char *context_desc;
      const char *targetsync_desc;
      if (is_hip)
        {
          device_desc = "hipDevice_t";
          context_desc = "hipCtx_t";
          targetsync_desc = "hipStream_t";
        }
      else
        {
          device_desc = "hsa_agent_t *";
          context_desc = "N/A";
          targetsync_desc = "hsa_queue_t *";
        }
      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform),
                      "N/A") == 0);
      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device),
                      device_desc) == 0);
      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context),
                      context_desc) == 0);
      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync),
                      targetsync_desc) == 0);

      /* Finally, use and release the object.  */
      if (explicit_dev)
        {
          #pragma omp interop use(obj) device(dev)
          #pragma omp interop destroy(obj) device(dev)
        }
      else
        {
          #pragma omp interop use(obj)
          #pragma omp interop destroy(obj)
        }
    }
}
/* Run do_check in three ways: once for the initial default device, then
   with every device number from -1 (presumably omp_initial_device -- TODO
   confirm) up to, but excluding, omp_get_num_devices () passed explicitly,
   and finally with each of those numbers installed as the default
   device.  */
int
main ()
{
  do_check (DEFAULT_DEVICE);

  const int ndev = omp_get_num_devices ();

  /* Pass the device number explicitly.  */
  int dev = -1;
  while (dev < ndev)
    {
      do_check (dev);
      dev++;
    }

  /* Make each device the default one and rely on the default.  */
  dev = -1;
  while (dev < ndev)
    {
      omp_set_default_device (dev);
      do_check (DEFAULT_DEVICE);
      dev++;
    }

  return 0;
}