openmp: Implement omp_get_device_num routine
This patch implements the omp_get_device_num library routine, specified in OpenMP 5.0. GOMP_DEVICE_NUM_VAR is a macro symbol which defines name of a "device number" variable, is defined on the device-side libgomp, has it's address returned to host-side libgomp during device initialization, and the host libgomp then sets its value to the designated device number. libgomp/ChangeLog: * icv-device.c (omp_get_device_num): New API function, host side. * fortran.c (omp_get_device_num_): New interface function. * libgomp-plugin.h (GOMP_DEVICE_NUM_VAR): Define macro symbol. * libgomp.map (OMP_5.0.2): New version space with omp_get_device_num, omp_get_device_num_. * libgomp.texi (omp_get_device_num): Add documentation for new API function. * omp.h.in (omp_get_device_num): Add declaration. * omp_lib.f90.in (omp_get_device_num): Likewise. * omp_lib.h.in (omp_get_device_num): Likewise. * target.c (gomp_load_image_to_device): If additional entry for device number exists at end of returned entries from 'load_image_func' hook, copy the assigned device number over to the device variable. * config/gcn/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. (omp_get_device_num): New API function, device side. * plugin/plugin-gcn.c ("symcat.h"): Add include. (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR at end of returned 'target_table' entries. * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. (omp_get_device_num): New API function, device side. * plugin/plugin-nvptx.c ("symcat.h"): Add include. (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR at end of returned 'target_table' entries. * testsuite/lib/libgomp.exp (check_effective_target_offload_target_intelmic): New function for testing for intelmic offloading. * testsuite/libgomp.c-c++-common/target-45.c: New test. * testsuite/libgomp.fortran/target10.f90: New test.
This commit is contained in:
parent
8dec72aeb5
commit
0bac793ed6
16 changed files with 238 additions and 7 deletions
|
@ -70,6 +70,16 @@ omp_is_initial_device (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* This is set to the device number of current GPU during device initialization,
|
||||
when the offload image containing this libgomp portion is loaded. */
|
||||
static volatile int GOMP_DEVICE_NUM_VAR;
|
||||
|
||||
int
|
||||
omp_get_device_num (void)
|
||||
{
|
||||
return GOMP_DEVICE_NUM_VAR;
|
||||
}
|
||||
|
||||
ialias (omp_set_default_device)
|
||||
ialias (omp_get_default_device)
|
||||
ialias (omp_get_initial_device)
|
||||
|
@ -77,3 +87,4 @@ ialias (omp_get_num_devices)
|
|||
ialias (omp_get_num_teams)
|
||||
ialias (omp_get_team_num)
|
||||
ialias (omp_is_initial_device)
|
||||
ialias (omp_get_device_num)
|
||||
|
|
|
@ -58,8 +58,19 @@ omp_is_initial_device (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* This is set to the device number of current GPU during device initialization,
|
||||
when the offload image containing this libgomp portion is loaded. */
|
||||
static volatile int GOMP_DEVICE_NUM_VAR;
|
||||
|
||||
int
|
||||
omp_get_device_num (void)
|
||||
{
|
||||
return GOMP_DEVICE_NUM_VAR;
|
||||
}
|
||||
|
||||
ialias (omp_set_default_device)
|
||||
ialias (omp_get_default_device)
|
||||
ialias (omp_get_initial_device)
|
||||
ialias (omp_get_num_devices)
|
||||
ialias (omp_is_initial_device)
|
||||
ialias (omp_get_device_num)
|
||||
|
|
|
@ -83,6 +83,7 @@ ialias_redirect (omp_get_partition_place_nums)
|
|||
ialias_redirect (omp_set_default_device)
|
||||
ialias_redirect (omp_get_default_device)
|
||||
ialias_redirect (omp_get_num_devices)
|
||||
ialias_redirect (omp_get_device_num)
|
||||
ialias_redirect (omp_get_num_teams)
|
||||
ialias_redirect (omp_get_team_num)
|
||||
ialias_redirect (omp_is_initial_device)
|
||||
|
@ -599,6 +600,12 @@ omp_get_initial_device_ (void)
|
|||
return omp_get_initial_device ();
|
||||
}
|
||||
|
||||
int32_t
|
||||
omp_get_device_num_ (void)
|
||||
{
|
||||
return omp_get_device_num ();
|
||||
}
|
||||
|
||||
int32_t
|
||||
omp_get_max_task_priority_ (void)
|
||||
{
|
||||
|
|
|
@ -61,8 +61,17 @@ omp_is_initial_device (void)
|
|||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
omp_get_device_num (void)
|
||||
{
|
||||
/* By specification, this is equivalent to omp_get_initial_device
|
||||
on the host. */
|
||||
return omp_get_initial_device ();
|
||||
}
|
||||
|
||||
ialias (omp_set_default_device)
|
||||
ialias (omp_get_default_device)
|
||||
ialias (omp_get_initial_device)
|
||||
ialias (omp_get_num_devices)
|
||||
ialias (omp_is_initial_device)
|
||||
ialias (omp_get_device_num)
|
||||
|
|
|
@ -102,6 +102,12 @@ struct addr_pair
|
|||
uintptr_t end;
|
||||
};
|
||||
|
||||
/* This symbol is to name a target side variable that holds the designated
|
||||
'device number' of the target device. The symbol needs to be available to
|
||||
libgomp code and the offload plugin (which in the latter case must be
|
||||
stringified). */
|
||||
#define GOMP_DEVICE_NUM_VAR __gomp_device_num
|
||||
|
||||
/* Miscellaneous functions. */
|
||||
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
|
||||
extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
|
||||
|
|
|
@ -199,12 +199,18 @@ OMP_5.0.1 {
|
|||
omp_fulfill_event_;
|
||||
} OMP_5.0;
|
||||
|
||||
OMP_5.0.2 {
|
||||
global:
|
||||
omp_get_device_num;
|
||||
omp_get_device_num_;
|
||||
} OMP_5.0.1;
|
||||
|
||||
OMP_5.1 {
|
||||
global:
|
||||
omp_display_env;
|
||||
omp_display_env_;
|
||||
omp_display_env_8_;
|
||||
} OMP_5.0.1;
|
||||
} OMP_5.0.2;
|
||||
|
||||
GOMP_1.0 {
|
||||
global:
|
||||
|
|
|
@ -165,6 +165,7 @@ linkage, and do not throw exceptions.
|
|||
* omp_get_ancestor_thread_num:: Ancestor thread ID
|
||||
* omp_get_cancellation:: Whether cancellation support is enabled
|
||||
* omp_get_default_device:: Get the default device for target regions
|
||||
* omp_get_device_num:: Get device that current thread is running on
|
||||
* omp_get_dynamic:: Dynamic teams setting
|
||||
* omp_get_initial_device:: Device number of host device
|
||||
* omp_get_level:: Number of parallel regions
|
||||
|
@ -385,6 +386,34 @@ For OpenMP 5.1, this must be equal to the value returned by the
|
|||
|
||||
|
||||
|
||||
@node omp_get_device_num
|
||||
@section @code{omp_get_device_num} -- Return device number of current device
|
||||
@table @asis
|
||||
@item @emph{Description}:
|
||||
This function returns a device number that represents the device that the
|
||||
current thread is executing on. For OpenMP 5.0, this must be equal to the
|
||||
value returned by the @code{omp_get_initial_device} function when called
|
||||
from the host.
|
||||
|
||||
@item @emph{C/C++}
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{int omp_get_device_num(void);}
|
||||
@end multitable
|
||||
|
||||
@item @emph{Fortran}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Interface}: @tab @code{integer function omp_get_device_num()}
|
||||
@end multitable
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{omp_get_initial_device}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.2.37.
|
||||
@end table
|
||||
|
||||
|
||||
|
||||
@node omp_get_level
|
||||
@section @code{omp_get_level} -- Obtain the current nesting level
|
||||
@table @asis
|
||||
|
|
|
@ -243,6 +243,7 @@ extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW;
|
|||
extern void omp_set_default_device (int) __GOMP_NOTHROW;
|
||||
extern int omp_get_default_device (void) __GOMP_NOTHROW;
|
||||
extern int omp_get_num_devices (void) __GOMP_NOTHROW;
|
||||
extern int omp_get_device_num (void) __GOMP_NOTHROW;
|
||||
extern int omp_get_num_teams (void) __GOMP_NOTHROW;
|
||||
extern int omp_get_team_num (void) __GOMP_NOTHROW;
|
||||
|
||||
|
|
|
@ -550,6 +550,12 @@
|
|||
end function omp_get_initial_device
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_get_device_num ()
|
||||
integer (4) :: omp_get_device_num
|
||||
end function omp_get_device_num
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_get_max_task_priority ()
|
||||
integer (4) :: omp_get_max_task_priority
|
||||
|
|
|
@ -244,6 +244,9 @@
|
|||
external omp_get_initial_device
|
||||
integer(4) omp_get_initial_device
|
||||
|
||||
external omp_get_device_num
|
||||
integer(4) omp_get_device_num
|
||||
|
||||
external omp_get_max_task_priority
|
||||
integer(4) omp_get_max_task_priority
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
/* {{{ Includes and defines */
|
||||
|
||||
#include "config.h"
|
||||
#include "symcat.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
@ -3305,6 +3306,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
struct kernel_info *kernel;
|
||||
int kernel_count = image_desc->kernel_count;
|
||||
unsigned var_count = image_desc->global_variable_count;
|
||||
int other_count = 1;
|
||||
|
||||
agent = get_agent_info (ord);
|
||||
if (!agent)
|
||||
|
@ -3321,7 +3323,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
|
||||
GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
|
||||
GCN_DEBUG ("Encountered %u global variables in an image\n", var_count);
|
||||
pair = GOMP_PLUGIN_malloc ((kernel_count + var_count - 2)
|
||||
GCN_DEBUG ("Expect %d other variables in an image\n", other_count);
|
||||
pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2)
|
||||
* sizeof (struct addr_pair));
|
||||
*target_table = pair;
|
||||
module = (struct module_info *)
|
||||
|
@ -3396,6 +3399,37 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
pair++;
|
||||
}
|
||||
|
||||
GCN_DEBUG ("Looking for variable %s\n", STRINGX (GOMP_DEVICE_NUM_VAR));
|
||||
|
||||
hsa_status_t status;
|
||||
hsa_executable_symbol_t var_symbol;
|
||||
status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
|
||||
STRINGX (GOMP_DEVICE_NUM_VAR),
|
||||
agent->id, 0, &var_symbol);
|
||||
if (status == HSA_STATUS_SUCCESS)
|
||||
{
|
||||
uint64_t device_num_varptr;
|
||||
uint32_t device_num_varsize;
|
||||
|
||||
status = hsa_fns.hsa_executable_symbol_get_info_fn
|
||||
(var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
|
||||
&device_num_varptr);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Could not extract a variable from its symbol", status);
|
||||
status = hsa_fns.hsa_executable_symbol_get_info_fn
|
||||
(var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
|
||||
&device_num_varsize);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Could not extract a variable size from its symbol", status);
|
||||
|
||||
pair->start = device_num_varptr;
|
||||
pair->end = device_num_varptr + device_num_varsize;
|
||||
}
|
||||
else
|
||||
/* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */
|
||||
pair->start = pair->end = 0;
|
||||
pair++;
|
||||
|
||||
/* Ensure that constructors are run first. */
|
||||
struct GOMP_kernel_launch_attributes kla =
|
||||
{ 3,
|
||||
|
@ -3418,7 +3452,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
if (module->fini_array_func)
|
||||
kernel_count--;
|
||||
|
||||
return kernel_count + var_count;
|
||||
return kernel_count + var_count + other_count;
|
||||
}
|
||||
|
||||
/* Unload GCN object-code module described by struct gcn_image_desc in
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#define _GNU_SOURCE
|
||||
#include "openacc.h"
|
||||
#include "config.h"
|
||||
#include "symcat.h"
|
||||
#include "libgomp-plugin.h"
|
||||
#include "oacc-plugin.h"
|
||||
#include "gomp-constants.h"
|
||||
|
@ -1265,7 +1266,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
CUmodule module;
|
||||
const char *const *var_names;
|
||||
const struct targ_fn_launch *fn_descs;
|
||||
unsigned int fn_entries, var_entries, i, j;
|
||||
unsigned int fn_entries, var_entries, other_entries, i, j;
|
||||
struct targ_fn_descriptor *targ_fns;
|
||||
struct addr_pair *targ_tbl;
|
||||
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
|
||||
|
@ -1295,8 +1296,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
fn_entries = img_header->fn_num;
|
||||
fn_descs = img_header->fn_descs;
|
||||
|
||||
/* Currently, the only other entry kind is 'device number'. */
|
||||
other_entries = 1;
|
||||
|
||||
targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
|
||||
* (fn_entries + var_entries));
|
||||
* (fn_entries + var_entries + other_entries));
|
||||
targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
|
||||
* fn_entries);
|
||||
|
||||
|
@ -1345,9 +1349,24 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
targ_tbl->end = targ_tbl->start + bytes;
|
||||
}
|
||||
|
||||
CUdeviceptr device_num_varptr;
|
||||
size_t device_num_varsize;
|
||||
CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &device_num_varptr,
|
||||
&device_num_varsize, module,
|
||||
STRINGX (GOMP_DEVICE_NUM_VAR));
|
||||
if (r == CUDA_SUCCESS)
|
||||
{
|
||||
targ_tbl->start = (uintptr_t) device_num_varptr;
|
||||
targ_tbl->end = (uintptr_t) (device_num_varptr + device_num_varsize);
|
||||
}
|
||||
else
|
||||
/* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */
|
||||
targ_tbl->start = targ_tbl->end = 0;
|
||||
targ_tbl++;
|
||||
|
||||
nvptx_set_clocktick (module, dev);
|
||||
|
||||
return fn_entries + var_entries;
|
||||
return fn_entries + var_entries + other_entries;
|
||||
}
|
||||
|
||||
/* Unload the program described by TARGET_DATA. DEV_DATA is the
|
||||
|
|
|
@ -1974,6 +1974,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
|
|||
int num_funcs = host_funcs_end - host_func_table;
|
||||
int num_vars = (host_vars_end - host_var_table) / 2;
|
||||
|
||||
/* Others currently is only 'device_num' */
|
||||
int num_others = 1;
|
||||
|
||||
/* Load image to device and get target addresses for the image. */
|
||||
struct addr_pair *target_table = NULL;
|
||||
int i, num_target_entries;
|
||||
|
@ -1982,7 +1985,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
|
|||
= devicep->load_image_func (devicep->target_id, version,
|
||||
target_data, &target_table);
|
||||
|
||||
if (num_target_entries != num_funcs + num_vars)
|
||||
if (num_target_entries != num_funcs + num_vars
|
||||
/* Others (device_num) are included as trailing entries in pair list. */
|
||||
&& num_target_entries != num_funcs + num_vars + num_others)
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
if (is_register_lock)
|
||||
|
@ -2054,6 +2059,35 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
|
|||
array++;
|
||||
}
|
||||
|
||||
/* Last entry is for the on-device 'device_num' variable. Tolerate case
|
||||
where plugin does not return this entry. */
|
||||
if (num_funcs + num_vars < num_target_entries)
|
||||
{
|
||||
struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
|
||||
/* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
|
||||
was found in this image. */
|
||||
if (device_num_var->start != 0)
|
||||
{
|
||||
/* The index of the devicep within devices[] is regarded as its
|
||||
'device number', which is different from the per-device type
|
||||
devicep->target_id. */
|
||||
int device_num_val = (int) (devicep - &devices[0]);
|
||||
if (device_num_var->end - device_num_var->start != sizeof (int))
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
if (is_register_lock)
|
||||
gomp_mutex_unlock (®ister_lock);
|
||||
gomp_fatal ("offload plugin managed 'device_num' not of expected "
|
||||
"format");
|
||||
}
|
||||
|
||||
/* Copy device_num value to place on device memory, hereby actually
|
||||
designating its device number into effect. */
|
||||
gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
|
||||
&device_num_val, sizeof (int), false, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
free (target_table);
|
||||
}
|
||||
|
||||
|
|
|
@ -374,6 +374,11 @@ proc check_effective_target_offload_target_amdgcn { } {
|
|||
return [libgomp_check_effective_target_offload_target "amdgcn"]
|
||||
}
|
||||
|
||||
# Return 1 if compiling for offload target intelmic
|
||||
proc check_effective_target_offload_target_intelmic { } {
|
||||
return [libgomp_check_effective_target_offload_target "*-intelmic"]
|
||||
}
|
||||
|
||||
# Return 1 if offload device is available.
|
||||
proc check_effective_target_offload_device { } {
|
||||
return [check_runtime_nocache offload_device_available_ {
|
||||
|
|
30
libgomp/testsuite/libgomp.c-c++-common/target-45.c
Normal file
30
libgomp/testsuite/libgomp.c-c++-common/target-45.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* { dg-do run { target { ! offload_target_intelmic } } } */
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main (void)
|
||||
{
|
||||
|
||||
int host_device_num = omp_get_device_num ();
|
||||
|
||||
if (host_device_num != omp_get_initial_device ())
|
||||
abort ();
|
||||
|
||||
int device_num;
|
||||
int initial_device;
|
||||
|
||||
#pragma omp target map(from: device_num, initial_device)
|
||||
{
|
||||
initial_device = omp_is_initial_device ();
|
||||
device_num = omp_get_device_num ();
|
||||
}
|
||||
|
||||
if (initial_device && host_device_num != device_num)
|
||||
abort ();
|
||||
|
||||
if (!initial_device && host_device_num == device_num)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
20
libgomp/testsuite/libgomp.fortran/target10.f90
Normal file
20
libgomp/testsuite/libgomp.fortran/target10.f90
Normal file
|
@ -0,0 +1,20 @@
|
|||
! { dg-do run { target { ! offload_target_intelmic } } }
|
||||
|
||||
program main
|
||||
use omp_lib
|
||||
implicit none
|
||||
integer :: device_num, host_device_num
|
||||
logical :: initial_device
|
||||
|
||||
host_device_num = omp_get_device_num ()
|
||||
if (host_device_num .ne. omp_get_initial_device ()) stop 1
|
||||
|
||||
!$omp target map(from: device_num, initial_device)
|
||||
initial_device = omp_is_initial_device ()
|
||||
device_num = omp_get_device_num ()
|
||||
!$omp end target
|
||||
|
||||
if (initial_device .and. (host_device_num .ne. device_num)) stop 2
|
||||
if ((.not. initial_device) .and. (host_device_num .eq. device_num)) stop 3
|
||||
|
||||
end program main
|
Loading…
Add table
Reference in a new issue