From 0bac793ed6bad2c0c13cd1e93a1aa5808467afc8 Mon Sep 17 00:00:00 2001 From: Chung-Lin Tang Date: Thu, 5 Aug 2021 23:29:03 +0800 Subject: [PATCH] openmp: Implement omp_get_device_num routine This patch implements the omp_get_device_num library routine, specified in OpenMP 5.0. GOMP_DEVICE_NUM_VAR is a macro symbol which defines name of a "device number" variable, is defined on the device-side libgomp, has it's address returned to host-side libgomp during device initialization, and the host libgomp then sets its value to the designated device number. libgomp/ChangeLog: * icv-device.c (omp_get_device_num): New API function, host side. * fortran.c (omp_get_device_num_): New interface function. * libgomp-plugin.h (GOMP_DEVICE_NUM_VAR): Define macro symbol. * libgomp.map (OMP_5.0.2): New version space with omp_get_device_num, omp_get_device_num_. * libgomp.texi (omp_get_device_num): Add documentation for new API function. * omp.h.in (omp_get_device_num): Add declaration. * omp_lib.f90.in (omp_get_device_num): Likewise. * omp_lib.h.in (omp_get_device_num): Likewise. * target.c (gomp_load_image_to_device): If additional entry for device number exists at end of returned entries from 'load_image_func' hook, copy the assigned device number over to the device variable. * config/gcn/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. (omp_get_device_num): New API function, device side. * plugin/plugin-gcn.c ("symcat.h"): Add include. (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR at end of returned 'target_table' entries. * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. (omp_get_device_num): New API function, device side. * plugin/plugin-nvptx.c ("symcat.h"): Add include. (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR at end of returned 'target_table' entries. * testsuite/lib/libgomp.exp (check_effective_target_offload_target_intelmic): New function for testing for intelmic offloading. * testsuite/libgomp.c-c++-common/target-45.c: New test. * testsuite/libgomp.fortran/target10.f90: New test. --- libgomp/config/gcn/icv-device.c | 11 ++++++ libgomp/config/nvptx/icv-device.c | 11 ++++++ libgomp/fortran.c | 7 ++++ libgomp/icv-device.c | 9 +++++ libgomp/libgomp-plugin.h | 6 +++ libgomp/libgomp.map | 8 +++- libgomp/libgomp.texi | 29 ++++++++++++++ libgomp/omp.h.in | 1 + libgomp/omp_lib.f90.in | 6 +++ libgomp/omp_lib.h.in | 3 ++ libgomp/plugin/plugin-gcn.c | 38 ++++++++++++++++++- libgomp/plugin/plugin-nvptx.c | 25 ++++++++++-- libgomp/target.c | 36 +++++++++++++++++- libgomp/testsuite/lib/libgomp.exp | 5 +++ .../libgomp.c-c++-common/target-45.c | 30 +++++++++++++++ .../testsuite/libgomp.fortran/target10.f90 | 20 ++++++++++ 16 files changed, 238 insertions(+), 7 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-45.c create mode 100644 libgomp/testsuite/libgomp.fortran/target10.f90 diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c index 72d4f7cff74..34e0f8346f2 100644 --- a/libgomp/config/gcn/icv-device.c +++ b/libgomp/config/gcn/icv-device.c @@ -70,6 +70,16 @@ omp_is_initial_device (void) return 0; } +/* This is set to the device number of current GPU during device initialization, + when the offload image containing this libgomp portion is loaded. */ +static volatile int GOMP_DEVICE_NUM_VAR; + +int +omp_get_device_num (void) +{ + return GOMP_DEVICE_NUM_VAR; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) @@ -77,3 +87,4 @@ ialias (omp_get_num_devices) ialias (omp_get_num_teams) ialias (omp_get_team_num) ialias (omp_is_initial_device) +ialias (omp_get_device_num) diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c index 3b96890f338..b63149d0c34 100644 --- a/libgomp/config/nvptx/icv-device.c +++ b/libgomp/config/nvptx/icv-device.c @@ -58,8 +58,19 @@ omp_is_initial_device (void) return 0; } +/* This is set to the device number of current GPU during device initialization, + when the offload image containing this libgomp portion is loaded. */ +static volatile int GOMP_DEVICE_NUM_VAR; + +int +omp_get_device_num (void) +{ + return GOMP_DEVICE_NUM_VAR; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) ialias (omp_get_num_devices) ialias (omp_is_initial_device) +ialias (omp_get_device_num) diff --git a/libgomp/fortran.c b/libgomp/fortran.c index e042702ac91..07f97656e51 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -83,6 +83,7 @@ ialias_redirect (omp_get_partition_place_nums) ialias_redirect (omp_set_default_device) ialias_redirect (omp_get_default_device) ialias_redirect (omp_get_num_devices) +ialias_redirect (omp_get_device_num) ialias_redirect (omp_get_num_teams) ialias_redirect (omp_get_team_num) ialias_redirect (omp_is_initial_device) @@ -599,6 +600,12 @@ omp_get_initial_device_ (void) return omp_get_initial_device (); } +int32_t +omp_get_device_num_ (void) +{ + return omp_get_device_num (); +} + int32_t omp_get_max_task_priority_ (void) { diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c index c1bedf46647..f11bdfa85c4 100644 --- a/libgomp/icv-device.c +++ b/libgomp/icv-device.c @@ -61,8 +61,17 @@ omp_is_initial_device (void) return 1; } +int +omp_get_device_num (void) +{ + /* By specification, this is equivalent to omp_get_initial_device + on the host. */ + return omp_get_initial_device (); +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) ialias (omp_get_num_devices) ialias (omp_is_initial_device) +ialias (omp_get_device_num) diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 62645ce9954..cf24a2bee41 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -102,6 +102,12 @@ struct addr_pair uintptr_t end; }; +/* This symbol is to name a target side variable that holds the designated + 'device number' of the target device. The symbol needs to be available to + libgomp code and the offload plugin (which in the latter case must be + stringified). */ +#define GOMP_DEVICE_NUM_VAR __gomp_device_num + /* Miscellaneous functions. */ extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 69aa69562b8..cc44885cba9 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -199,12 +199,18 @@ OMP_5.0.1 { omp_fulfill_event_; } OMP_5.0; +OMP_5.0.2 { + global: + omp_get_device_num; + omp_get_device_num_; +} OMP_5.0.1; + OMP_5.1 { global: omp_display_env; omp_display_env_; omp_display_env_8_; -} OMP_5.0.1; +} OMP_5.0.2; GOMP_1.0 { global: diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 2c1f1b5968b..fc9e708a8d2 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -165,6 +165,7 @@ linkage, and do not throw exceptions. * omp_get_ancestor_thread_num:: Ancestor thread ID * omp_get_cancellation:: Whether cancellation support is enabled * omp_get_default_device:: Get the default device for target regions +* omp_get_device_num:: Get device that current thread is running on * omp_get_dynamic:: Dynamic teams setting * omp_get_initial_device:: Device number of host device * omp_get_level:: Number of parallel regions @@ -385,6 +386,34 @@ For OpenMP 5.1, this must be equal to the value returned by the +@node omp_get_device_num +@section @code{omp_get_device_num} -- Return device number of current device +@table @asis +@item @emph{Description}: +This function returns a device number that represents the device that the +current thread is executing on. For OpenMP 5.0, this must be equal to the +value returned by the @code{omp_get_initial_device} function when called +from the host. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{int omp_get_device_num(void);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{integer function omp_get_device_num()} +@end multitable + +@item @emph{See also}: +@ref{omp_get_initial_device} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.2.37. +@end table + + + @node omp_get_level @section @code{omp_get_level} -- Obtain the current nesting level @table @asis diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index c93db968d2e..da34a9d98a6 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -243,6 +243,7 @@ extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW; extern void omp_set_default_device (int) __GOMP_NOTHROW; extern int omp_get_default_device (void) __GOMP_NOTHROW; extern int omp_get_num_devices (void) __GOMP_NOTHROW; +extern int omp_get_device_num (void) __GOMP_NOTHROW; extern int omp_get_num_teams (void) __GOMP_NOTHROW; extern int omp_get_team_num (void) __GOMP_NOTHROW; diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 5fc6587e49e..d7e804f4fd5 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -550,6 +550,12 @@ end function omp_get_initial_device end interface + interface + function omp_get_device_num () + integer (4) :: omp_get_device_num + end function omp_get_device_num + end interface + interface function omp_get_max_task_priority () integer (4) :: omp_get_max_task_priority diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index 9873cea9ac1..20c32645e3c 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -244,6 +244,9 @@ external omp_get_initial_device integer(4) omp_get_initial_device + external omp_get_device_num + integer(4) omp_get_device_num + external omp_get_max_task_priority integer(4) omp_get_max_task_priority diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 2548614a2e5..f26d7361106 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -29,6 +29,7 @@ /* {{{ Includes and defines */ #include "config.h" +#include "symcat.h" #include #include #include @@ -3305,6 +3306,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct kernel_info *kernel; int kernel_count = image_desc->kernel_count; unsigned var_count = image_desc->global_variable_count; + int other_count = 1; agent = get_agent_info (ord); if (!agent) @@ -3321,7 +3323,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count); GCN_DEBUG ("Encountered %u global variables in an image\n", var_count); - pair = GOMP_PLUGIN_malloc ((kernel_count + var_count - 2) + GCN_DEBUG ("Expect %d other variables in an image\n", other_count); + pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2) * sizeof (struct addr_pair)); *target_table = pair; module = (struct module_info *) @@ -3396,6 +3399,37 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, pair++; } + GCN_DEBUG ("Looking for variable %s\n", STRINGX (GOMP_DEVICE_NUM_VAR)); + + hsa_status_t status; + hsa_executable_symbol_t var_symbol; + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + STRINGX (GOMP_DEVICE_NUM_VAR), + agent->id, 0, &var_symbol); + if (status == HSA_STATUS_SUCCESS) + { + uint64_t device_num_varptr; + uint32_t device_num_varsize; + + status = hsa_fns.hsa_executable_symbol_get_info_fn + (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &device_num_varptr); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable from its symbol", status); + status = hsa_fns.hsa_executable_symbol_get_info_fn + (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, + &device_num_varsize); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable size from its symbol", status); + + pair->start = device_num_varptr; + pair->end = device_num_varptr + device_num_varsize; + } + else + /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */ + pair->start = pair->end = 0; + pair++; + /* Ensure that constructors are run first. */ struct GOMP_kernel_launch_attributes kla = { 3, @@ -3418,7 +3452,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, if (module->fini_array_func) kernel_count--; - return kernel_count + var_count; + return kernel_count + var_count + other_count; } /* Unload GCN object-code module described by struct gcn_image_desc in diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 1215212d501..0f16e1cf00d 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -34,6 +34,7 @@ #define _GNU_SOURCE #include "openacc.h" #include "config.h" +#include "symcat.h" #include "libgomp-plugin.h" #include "oacc-plugin.h" #include "gomp-constants.h" @@ -1265,7 +1266,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, CUmodule module; const char *const *var_names; const struct targ_fn_launch *fn_descs; - unsigned int fn_entries, var_entries, i, j; + unsigned int fn_entries, var_entries, other_entries, i, j; struct targ_fn_descriptor *targ_fns; struct addr_pair *targ_tbl; const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data; @@ -1295,8 +1296,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, fn_entries = img_header->fn_num; fn_descs = img_header->fn_descs; + /* Currently, the only other entry kind is 'device number'. */ + other_entries = 1; + targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair) - * (fn_entries + var_entries)); + * (fn_entries + var_entries + other_entries)); targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor) * fn_entries); @@ -1345,9 +1349,24 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, targ_tbl->end = targ_tbl->start + bytes; } + CUdeviceptr device_num_varptr; + size_t device_num_varsize; + CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &device_num_varptr, + &device_num_varsize, module, + STRINGX (GOMP_DEVICE_NUM_VAR)); + if (r == CUDA_SUCCESS) + { + targ_tbl->start = (uintptr_t) device_num_varptr; + targ_tbl->end = (uintptr_t) (device_num_varptr + device_num_varsize); + } + else + /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */ + targ_tbl->start = targ_tbl->end = 0; + targ_tbl++; + nvptx_set_clocktick (module, dev); - return fn_entries + var_entries; + return fn_entries + var_entries + other_entries; } /* Unload the program described by TARGET_DATA. DEV_DATA is the diff --git a/libgomp/target.c b/libgomp/target.c index 453b3210e40..67fcf41cc2e 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1974,6 +1974,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, int num_funcs = host_funcs_end - host_func_table; int num_vars = (host_vars_end - host_var_table) / 2; + /* Others currently is only 'device_num' */ + int num_others = 1; + /* Load image to device and get target addresses for the image. */ struct addr_pair *target_table = NULL; int i, num_target_entries; @@ -1982,7 +1985,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, = devicep->load_image_func (devicep->target_id, version, target_data, &target_table); - if (num_target_entries != num_funcs + num_vars) + if (num_target_entries != num_funcs + num_vars + /* Others (device_num) are included as trailing entries in pair list. */ + && num_target_entries != num_funcs + num_vars + num_others) { gomp_mutex_unlock (&devicep->lock); if (is_register_lock) @@ -2054,6 +2059,35 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, array++; } + /* Last entry is for the on-device 'device_num' variable. Tolerate case + where plugin does not return this entry. */ + if (num_funcs + num_vars < num_target_entries) + { + struct addr_pair *device_num_var = &target_table[num_funcs + num_vars]; + /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR + was found in this image. */ + if (device_num_var->start != 0) + { + /* The index of the devicep within devices[] is regarded as its + 'device number', which is different from the per-device type + devicep->target_id. */ + int device_num_val = (int) (devicep - &devices[0]); + if (device_num_var->end - device_num_var->start != sizeof (int)) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (®ister_lock); + gomp_fatal ("offload plugin managed 'device_num' not of expected " + "format"); + } + + /* Copy device_num value to place on device memory, hereby actually + designating its device number into effect. */ + gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start, + &device_num_val, sizeof (int), false, NULL); + } + } + free (target_table); } diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index a2050151e84..ba8a73275c5 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -374,6 +374,11 @@ proc check_effective_target_offload_target_amdgcn { } { return [libgomp_check_effective_target_offload_target "amdgcn"] } +# Return 1 if compiling for offload target intelmic +proc check_effective_target_offload_target_intelmic { } { + return [libgomp_check_effective_target_offload_target "*-intelmic"] +} + # Return 1 if offload device is available. proc check_effective_target_offload_device { } { return [check_runtime_nocache offload_device_available_ { diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-45.c b/libgomp/testsuite/libgomp.c-c++-common/target-45.c new file mode 100644 index 00000000000..ec0d202e51c --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-45.c @@ -0,0 +1,30 @@ +/* { dg-do run { target { ! offload_target_intelmic } } } */ + +#include +#include + +int main (void) +{ + + int host_device_num = omp_get_device_num (); + + if (host_device_num != omp_get_initial_device ()) + abort (); + + int device_num; + int initial_device; + + #pragma omp target map(from: device_num, initial_device) + { + initial_device = omp_is_initial_device (); + device_num = omp_get_device_num (); + } + + if (initial_device && host_device_num != device_num) + abort (); + + if (!initial_device && host_device_num == device_num) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/target10.f90 b/libgomp/testsuite/libgomp.fortran/target10.f90 new file mode 100644 index 00000000000..0b939ad7a0d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target10.f90 @@ -0,0 +1,20 @@ +! { dg-do run { target { ! offload_target_intelmic } } } + +program main + use omp_lib + implicit none + integer :: device_num, host_device_num + logical :: initial_device + + host_device_num = omp_get_device_num () + if (host_device_num .ne. omp_get_initial_device ()) stop 1 + + !$omp target map(from: device_num, initial_device) + initial_device = omp_is_initial_device () + device_num = omp_get_device_num () + !$omp end target + + if (initial_device .and. (host_device_num .ne. device_num)) stop 2 + if ((.not. initial_device) .and. (host_device_num .eq. device_num)) stop 3 + +end program main