diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 4611bc55392..03220555075 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -80,7 +80,7 @@ extern unsigned int gcn_local_sym_hash (const char *name); writes a new AMD GPU object file and the ABI version needs to be the same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5. GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5. - Note that Fiji is only suppored with LLVM <= 17 as version 3 is no longer + Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer supported in LLVM >= 18. */ #define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \ "!march=*|march=*:--amdhsa-code-object-version=4" diff --git a/include/hsa.h b/include/hsa.h index f9b5d9daf85..3c7be95d7fd 100644 --- a/include/hsa.h +++ b/include/hsa.h @@ -466,7 +466,9 @@ typedef enum { /** * String containing the ROCr build identifier. */ - HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200 + HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200, + + HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202 } hsa_system_info_t; /** diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 22868635230..e79bd7a3392 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -6360,8 +6360,13 @@ The implementation remark: such that the next reverse offload region is only executed after the previous one returned. @item OpenMP code that has a @code{requires} directive with - @code{unified_shared_memory} will remove any GCN device from the list of - available devices (``host fallback''). + @code{unified_shared_memory} is only supported if all AMD GPUs have the + @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for + discrete GPUs, this may require setting the @code{HSA_XNACK} environment + variable to @samp{1}; for systems with both an APU and a discrete GPU that + does not support XNACK, consider using @code{ROCR_VISIBLE_DEVICES} to + enable only the APU. 
If not supported, all AMD GPU devices are removed + from the list of available devices (``host fallback''). @item The available stack size can be changed using the @code{GCN_STACK_SIZE} environment variable; the default is 32 kiB per thread. @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 3cdc7ba929f..3d882b5ab63 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3355,8 +3355,25 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask) if (hsa_context.agent_count > 0 && ((omp_requires_mask & ~(GOMP_REQUIRES_UNIFIED_ADDRESS + | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0)) return -1; + /* Check whether host page access is supported; this is per system level + (all GPUs supported by HSA). While intrinsically true for APUs, it + requires XNACK support for discrete GPUs. */ + if (hsa_context.agent_count > 0 + && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)) + { + bool b = false; /* Stays false if the query below fails. */ + hsa_system_info_t type = HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT; + hsa_status_t status = hsa_fns.hsa_system_get_info_fn (type, &b); + if (status != HSA_STATUS_SUCCESS) + GOMP_PLUGIN_error ("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT " + "failed"); + if (!b) + return -1; + } + return hsa_context.agent_count; }