[GCN] Handle generic ISA names in libgomp's plugin-gcn.c
libgomp/ChangeLog: * plugin/plugin-gcn.c (ELFABIVERSION_AMDGPU_HSA_V6, EF_AMDGPU_GENERIC_VERSION_V, EF_AMDGPU_GENERIC_VERSION_OFFSET, GET_GENERIC_VERSION): New #define. (elf_gcn_isa_is_generic): New. (isa_matches_agent): Accept all generic code objects on the first go; extend the diagnostic and handle runtime-failed case. (create_and_finalize_hsa_program): Call it also after loading the code failed, pass the status.
This commit is contained in:
parent
bad9a7303a
commit
8561e4e290
1 changed files with 93 additions and 27 deletions
|
@ -66,6 +66,14 @@
|
|||
#define R_AMDGPU_RELATIVE64 13 /* B + A */
|
||||
#endif
|
||||
|
||||
#define ELFABIVERSION_AMDGPU_HSA_V6 4
|
||||
|
||||
#define EF_AMDGPU_GENERIC_VERSION_V 0xff000000 /* Mask. */
|
||||
#define EF_AMDGPU_GENERIC_VERSION_OFFSET 24
|
||||
|
||||
#define GET_GENERIC_VERSION(VAR) ((VAR & EF_AMDGPU_GENERIC_VERSION_V) \
|
||||
>> EF_AMDGPU_GENERIC_VERSION_OFFSET)
|
||||
|
||||
/* GCN specific definitions for asynchronous queues. */
|
||||
|
||||
#define ASYNC_QUEUE_SIZE 64
|
||||
|
@ -242,7 +250,7 @@ struct kernel_dispatch
|
|||
};
|
||||
|
||||
/* Structure of the kernargs segment, supporting console output.
|
||||
|
||||
|
||||
This needs to match the definitions in Newlib, and the expectations
|
||||
in libgomp target code. */
|
||||
|
||||
|
@ -1668,6 +1676,13 @@ elf_gcn_isa_field (Elf64_Ehdr *image)
|
|||
return image->e_flags & EF_AMDGPU_MACH_MASK;
|
||||
}
|
||||
|
||||
static int
|
||||
elf_gcn_isa_is_generic (Elf64_Ehdr *image)
|
||||
{
|
||||
return (image->e_ident[8] == ELFABIVERSION_AMDGPU_HSA_V6
|
||||
&& GET_GENERIC_VERSION (image->e_flags));
|
||||
}
|
||||
|
||||
/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
|
||||
support the ISA. */
|
||||
|
||||
|
@ -2399,38 +2414,88 @@ init_basic_kernel_info (struct kernel_info *kernel,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Check that the GCN ISA of the given image matches the ISA of the agent. */
|
||||
/* If status is SUCCESS, assume that the code runs if either the ISA of agent
|
||||
and code is the same - or it is generic code.
|
||||
Otherwise, execution failed with the provided status code; try to give
|
||||
some useful diagnostic. */
|
||||
|
||||
static bool
|
||||
isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image)
|
||||
isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image,
|
||||
hsa_status_t status)
|
||||
{
|
||||
/* Generic image - assume that it works and only return to here
|
||||
when it fails, i.e. fatal == true. */
|
||||
if (status == HSA_STATUS_SUCCESS && elf_gcn_isa_is_generic (image))
|
||||
return true;
|
||||
|
||||
int isa_field = elf_gcn_isa_field (image);
|
||||
const char* isa_s = isa_name (isa_field);
|
||||
if (!isa_s)
|
||||
if (status == HSA_STATUS_SUCCESS && isa_field == agent->device_isa)
|
||||
return true;
|
||||
|
||||
/* If we get here, either the binary is non-generic and has a mismatch of
|
||||
the ISA - or is generic but not handled by the ROCm (e.g. because ROCm
|
||||
is too old). */
|
||||
|
||||
char msg[340];
|
||||
char agent_isa_xs[8];
|
||||
char device_isa_xs[8];
|
||||
const char *agent_isa_s = isa_name (agent->device_isa);
|
||||
const char *device_isa_s = isa_name (isa_field);
|
||||
if (agent_isa_s == NULL)
|
||||
{
|
||||
hsa_error ("Unsupported ISA in GCN code object.", HSA_STATUS_ERROR);
|
||||
return false;
|
||||
snprintf (agent_isa_xs, sizeof agent_isa_xs,
|
||||
"0x%X", agent->device_isa);
|
||||
agent_isa_s = agent_isa_xs;
|
||||
}
|
||||
if (device_isa_s == NULL)
|
||||
{
|
||||
snprintf (device_isa_xs, sizeof device_isa_xs, "0x%X", isa_field);
|
||||
device_isa_s = device_isa_xs;
|
||||
}
|
||||
|
||||
if (isa_field != agent->device_isa)
|
||||
{
|
||||
char msg[204];
|
||||
const char *agent_isa_s = isa_name (agent->device_isa);
|
||||
assert (agent_isa_s);
|
||||
/* Some error which should be unrelated to the ISA. */
|
||||
if (status != HSA_STATUS_SUCCESS
|
||||
&& status != HSA_STATUS_ERROR_INVALID_CODE_OBJECT
|
||||
&& status != HSA_STATUS_ERROR_INVALID_ISA_NAME
|
||||
&& status != HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS)
|
||||
snprintf (msg, sizeof msg,
|
||||
"Could not load GCN code object with ISA %s on GPU with "
|
||||
"ISA %s (device %d).\n"
|
||||
"Consider using ROCR_VISIBLE_DEVICES to disable incompatible "
|
||||
"devices or run with LOADER_ENABLE_LOGGING=1 for more details.",
|
||||
device_isa_s, agent_isa_s, agent->device_id);
|
||||
else if (status == HSA_STATUS_ERROR_INVALID_ISA_NAME
|
||||
&& elf_gcn_isa_is_generic (image))
|
||||
snprintf (msg, sizeof msg,
|
||||
"Unsupported generic ISA %s on GPU with ISA %s (device %d).\n"
|
||||
"%s%s%s"
|
||||
"Consider using ROCR_VISIBLE_DEVICES to disable incompatible "
|
||||
"devices, run with LOADER_ENABLE_LOGGING=1 for more details, "
|
||||
"or try updating to a ROCm that supports this generic ISA.",
|
||||
device_isa_s, agent_isa_s, agent->device_id,
|
||||
agent_isa_s[0] != '0'
|
||||
? "Try to recompile with '-foffload-options=-march=" : "",
|
||||
agent_isa_s[0] != '0' ? agent_isa_s : "",
|
||||
agent_isa_s[0] != '0' ? ".\n" : "");
|
||||
else if (agent_isa_s[0] == '0')
|
||||
snprintf (msg, sizeof msg,
|
||||
"GCN code object ISA '%s' is incompatible with GPU ISA '%s' "
|
||||
"(device %d).\n"
|
||||
"Consider using ROCR_VISIBLE_DEVICES to disable incompatible "
|
||||
"devices or run with LOADER_ENABLE_LOGGING=1 for more details.",
|
||||
device_isa_s, agent_isa_s, agent->device_id);
|
||||
else
|
||||
snprintf (msg, sizeof msg,
|
||||
"GCN code object ISA '%s' is incompatible with GPU ISA '%s' "
|
||||
"(device %d).\n"
|
||||
"Try to recompile with '-foffload-options=-march=%s',\n"
|
||||
"or use ROCR_VISIBLE_DEVICES to disable incompatible "
|
||||
"devices.\n",
|
||||
device_isa_s, agent_isa_s, agent->device_id, agent_isa_s);
|
||||
|
||||
snprintf (msg, sizeof msg,
|
||||
"GCN code object ISA '%s' does not match GPU ISA '%s' "
|
||||
"(device %d).\n"
|
||||
"Try to recompile with '-foffload-options=-march=%s',\n"
|
||||
"or use ROCR_VISIBLE_DEVICES to disable incompatible "
|
||||
"devices.\n",
|
||||
isa_s, agent_isa_s, agent->device_id, agent_isa_s);
|
||||
|
||||
hsa_error (msg, HSA_STATUS_ERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
hsa_error (msg, status != HSA_STATUS_SUCCESS
|
||||
? status : HSA_STATUS_ERROR_INVALID_CODE_OBJECT);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Create and finalize the program consisting of all loaded modules. */
|
||||
|
@ -2464,7 +2529,8 @@ create_and_finalize_hsa_program (struct agent_info *agent)
|
|||
{
|
||||
Elf64_Ehdr *image = (Elf64_Ehdr *)module->image_desc->gcn_image->image;
|
||||
|
||||
if (!isa_matches_agent (agent, image))
|
||||
/* Check the ISA early because older ROCm had unhelpful errors. */
|
||||
if (!isa_matches_agent (agent, image, HSA_STATUS_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
hsa_code_object_t co = { 0 };
|
||||
|
@ -2482,7 +2548,7 @@ create_and_finalize_hsa_program (struct agent_info *agent)
|
|||
(agent->executable, agent->id, co, "");
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
hsa_error ("Could not load GCN code object", status);
|
||||
isa_matches_agent (agent, image, status);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue