target.c (gomp_device_copy): New function.
libgomp/ 2016-05-26 Chung-Lin Tang <cltang@codesourcery.com> * target.c (gomp_device_copy): New function. (gomp_copy_host2dev): Likewise. (gomp_copy_dev2host): Likewise. (gomp_free_device_memory): Likewise. (gomp_map_vars_existing): Adjust to call gomp_copy_host2dev. (gomp_map_pointer): Likewise. (gomp_map_vars): Adjust to call gomp_copy_host2dev, handle NULL value from alloc_func plugin hook. (gomp_unmap_tgt): Adjust to call gomp_free_device_memory. (gomp_copy_from_async): Adjust to call gomp_copy_dev2host. (gomp_unmap_vars): Likewise. (gomp_update): Adjust to call gomp_copy_dev2host and gomp_copy_host2dev functions. (gomp_unload_image_from_device): Handle false value from unload_image_func plugin hook. (gomp_init_device): Handle false value from init_device_func plugin hook. (gomp_exit_data): Adjust to call gomp_copy_dev2host. (omp_target_free): Adjust to call gomp_free_device_memory. (omp_target_memcpy): Handle return values from host2dev_func, dev2host_func, and dev2dev_func plugin hooks. (omp_target_memcpy_rect_worker): Likewise. (gomp_target_fini): Handle false value from fini_device_func plugin hook. * libgomp.h (struct gomp_device_descr): Adjust return type of init_device_func, fini_device_func, unload_image_func, free_func, dev2host_func,host2dev_func, and dev2dev_func plugin hooks to 'bool'. * oacc-init.c (acc_shutdown_1): Handle false value from fini_device_func plugin hook. * oacc-host.c (host_init_device): Change return type to bool. (host_fini_device): Likewise. (host_unload_image): Likewise. (host_free): Likewise. (host_dev2host): Likewise. (host_host2dev): Likewise. * oacc-mem.c (acc_free): Handle plugin hook fatal error case. (acc_memcpy_to_device): Likewise. (acc_memcpy_from_device): Likewise. (delete_copyout): Add libfnname parameter, handle free_func hook fatal error case. (acc_delete): Adjust delete_copyout call. (acc_copyout): Likewise. (update_dev_host): Move gomp_mutex_unlock to after host2dev/dev2host hook calls. 
* plugin/plugin-hsa.c (hsa_warn): Adjust 'hsa_error' local variable to 'hsa_error_msg', for clarity. (hsa_fatal): Likewise. (hsa_error): New function. (init_hsa_context): Change return type to bool, adjust to return false on error. (GOMP_OFFLOAD_get_num_devices): Adjust to handle init_hsa_context return value. (GOMP_OFFLOAD_init_device): Change return type to bool, adjust to return false on error. (get_agent_info): Adjust to return NULL on error. (destroy_hsa_program): Change return type to bool, adjust to return false on error. (GOMP_OFFLOAD_load_image): Adjust to return -1 on error. (destroy_module): Change return type to bool, adjust to return false on error. (GOMP_OFFLOAD_unload_image): Likewise. (GOMP_OFFLOAD_fini_device): Likewise. (GOMP_OFFLOAD_alloc): Change to return NULL when called. (GOMP_OFFLOAD_free): Change to return false when called. (GOMP_OFFLOAD_dev2host): Likewise. (GOMP_OFFLOAD_host2dev): Likewise. (GOMP_OFFLOAD_dev2dev): Likewise. * plugin/plugin-nvptx.c (CUDA_CALL_ERET): New convenience macro. (CUDA_CALL): Likewise. (CUDA_CALL_ASSERT): Likewise. (map_init): Change return type to bool, use CUDA_CALL* macros. (map_fini): Likewise. (init_streams_for_device): Change return type to bool, adjust call to map_init. (fini_streams_for_device): Change return type to bool, adjust call to map_fini. (select_stream_for_async): Release stream_lock before calls to GOMP_PLUGIN_fatal, adjust call to map_init. (nvptx_init): Use CUDA_CALL* macros. (nvptx_attach_host_thread_to_device): Change return type to bool, use CUDA_CALL* macros. (nvptx_open_device): Use CUDA_CALL* macros. (nvptx_close_device): Change return type to bool, use CUDA_CALL* macros. (nvptx_get_num_devices): Use CUDA_CALL* macros. (link_ptx): Change return type to bool, use CUDA_CALL* macros. (nvptx_exec): Use CUDA_CALL* macros. (nvptx_alloc): Use CUDA_CALL* macros. (nvptx_free): Change return type to bool, use CUDA_CALL* macros. (nvptx_host2dev): Likewise. (nvptx_dev2host): Likewise. 
(nvptx_wait): Use CUDA_CALL* macros. (nvptx_wait_async): Likewise. (nvptx_wait_all): Likewise. (nvptx_wait_all_async): Likewise. (nvptx_set_cuda_stream): Adjust order of stream_lock acquire, use CUDA_CALL* macros, adjust call to map_fini. (GOMP_OFFLOAD_init_device): Change return type to bool, adjust code accordingly. (GOMP_OFFLOAD_fini_device): Likewise. (GOMP_OFFLOAD_load_image): Adjust calls to nvptx_attach_host_thread_to_device/link_ptx to handle errors, use CUDA_CALL* macros. (GOMP_OFFLOAD_unload_image): Change return type to bool, adjust return code. (GOMP_OFFLOAD_alloc): Adjust calls to code to handle error return. (GOMP_OFFLOAD_free): Change return type to bool, adjust calls to handle error return. (GOMP_OFFLOAD_dev2host): Likewise. (GOMP_OFFLOAD_host2dev): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Use CUDA_CALL* macros. (GOMP_OFFLOAD_openacc_create_thread_data): Likewise. liboffloadmic/ 2016-05-26 Chung-Lin Tang <cltang@codesourcery.com> * plugin/libgomp-plugin-intelmic.cpp (offload): Change return type to bool, adjust return code. (GOMP_OFFLOAD_init_device): Likewise. (GOMP_OFFLOAD_fini_device): Likewise. (get_target_table): Likewise. (offload_image): Likewise. (GOMP_OFFLOAD_load_image): Adjust call to offload_image(), change to return -1 on error. (GOMP_OFFLOAD_unload_image): Change return type to bool, adjust return code. (GOMP_OFFLOAD_alloc): Likewise. (GOMP_OFFLOAD_free): Likewise. (GOMP_OFFLOAD_host2dev): Likewise. (GOMP_OFFLOAD_dev2host): Likewise. (GOMP_OFFLOAD_dev2dev): Likewise. From-SVN: r236768
This commit is contained in:
parent
1875b9a02d
commit
6ce1307231
10 changed files with 852 additions and 573 deletions
|
@ -1,3 +1,120 @@
|
|||
2016-05-26 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
* target.c (gomp_device_copy): New function.
|
||||
(gomp_copy_host2dev): Likewise.
|
||||
(gomp_copy_dev2host): Likewise.
|
||||
(gomp_free_device_memory): Likewise.
|
||||
(gomp_map_vars_existing): Adjust to call gomp_copy_host2dev.
|
||||
(gomp_map_pointer): Likewise.
|
||||
(gomp_map_vars): Adjust to call gomp_copy_host2dev, handle
|
||||
NULL value from alloc_func plugin hook.
|
||||
(gomp_unmap_tgt): Adjust to call gomp_free_device_memory.
|
||||
(gomp_copy_from_async): Adjust to call gomp_copy_dev2host.
|
||||
(gomp_unmap_vars): Likewise.
|
||||
(gomp_update): Adjust to call gomp_copy_dev2host and
|
||||
gomp_copy_host2dev functions.
|
||||
(gomp_unload_image_from_device): Handle false value from
|
||||
unload_image_func plugin hook.
|
||||
(gomp_init_device): Handle false value from init_device_func
|
||||
plugin hook.
|
||||
(gomp_exit_data): Adjust to call gomp_copy_dev2host.
|
||||
(omp_target_free): Adjust to call gomp_free_device_memory.
|
||||
(omp_target_memcpy): Handle return values from host2dev_func,
|
||||
dev2host_func, and dev2dev_func plugin hooks.
|
||||
(omp_target_memcpy_rect_worker): Likewise.
|
||||
(gomp_target_fini): Handle false value from fini_device_func
|
||||
plugin hook.
|
||||
* libgomp.h (struct gomp_device_descr): Adjust return type of
|
||||
init_device_func, fini_device_func, unload_image_func, free_func,
|
||||
dev2host_func, host2dev_func, and dev2dev_func plugin hooks to 'bool'.
|
||||
* oacc-init.c (acc_shutdown_1): Handle false value from
|
||||
fini_device_func plugin hook.
|
||||
* oacc-host.c (host_init_device): Change return type to bool.
|
||||
(host_fini_device): Likewise.
|
||||
(host_unload_image): Likewise.
|
||||
(host_free): Likewise.
|
||||
(host_dev2host): Likewise.
|
||||
(host_host2dev): Likewise.
|
||||
* oacc-mem.c (acc_free): Handle plugin hook fatal error case.
|
||||
(acc_memcpy_to_device): Likewise.
|
||||
(acc_memcpy_from_device): Likewise.
|
||||
(delete_copyout): Add libfnname parameter, handle free_func
|
||||
hook fatal error case.
|
||||
(acc_delete): Adjust delete_copyout call.
|
||||
(acc_copyout): Likewise.
|
||||
(update_dev_host): Move gomp_mutex_unlock to after
|
||||
host2dev/dev2host hook calls.
|
||||
|
||||
* plugin/plugin-hsa.c (hsa_warn): Adjust 'hsa_error' local variable
|
||||
to 'hsa_error_msg', for clarity.
|
||||
(hsa_fatal): Likewise.
|
||||
(hsa_error): New function.
|
||||
(init_hsa_context): Change return type to bool, adjust to return
|
||||
false on error.
|
||||
(GOMP_OFFLOAD_get_num_devices): Adjust to handle init_hsa_context
|
||||
return value.
|
||||
(GOMP_OFFLOAD_init_device): Change return type to bool, adjust to
|
||||
return false on error.
|
||||
(get_agent_info): Adjust to return NULL on error.
|
||||
(destroy_hsa_program): Change return type to bool, adjust to
|
||||
return false on error.
|
||||
(GOMP_OFFLOAD_load_image): Adjust to return -1 on error.
|
||||
(destroy_module): Change return type to bool, adjust to
|
||||
return false on error.
|
||||
(GOMP_OFFLOAD_unload_image): Likewise.
|
||||
(GOMP_OFFLOAD_fini_device): Likewise.
|
||||
(GOMP_OFFLOAD_alloc): Change to return NULL when called.
|
||||
(GOMP_OFFLOAD_free): Change to return false when called.
|
||||
(GOMP_OFFLOAD_dev2host): Likewise.
|
||||
(GOMP_OFFLOAD_host2dev): Likewise.
|
||||
(GOMP_OFFLOAD_dev2dev): Likewise.
|
||||
|
||||
* plugin/plugin-nvptx.c (CUDA_CALL_ERET): New convenience macro.
|
||||
(CUDA_CALL): Likewise.
|
||||
(CUDA_CALL_ASSERT): Likewise.
|
||||
(map_init): Change return type to bool, use CUDA_CALL* macros.
|
||||
(map_fini): Likewise.
|
||||
(init_streams_for_device): Change return type to bool, adjust
|
||||
call to map_init.
|
||||
(fini_streams_for_device): Change return type to bool, adjust
|
||||
call to map_fini.
|
||||
(select_stream_for_async): Release stream_lock before calls to
|
||||
GOMP_PLUGIN_fatal, adjust call to map_init.
|
||||
(nvptx_init): Use CUDA_CALL* macros.
|
||||
(nvptx_attach_host_thread_to_device): Change return type to bool,
|
||||
use CUDA_CALL* macros.
|
||||
(nvptx_open_device): Use CUDA_CALL* macros.
|
||||
(nvptx_close_device): Change return type to bool, use CUDA_CALL*
|
||||
macros.
|
||||
(nvptx_get_num_devices): Use CUDA_CALL* macros.
|
||||
(link_ptx): Change return type to bool, use CUDA_CALL* macros.
|
||||
(nvptx_exec): Use CUDA_CALL* macros.
|
||||
(nvptx_alloc): Use CUDA_CALL* macros.
|
||||
(nvptx_free): Change return type to bool, use CUDA_CALL* macros.
|
||||
(nvptx_host2dev): Likewise.
|
||||
(nvptx_dev2host): Likewise.
|
||||
(nvptx_wait): Use CUDA_CALL* macros.
|
||||
(nvptx_wait_async): Likewise.
|
||||
(nvptx_wait_all): Likewise.
|
||||
(nvptx_wait_all_async): Likewise.
|
||||
(nvptx_set_cuda_stream): Adjust order of stream_lock acquire,
|
||||
use CUDA_CALL* macros, adjust call to map_fini.
|
||||
(GOMP_OFFLOAD_init_device): Change return type to bool,
|
||||
adjust code accordingly.
|
||||
(GOMP_OFFLOAD_fini_device): Likewise.
|
||||
(GOMP_OFFLOAD_load_image): Adjust calls to
|
||||
nvptx_attach_host_thread_to_device/link_ptx to handle errors,
|
||||
use CUDA_CALL* macros.
|
||||
(GOMP_OFFLOAD_unload_image): Change return type to bool, adjust
|
||||
return code.
|
||||
(GOMP_OFFLOAD_alloc): Adjust calls to code to handle error return.
|
||||
(GOMP_OFFLOAD_free): Change return type to bool, adjust calls to
|
||||
handle error return.
|
||||
(GOMP_OFFLOAD_dev2host): Likewise.
|
||||
(GOMP_OFFLOAD_host2dev): Likewise.
|
||||
(GOMP_OFFLOAD_openacc_register_async_cleanup): Use CUDA_CALL* macros.
|
||||
(GOMP_OFFLOAD_openacc_create_thread_data): Likewise.
|
||||
|
||||
2016-05-24 Cesar Philippidis <cesar@codesourcery.com>
|
||||
|
||||
* oacc-mem.c (acc_malloc): Update handling of shared-memory targets.
|
||||
|
|
|
@ -930,16 +930,16 @@ struct gomp_device_descr
|
|||
unsigned int (*get_caps_func) (void);
|
||||
int (*get_type_func) (void);
|
||||
int (*get_num_devices_func) (void);
|
||||
void (*init_device_func) (int);
|
||||
void (*fini_device_func) (int);
|
||||
bool (*init_device_func) (int);
|
||||
bool (*fini_device_func) (int);
|
||||
unsigned (*version_func) (void);
|
||||
int (*load_image_func) (int, unsigned, const void *, struct addr_pair **);
|
||||
void (*unload_image_func) (int, unsigned, const void *);
|
||||
bool (*unload_image_func) (int, unsigned, const void *);
|
||||
void *(*alloc_func) (int, size_t);
|
||||
void (*free_func) (int, void *);
|
||||
void *(*dev2host_func) (int, void *, const void *, size_t);
|
||||
void *(*host2dev_func) (int, void *, const void *, size_t);
|
||||
void *(*dev2dev_func) (int, void *, const void *, size_t);
|
||||
bool (*free_func) (int, void *);
|
||||
bool (*dev2host_func) (int, void *, const void *, size_t);
|
||||
bool (*host2dev_func) (int, void *, const void *, size_t);
|
||||
bool (*dev2dev_func) (int, void *, const void *, size_t);
|
||||
bool (*can_run_func) (void *);
|
||||
void (*run_func) (int, void *, void *, void **);
|
||||
void (*async_run_func) (int, void *, void *, void **, void *);
|
||||
|
|
|
@ -60,14 +60,16 @@ host_get_num_devices (void)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
host_init_device (int n __attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
host_fini_device (int n __attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
@ -85,11 +87,12 @@ host_load_image (int n __attribute__ ((unused)),
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
host_unload_image (int n __attribute__ ((unused)),
|
||||
unsigned v __attribute__ ((unused)),
|
||||
const void *t __attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *
|
||||
|
@ -98,28 +101,29 @@ host_alloc (int n __attribute__ ((unused)), size_t s)
|
|||
return gomp_malloc (s);
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
host_free (int n __attribute__ ((unused)), void *p)
|
||||
{
|
||||
free (p);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *
|
||||
static bool
|
||||
host_dev2host (int n __attribute__ ((unused)),
|
||||
void *h __attribute__ ((unused)),
|
||||
const void *d __attribute__ ((unused)),
|
||||
size_t s __attribute__ ((unused)))
|
||||
{
|
||||
return NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *
|
||||
static bool
|
||||
host_host2dev (int n __attribute__ ((unused)),
|
||||
void *d __attribute__ ((unused)),
|
||||
const void *h __attribute__ ((unused)),
|
||||
size_t s __attribute__ ((unused)))
|
||||
{
|
||||
return NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -300,8 +300,8 @@ acc_shutdown_1 (acc_device_t d)
|
|||
|
||||
gomp_mutex_unlock (&goacc_thread_lock);
|
||||
|
||||
|
||||
/* Close all the devices of this type that have been opened. */
|
||||
bool ret = true;
|
||||
for (i = 0; i < ndevs; i++)
|
||||
{
|
||||
struct gomp_device_descr *acc_dev = &base_dev[i];
|
||||
|
@ -309,12 +309,15 @@ acc_shutdown_1 (acc_device_t d)
|
|||
if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
|
||||
{
|
||||
devices_active = true;
|
||||
acc_dev->fini_device_func (acc_dev->target_id);
|
||||
ret &= acc_dev->fini_device_func (acc_dev->target_id);
|
||||
acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
|
||||
}
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
gomp_fatal ("device finalization failed");
|
||||
|
||||
if (!devices_active)
|
||||
gomp_fatal ("no device initialized");
|
||||
}
|
||||
|
|
|
@ -149,7 +149,8 @@ acc_free (void *d)
|
|||
else
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
|
||||
acc_dev->free_func (acc_dev->target_id, d);
|
||||
if (!acc_dev->free_func (acc_dev->target_id, d))
|
||||
gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -167,7 +168,8 @@ acc_memcpy_to_device (void *d, void *h, size_t s)
|
|||
return;
|
||||
}
|
||||
|
||||
thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
|
||||
if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
|
||||
gomp_fatal ("error in %s", __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -185,7 +187,8 @@ acc_memcpy_from_device (void *h, void *d, size_t s)
|
|||
return;
|
||||
}
|
||||
|
||||
thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
|
||||
if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
|
||||
gomp_fatal ("error in %s", __FUNCTION__);
|
||||
}
|
||||
|
||||
/* Return the device pointer that corresponds to host data H. Or NULL
|
||||
|
@ -520,7 +523,7 @@ acc_present_or_copyin (void *h, size_t s)
|
|||
#define FLAG_COPYOUT (1 << 0)
|
||||
|
||||
static void
|
||||
delete_copyout (unsigned f, void *h, size_t s)
|
||||
delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
|
||||
{
|
||||
size_t host_size;
|
||||
splay_tree_key n;
|
||||
|
@ -563,18 +566,20 @@ delete_copyout (unsigned f, void *h, size_t s)
|
|||
|
||||
acc_unmap_data (h);
|
||||
|
||||
acc_dev->free_func (acc_dev->target_id, d);
|
||||
if (!acc_dev->free_func (acc_dev->target_id, d))
|
||||
gomp_fatal ("error in freeing device memory in %s", libfnname);
|
||||
}
|
||||
|
||||
void
|
||||
acc_delete (void *h , size_t s)
|
||||
{
|
||||
delete_copyout (0, h, s);
|
||||
delete_copyout (0, h, s, __FUNCTION__);
|
||||
}
|
||||
|
||||
void acc_copyout (void *h, size_t s)
|
||||
void
|
||||
acc_copyout (void *h, size_t s)
|
||||
{
|
||||
delete_copyout (FLAG_COPYOUT, h, s);
|
||||
delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -604,12 +609,12 @@ update_dev_host (int is_dev, void *h, size_t s)
|
|||
d = (void *) (n->tgt->tgt_start + n->tgt_offset
|
||||
+ (uintptr_t) h - n->host_start);
|
||||
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
|
||||
if (is_dev)
|
||||
acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
|
||||
else
|
||||
acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
|
||||
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -175,10 +175,10 @@ hsa_warn (const char *str, hsa_status_t status)
|
|||
if (!debug)
|
||||
return;
|
||||
|
||||
const char *hsa_error;
|
||||
hsa_status_string (status, &hsa_error);
|
||||
const char *hsa_error_msg;
|
||||
hsa_status_string (status, &hsa_error_msg);
|
||||
|
||||
fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
|
||||
fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
|
||||
}
|
||||
|
||||
/* Report a fatal error STR together with the HSA error corresponding to STATUS
|
||||
|
@ -187,10 +187,23 @@ hsa_warn (const char *str, hsa_status_t status)
|
|||
static void
|
||||
hsa_fatal (const char *str, hsa_status_t status)
|
||||
{
|
||||
const char *hsa_error;
|
||||
hsa_status_string (status, &hsa_error);
|
||||
const char *hsa_error_msg;
|
||||
hsa_status_string (status, &hsa_error_msg);
|
||||
GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
|
||||
hsa_error);
|
||||
hsa_error_msg);
|
||||
}
|
||||
|
||||
/* Like hsa_fatal, except only report the error message and return FALSE,
|
||||
so that error handling can be propagated to outside of the plugin. */
|
||||
|
||||
static bool
|
||||
hsa_error (const char *str, hsa_status_t status)
|
||||
{
|
||||
const char *hsa_error_msg;
|
||||
hsa_status_string (status, &hsa_error_msg);
|
||||
GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
|
||||
hsa_error_msg);
|
||||
return false;
|
||||
}
|
||||
|
||||
struct hsa_kernel_description
|
||||
|
@ -418,24 +431,25 @@ assign_agent_ids (hsa_agent_t agent, void *data)
|
|||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initialize hsa_context if it has not already been done. */
|
||||
/* Initialize hsa_context if it has not already been done.
|
||||
Return TRUE on success. */
|
||||
|
||||
static void
|
||||
static bool
|
||||
init_hsa_context (void)
|
||||
{
|
||||
hsa_status_t status;
|
||||
int agent_index = 0;
|
||||
|
||||
if (hsa_context.initialized)
|
||||
return;
|
||||
return true;
|
||||
init_enviroment_variables ();
|
||||
status = hsa_init ();
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Run-time could not be initialized", status);
|
||||
return hsa_error ("Run-time could not be initialized", status);
|
||||
HSA_DEBUG ("HSA run-time initialized\n");
|
||||
status = hsa_iterate_agents (count_gpu_agents, NULL);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("HSA GPU devices could not be enumerated", status);
|
||||
return hsa_error ("HSA GPU devices could not be enumerated", status);
|
||||
HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
|
||||
|
||||
hsa_context.agents
|
||||
|
@ -443,8 +457,12 @@ init_hsa_context (void)
|
|||
* sizeof (struct agent_info));
|
||||
status = hsa_iterate_agents (assign_agent_ids, &agent_index);
|
||||
if (agent_index != hsa_context.agent_count)
|
||||
GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
|
||||
return false;
|
||||
}
|
||||
hsa_context.initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Callback of dispatch queues to report errors. */
|
||||
|
@ -492,75 +510,100 @@ get_kernarg_memory_region (hsa_region_t region, void *data)
|
|||
int
|
||||
GOMP_OFFLOAD_get_num_devices (void)
|
||||
{
|
||||
init_hsa_context ();
|
||||
if (!init_hsa_context ())
|
||||
return 0;
|
||||
return hsa_context.agent_count;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Initialize agent number N so that it
|
||||
can be used for computation. */
|
||||
can be used for computation. Return TRUE on success. */
|
||||
|
||||
void
|
||||
bool
|
||||
GOMP_OFFLOAD_init_device (int n)
|
||||
{
|
||||
init_hsa_context ();
|
||||
if (!init_hsa_context ())
|
||||
return false;
|
||||
if (n >= hsa_context.agent_count)
|
||||
GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n);
|
||||
{
|
||||
GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
|
||||
return false;
|
||||
}
|
||||
struct agent_info *agent = &hsa_context.agents[n];
|
||||
|
||||
if (agent->initialized)
|
||||
return;
|
||||
return true;
|
||||
|
||||
if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
|
||||
GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
|
||||
return false;
|
||||
}
|
||||
if (pthread_mutex_init (&agent->prog_mutex, NULL))
|
||||
GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t queue_size;
|
||||
hsa_status_t status;
|
||||
status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
|
||||
&queue_size);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
|
||||
return hsa_error ("Error requesting maximum queue size of the HSA agent",
|
||||
status);
|
||||
status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Error querying the ISA of the agent", status);
|
||||
return hsa_error ("Error querying the ISA of the agent", status);
|
||||
status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
|
||||
queue_callback, NULL, UINT32_MAX, UINT32_MAX,
|
||||
&agent->command_q);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Error creating command queue", status);
|
||||
return hsa_error ("Error creating command queue", status);
|
||||
|
||||
status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
|
||||
queue_callback, NULL, UINT32_MAX, UINT32_MAX,
|
||||
&agent->kernel_dispatch_command_q);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Error creating kernel dispatch command queue", status);
|
||||
return hsa_error ("Error creating kernel dispatch command queue", status);
|
||||
|
||||
agent->kernarg_region.handle = (uint64_t) -1;
|
||||
status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
|
||||
&agent->kernarg_region);
|
||||
if (agent->kernarg_region.handle == (uint64_t) -1)
|
||||
GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
|
||||
"arguments");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
|
||||
"arguments");
|
||||
return false;
|
||||
}
|
||||
HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
|
||||
(long long unsigned) agent->command_q->id);
|
||||
HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
|
||||
(long long unsigned) agent->kernel_dispatch_command_q->id);
|
||||
agent->initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Verify that hsa_context has already been initialized and return the
|
||||
agent_info structure describing device number N. */
|
||||
agent_info structure describing device number N. Return NULL on error. */
|
||||
|
||||
static struct agent_info *
|
||||
get_agent_info (int n)
|
||||
{
|
||||
if (!hsa_context.initialized)
|
||||
GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
|
||||
return NULL;
|
||||
}
|
||||
if (n >= hsa_context.agent_count)
|
||||
GOMP_PLUGIN_fatal ("Request to operate on anon-existing HSA device %i", n);
|
||||
{
|
||||
GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
|
||||
return NULL;
|
||||
}
|
||||
if (!hsa_context.agents[n].initialized)
|
||||
GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
|
||||
return NULL;
|
||||
}
|
||||
return &hsa_context.agents[n];
|
||||
}
|
||||
|
||||
|
@ -590,13 +633,14 @@ remove_module_from_agent (struct agent_info *agent, struct module_info *module)
|
|||
}
|
||||
|
||||
/* Free the HSA program in agent and everything associated with it and set
|
||||
agent->prog_finalized and the initialized flags of all kernels to false. */
|
||||
agent->prog_finalized and the initialized flags of all kernels to false.
|
||||
Return TRUE on success. */
|
||||
|
||||
static void
|
||||
static bool
|
||||
destroy_hsa_program (struct agent_info *agent)
|
||||
{
|
||||
if (!agent->prog_finalized || agent->prog_finalized_error)
|
||||
return;
|
||||
return true;
|
||||
|
||||
hsa_status_t status;
|
||||
|
||||
|
@ -604,7 +648,7 @@ destroy_hsa_program (struct agent_info *agent)
|
|||
|
||||
status = hsa_executable_destroy (agent->executable);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Could not destroy HSA executable", status);
|
||||
return hsa_error ("Could not destroy HSA executable", status);
|
||||
|
||||
struct module_info *module;
|
||||
for (module = agent->first_module; module; module = module->next)
|
||||
|
@ -614,6 +658,7 @@ destroy_hsa_program (struct agent_info *agent)
|
|||
module->kernels[i].initialized = false;
|
||||
}
|
||||
agent->prog_finalized = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Load BRIG module described by struct
|
||||
|
@ -625,9 +670,12 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
|
|||
struct addr_pair **target_table)
|
||||
{
|
||||
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
|
||||
GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
|
||||
{
|
||||
GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
|
||||
struct agent_info *agent;
|
||||
|
@ -637,10 +685,17 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
|
|||
int kernel_count = image_desc->kernel_count;
|
||||
|
||||
agent = get_agent_info (ord);
|
||||
if (!agent)
|
||||
return -1;
|
||||
|
||||
if (pthread_rwlock_wrlock (&agent->modules_rwlock))
|
||||
GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
|
||||
if (agent->prog_finalized)
|
||||
destroy_hsa_program (agent);
|
||||
{
|
||||
GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
|
||||
return -1;
|
||||
}
|
||||
if (agent->prog_finalized
|
||||
&& !destroy_hsa_program (agent))
|
||||
return -1;
|
||||
|
||||
HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
|
||||
pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
|
||||
|
@ -668,7 +723,10 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
|
|||
kernel->dependencies_count = d->kernel_dependencies_count;
|
||||
kernel->dependencies = d->kernel_dependencies;
|
||||
if (pthread_mutex_init (&kernel->init_mutex, NULL))
|
||||
GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel++;
|
||||
pair++;
|
||||
|
@ -676,7 +734,10 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
|
|||
|
||||
add_module_to_agent (agent, module);
|
||||
if (pthread_rwlock_unlock (&agent->modules_rwlock))
|
||||
GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
|
||||
return -1;
|
||||
}
|
||||
return kernel_count;
|
||||
}
|
||||
|
||||
|
@ -1373,34 +1434,47 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
|
|||
}
|
||||
|
||||
/* Deinitialize all information associated with MODULE and kernels within
|
||||
it. */
|
||||
it. Return TRUE on success. */
|
||||
|
||||
void
|
||||
static bool
|
||||
destroy_module (struct module_info *module)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < module->kernel_count; i++)
|
||||
if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
|
||||
GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization "
|
||||
"mutex");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
|
||||
"mutex");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Unload BRIG module described by
|
||||
struct brig_image_desc in TARGET_DATA from agent number N. */
|
||||
struct brig_image_desc in TARGET_DATA from agent number N. Return
|
||||
TRUE on success. */
|
||||
|
||||
void
|
||||
bool
|
||||
GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
|
||||
{
|
||||
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
|
||||
GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
|
||||
{
|
||||
GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
|
||||
return false;
|
||||
}
|
||||
|
||||
struct agent_info *agent;
|
||||
agent = get_agent_info (n);
|
||||
if (pthread_rwlock_wrlock (&agent->modules_rwlock))
|
||||
GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
|
||||
if (!agent)
|
||||
return false;
|
||||
|
||||
if (pthread_rwlock_wrlock (&agent->modules_rwlock))
|
||||
{
|
||||
GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
|
||||
return false;
|
||||
}
|
||||
struct module_info *module = agent->first_module;
|
||||
while (module)
|
||||
{
|
||||
|
@ -1409,54 +1483,75 @@ GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
|
|||
module = module->next;
|
||||
}
|
||||
if (!module)
|
||||
GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been "
|
||||
"loaded before");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
|
||||
"loaded before");
|
||||
return false;
|
||||
}
|
||||
|
||||
remove_module_from_agent (agent, module);
|
||||
destroy_module (module);
|
||||
if (!destroy_module (module))
|
||||
return false;
|
||||
free (module);
|
||||
destroy_hsa_program (agent);
|
||||
if (!destroy_hsa_program (agent))
|
||||
return false;
|
||||
if (pthread_rwlock_unlock (&agent->modules_rwlock))
|
||||
GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Deinitialize all information and
|
||||
status associated with agent number N. We do not attempt any
|
||||
synchronization, assuming the user and libgomp will not attempt
|
||||
deinitialization of a device that is in any way being used at the same
|
||||
time. */
|
||||
time. Return TRUE on success. */
|
||||
|
||||
void
|
||||
bool
|
||||
GOMP_OFFLOAD_fini_device (int n)
|
||||
{
|
||||
struct agent_info *agent = get_agent_info (n);
|
||||
if (!agent)
|
||||
return false;
|
||||
|
||||
if (!agent->initialized)
|
||||
return;
|
||||
return true;
|
||||
|
||||
struct module_info *next_module = agent->first_module;
|
||||
while (next_module)
|
||||
{
|
||||
struct module_info *module = next_module;
|
||||
next_module = module->next;
|
||||
destroy_module (module);
|
||||
if (!destroy_module (module))
|
||||
return false;
|
||||
free (module);
|
||||
}
|
||||
agent->first_module = NULL;
|
||||
destroy_hsa_program (agent);
|
||||
if (!destroy_hsa_program (agent))
|
||||
return false;
|
||||
|
||||
release_agent_shared_libraries (agent);
|
||||
|
||||
hsa_status_t status = hsa_queue_destroy (agent->command_q);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Error destroying command queue", status);
|
||||
return hsa_error ("Error destroying command queue", status);
|
||||
status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Error destroying kernel dispatch command queue", status);
|
||||
return hsa_error ("Error destroying kernel dispatch command queue", status);
|
||||
if (pthread_mutex_destroy (&agent->prog_mutex))
|
||||
GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
|
||||
return false;
|
||||
}
|
||||
if (pthread_rwlock_destroy (&agent->modules_rwlock))
|
||||
GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock");
|
||||
{
|
||||
GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
|
||||
return false;
|
||||
}
|
||||
agent->initialized = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Not implemented as it is not required
|
||||
|
@ -1465,46 +1560,51 @@ GOMP_OFFLOAD_fini_device (int n)
|
|||
void *
|
||||
GOMP_OFFLOAD_alloc (int ord, size_t size)
|
||||
{
|
||||
GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because "
|
||||
GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
|
||||
"it should never be called");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Not implemented as it is not required
|
||||
for HSA. */
|
||||
|
||||
void
|
||||
bool
|
||||
GOMP_OFFLOAD_free (int ord, void *ptr)
|
||||
{
|
||||
GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because "
|
||||
GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
|
||||
"it should never be called");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Not implemented as it is not required
|
||||
for HSA. */
|
||||
|
||||
void *
|
||||
bool
|
||||
GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
|
||||
{
|
||||
GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
|
||||
GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
|
||||
"it should never be called");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Not implemented as it is not required
|
||||
for HSA. */
|
||||
|
||||
void *
|
||||
bool
|
||||
GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
|
||||
{
|
||||
GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
|
||||
GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
|
||||
"it should never be called");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Part of the libgomp plugin interface. Not implemented as it is not required
|
||||
for HSA. */
|
||||
|
||||
void *
|
||||
bool
|
||||
GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
|
||||
{
|
||||
GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
|
||||
GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
|
||||
"it should never be called");
|
||||
return false;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
252
libgomp/target.c
252
libgomp/target.c
|
@ -162,6 +162,45 @@ gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
|
|||
return n;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_device_copy (struct gomp_device_descr *devicep,
|
||||
bool (*copy_func) (int, void *, const void *, size_t),
|
||||
const char *dst, void *dstaddr,
|
||||
const char *src, const void *srcaddr,
|
||||
size_t size)
|
||||
{
|
||||
if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
|
||||
src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_copy_host2dev (struct gomp_device_descr *devicep,
|
||||
void *d, const void *h, size_t sz)
|
||||
{
|
||||
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_copy_dev2host (struct gomp_device_descr *devicep,
|
||||
void *h, const void *d, size_t sz)
|
||||
{
|
||||
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
|
||||
{
|
||||
if (!devicep->free_func (devicep->target_id, devptr))
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
gomp_fatal ("error in freeing device memory block at %p", devptr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
|
||||
gomp_map_0len_lookup found oldn for newn.
|
||||
Helper function of gomp_map_vars. */
|
||||
|
@ -189,11 +228,12 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
|
|||
}
|
||||
|
||||
if (GOMP_MAP_ALWAYS_TO_P (kind))
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
|
||||
+ newn->host_start - oldn->host_start),
|
||||
(void *) newn->host_start,
|
||||
newn->host_end - newn->host_start);
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
|
||||
+ newn->host_start - oldn->host_start),
|
||||
(void *) newn->host_start,
|
||||
newn->host_end - newn->host_start);
|
||||
|
||||
if (oldn->refcount != REFCOUNT_INFINITY)
|
||||
oldn->refcount++;
|
||||
}
|
||||
|
@ -218,10 +258,10 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
|
|||
{
|
||||
cur_node.tgt_offset = (uintptr_t) NULL;
|
||||
/* FIXME: see comment about coalescing host/dev transfers below. */
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start + target_offset),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *));
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (tgt->tgt_start + target_offset),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *));
|
||||
return;
|
||||
}
|
||||
/* Add bias to the pointer value. */
|
||||
|
@ -241,10 +281,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
|
|||
to initialize the pointer with. */
|
||||
cur_node.tgt_offset -= bias;
|
||||
/* FIXME: see comment about coalescing host/dev transfers below. */
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start + target_offset),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *));
|
||||
gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
|
||||
(void *) &cur_node.tgt_offset, sizeof (void *));
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -515,6 +553,12 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
memory. */
|
||||
tgt->to_free = devicep->alloc_func (devicep->target_id,
|
||||
tgt_size + tgt_align - 1);
|
||||
if (!tgt->to_free)
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
gomp_fatal ("device memory allocation fail");
|
||||
}
|
||||
|
||||
tgt->tgt_start = (uintptr_t) tgt->to_free;
|
||||
tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
|
||||
tgt->tgt_end = tgt->tgt_start + tgt_size;
|
||||
|
@ -554,9 +598,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
||||
tgt->list[i].offset = tgt_size;
|
||||
len = sizes[i];
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start + tgt_size),
|
||||
(void *) hostaddrs[i], len);
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (tgt->tgt_start + tgt_size),
|
||||
(void *) hostaddrs[i], len);
|
||||
tgt_size += len;
|
||||
continue;
|
||||
case GOMP_MAP_FIRSTPRIVATE_INT:
|
||||
|
@ -608,13 +652,13 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
|
||||
if (cur_node.tgt_offset)
|
||||
cur_node.tgt_offset -= sizes[i];
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (n->tgt->tgt_start
|
||||
+ n->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- n->host_start),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *));
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (n->tgt->tgt_start
|
||||
+ n->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- n->host_start),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *));
|
||||
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
|
||||
+ cur_node.host_start - n->host_start;
|
||||
continue;
|
||||
|
@ -685,11 +729,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
/* FIXME: Perhaps add some smarts, like if copying
|
||||
several adjacent fields from host to target, use some
|
||||
host buffer to avoid sending each var individually. */
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
k->host_end - k->host_start);
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
k->host_end - k->host_start);
|
||||
break;
|
||||
case GOMP_MAP_POINTER:
|
||||
gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
|
||||
|
@ -697,11 +741,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
break;
|
||||
case GOMP_MAP_TO_PSET:
|
||||
/* FIXME: see above FIXME comment. */
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
k->host_end - k->host_start);
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
k->host_end - k->host_start);
|
||||
|
||||
for (j = i + 1; j < mapnum; j++)
|
||||
if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
|
||||
|
@ -748,12 +792,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
break;
|
||||
case GOMP_MAP_FORCE_DEVICEPTR:
|
||||
assert (k->host_end - k->host_start == sizeof (void *));
|
||||
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
sizeof (void *));
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
sizeof (void *));
|
||||
break;
|
||||
default:
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
|
@ -781,11 +824,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
{
|
||||
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
|
||||
/* FIXME: see above FIXME comment. */
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (tgt->tgt_start
|
||||
+ i * sizeof (void *)),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *));
|
||||
gomp_copy_host2dev (devicep,
|
||||
(void *) (tgt->tgt_start + i * sizeof (void *)),
|
||||
(void *) &cur_node.tgt_offset, sizeof (void *));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -807,7 +848,7 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
|
|||
{
|
||||
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
|
||||
if (tgt->tgt_end)
|
||||
tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
|
||||
gomp_free_device_memory (tgt->device_descr, tgt->to_free);
|
||||
|
||||
free (tgt->array);
|
||||
free (tgt);
|
||||
|
@ -839,9 +880,9 @@ gomp_copy_from_async (struct target_mem_desc *tgt)
|
|||
{
|
||||
splay_tree_key k = tgt->list[i].key;
|
||||
if (tgt->list[i].copy_from)
|
||||
devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset),
|
||||
k->host_end - k->host_start);
|
||||
gomp_copy_dev2host (devicep, (void *) k->host_start,
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset),
|
||||
k->host_end - k->host_start);
|
||||
}
|
||||
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
|
@ -894,11 +935,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
|||
|
||||
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|
||||
|| tgt->list[i].always_copy_from)
|
||||
devicep->dev2host_func (devicep->target_id,
|
||||
(void *) (k->host_start + tgt->list[i].offset),
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset
|
||||
+ tgt->list[i].offset),
|
||||
tgt->list[i].length);
|
||||
gomp_copy_dev2host (devicep,
|
||||
(void *) (k->host_start + tgt->list[i].offset),
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset
|
||||
+ tgt->list[i].offset),
|
||||
tgt->list[i].length);
|
||||
if (do_unmap)
|
||||
{
|
||||
splay_tree_remove (&devicep->mem_map, k);
|
||||
|
@ -961,22 +1002,17 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
|
|||
(void *) n->host_start,
|
||||
(void *) n->host_end);
|
||||
}
|
||||
|
||||
|
||||
void *hostaddr = (void *) cur_node.host_start;
|
||||
void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
|
||||
+ cur_node.host_start - n->host_start);
|
||||
size_t size = cur_node.host_end - cur_node.host_start;
|
||||
|
||||
if (GOMP_MAP_COPY_TO_P (kind & typemask))
|
||||
devicep->host2dev_func (devicep->target_id,
|
||||
(void *) (n->tgt->tgt_start
|
||||
+ n->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- n->host_start),
|
||||
(void *) cur_node.host_start,
|
||||
cur_node.host_end - cur_node.host_start);
|
||||
gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
|
||||
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
|
||||
devicep->dev2host_func (devicep->target_id,
|
||||
(void *) cur_node.host_start,
|
||||
(void *) (n->tgt->tgt_start
|
||||
+ n->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- n->host_start),
|
||||
cur_node.host_end - cur_node.host_start);
|
||||
gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
|
||||
}
|
||||
}
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
|
@ -1114,7 +1150,11 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
|
|||
node = splay_tree_lookup (&devicep->mem_map, &k);
|
||||
}
|
||||
|
||||
devicep->unload_image_func (devicep->target_id, version, target_data);
|
||||
if (!devicep->unload_image_func (devicep->target_id, version, target_data))
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
gomp_fatal ("image unload fail");
|
||||
}
|
||||
|
||||
/* Remove mappings from splay tree. */
|
||||
int i;
|
||||
|
@ -1261,7 +1301,11 @@ attribute_hidden void
|
|||
gomp_init_device (struct gomp_device_descr *devicep)
|
||||
{
|
||||
int i;
|
||||
devicep->init_device_func (devicep->target_id);
|
||||
if (!devicep->init_device_func (devicep->target_id))
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
gomp_fatal ("device initialization failed");
|
||||
}
|
||||
|
||||
/* Load to device all images registered by the moment. */
|
||||
for (i = 0; i < num_offload_images; i++)
|
||||
|
@ -1765,12 +1809,11 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
|
|||
|
||||
if ((kind == GOMP_MAP_FROM && k->refcount == 0)
|
||||
|| kind == GOMP_MAP_ALWAYS_FROM)
|
||||
devicep->dev2host_func (devicep->target_id,
|
||||
(void *) cur_node.host_start,
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- k->host_start),
|
||||
cur_node.host_end - cur_node.host_start);
|
||||
gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- k->host_start),
|
||||
cur_node.host_end - cur_node.host_start);
|
||||
if (k->refcount == 0)
|
||||
{
|
||||
splay_tree_remove (&devicep->mem_map, k);
|
||||
|
@ -2001,7 +2044,7 @@ omp_target_free (void *device_ptr, int device_num)
|
|||
}
|
||||
|
||||
gomp_mutex_lock (&devicep->lock);
|
||||
devicep->free_func (devicep->target_id, device_ptr);
|
||||
gomp_free_device_memory (devicep, device_ptr);
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
}
|
||||
|
||||
|
@ -2042,6 +2085,7 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
|
|||
size_t src_offset, int dst_device_num, int src_device_num)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
bool ret;
|
||||
|
||||
if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
|
||||
{
|
||||
|
@ -2077,29 +2121,29 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
|
|||
if (src_devicep == NULL)
|
||||
{
|
||||
gomp_mutex_lock (&dst_devicep->lock);
|
||||
dst_devicep->host2dev_func (dst_devicep->target_id,
|
||||
(char *) dst + dst_offset,
|
||||
(char *) src + src_offset, length);
|
||||
ret = dst_devicep->host2dev_func (dst_devicep->target_id,
|
||||
(char *) dst + dst_offset,
|
||||
(char *) src + src_offset, length);
|
||||
gomp_mutex_unlock (&dst_devicep->lock);
|
||||
return 0;
|
||||
return (ret ? 0 : EINVAL);
|
||||
}
|
||||
if (dst_devicep == NULL)
|
||||
{
|
||||
gomp_mutex_lock (&src_devicep->lock);
|
||||
src_devicep->dev2host_func (src_devicep->target_id,
|
||||
(char *) dst + dst_offset,
|
||||
(char *) src + src_offset, length);
|
||||
ret = src_devicep->dev2host_func (src_devicep->target_id,
|
||||
(char *) dst + dst_offset,
|
||||
(char *) src + src_offset, length);
|
||||
gomp_mutex_unlock (&src_devicep->lock);
|
||||
return 0;
|
||||
return (ret ? 0 : EINVAL);
|
||||
}
|
||||
if (src_devicep == dst_devicep)
|
||||
{
|
||||
gomp_mutex_lock (&src_devicep->lock);
|
||||
src_devicep->dev2dev_func (src_devicep->target_id,
|
||||
(char *) dst + dst_offset,
|
||||
(char *) src + src_offset, length);
|
||||
ret = src_devicep->dev2dev_func (src_devicep->target_id,
|
||||
(char *) dst + dst_offset,
|
||||
(char *) src + src_offset, length);
|
||||
gomp_mutex_unlock (&src_devicep->lock);
|
||||
return 0;
|
||||
return (ret ? 0 : EINVAL);
|
||||
}
|
||||
return EINVAL;
|
||||
}
|
||||
|
@ -2126,22 +2170,25 @@ omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
|
|||
|| __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
|
||||
return EINVAL;
|
||||
if (dst_devicep == NULL && src_devicep == NULL)
|
||||
memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
|
||||
{
|
||||
memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
|
||||
ret = 1;
|
||||
}
|
||||
else if (src_devicep == NULL)
|
||||
dst_devicep->host2dev_func (dst_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(char *) src + src_off, length);
|
||||
ret = dst_devicep->host2dev_func (dst_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(char *) src + src_off, length);
|
||||
else if (dst_devicep == NULL)
|
||||
src_devicep->dev2host_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(char *) src + src_off, length);
|
||||
ret = src_devicep->dev2host_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(char *) src + src_off, length);
|
||||
else if (src_devicep == dst_devicep)
|
||||
src_devicep->dev2dev_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(char *) src + src_off, length);
|
||||
ret = src_devicep->dev2dev_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(char *) src + src_off, length);
|
||||
else
|
||||
return EINVAL;
|
||||
return 0;
|
||||
ret = 0;
|
||||
return ret ? 0 : EINVAL;
|
||||
}
|
||||
|
||||
/* FIXME: it would be nice to have some plugin function to handle
|
||||
|
@ -2456,14 +2503,17 @@ gomp_target_fini (void)
|
|||
int i;
|
||||
for (i = 0; i < num_devices; i++)
|
||||
{
|
||||
bool ret = true;
|
||||
struct gomp_device_descr *devicep = &devices[i];
|
||||
gomp_mutex_lock (&devicep->lock);
|
||||
if (devicep->state == GOMP_DEVICE_INITIALIZED)
|
||||
{
|
||||
devicep->fini_device_func (devicep->target_id);
|
||||
ret = devicep->fini_device_func (devicep->target_id);
|
||||
devicep->state = GOMP_DEVICE_FINALIZED;
|
||||
}
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
if (!ret)
|
||||
gomp_fatal ("device finalization failed");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,21 @@
|
|||
2016-05-26 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
* plugin/libgomp-plugin-intelmic.cpp (offload): Change return type
|
||||
to bool, adjust return code.
|
||||
(GOMP_OFFLOAD_init_device): Likewise.
|
||||
(GOMP_OFFLOAD_fini_device): Likewise.
|
||||
(get_target_table): Likewise.
|
||||
(offload_image): Likewise.
|
||||
(GOMP_OFFLOAD_load_image): Adjust call to offload_image(), change
|
||||
to return -1 on error.
|
||||
(GOMP_OFFLOAD_unload_image): Change return type to bool, adjust return
|
||||
code.
|
||||
(GOMP_OFFLOAD_alloc): Likewise.
|
||||
(GOMP_OFFLOAD_free): Likewise.
|
||||
(GOMP_OFFLOAD_host2dev): Likewise.
|
||||
(GOMP_OFFLOAD_dev2host): Likewise.
|
||||
(GOMP_OFFLOAD_dev2dev): Likewise.
|
||||
|
||||
2016-01-20 Ilya Verbin <ilya.verbin@intel.com>
|
||||
|
||||
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_run): Pass extra NULL
|
||||
|
|
|
@ -205,7 +205,7 @@ GOMP_OFFLOAD_get_num_devices (void)
|
|||
return num_devices;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
offload (const char *file, uint64_t line, int device, const char *name,
|
||||
int num_vars, VarDesc *vars, const void **async_data)
|
||||
{
|
||||
|
@ -213,20 +213,21 @@ offload (const char *file, uint64_t line, int device, const char *name,
|
|||
if (ofld)
|
||||
{
|
||||
if (async_data == NULL)
|
||||
__offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0, NULL, NULL);
|
||||
return __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0,
|
||||
NULL, NULL);
|
||||
else
|
||||
{
|
||||
OffloadFlags flags;
|
||||
flags.flags = 0;
|
||||
flags.bits.omp_async = 1;
|
||||
__offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0, NULL,
|
||||
async_data, 0, NULL, flags, NULL);
|
||||
return __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0,
|
||||
NULL, async_data, 0, NULL, flags, NULL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line);
|
||||
exit (1);
|
||||
GOMP_PLUGIN_error ("%s:%d: Offload target acquire failed\n", file, line);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -244,25 +245,26 @@ register_main_image ()
|
|||
|
||||
/* liboffloadmic loads and runs offload_target_main on all available devices
|
||||
during the first call to offload (). */
|
||||
extern "C" void
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_init_device (int device)
|
||||
{
|
||||
TRACE ("(device = %d)", device);
|
||||
pthread_once (&main_image_is_registered, register_main_image);
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0, NULL,
|
||||
NULL);
|
||||
return offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_fini_device (int device)
|
||||
{
|
||||
TRACE ("(device = %d)", device);
|
||||
|
||||
/* liboffloadmic will finalize target processes on all available devices. */
|
||||
__offload_unregister_image (&main_target_image);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
|
||||
{
|
||||
VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host };
|
||||
|
@ -271,8 +273,9 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
|
|||
vd1[1].ptr = &num_vars;
|
||||
vd1[1].size = sizeof (num_vars);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2, vd1,
|
||||
NULL);
|
||||
if (!offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
|
||||
vd1, NULL))
|
||||
return false;
|
||||
|
||||
int table_size = num_funcs + 2 * num_vars;
|
||||
if (table_size > 0)
|
||||
|
@ -284,15 +287,16 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
|
|||
vd2.ptr = table;
|
||||
vd2.size = table_size * sizeof (void *);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1, &vd2,
|
||||
NULL);
|
||||
return offload (__FILE__, __LINE__, device, "__offload_target_table_p2",
|
||||
1, &vd2, NULL);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Offload TARGET_IMAGE to all available devices and fill address_table with
|
||||
corresponding target addresses. */
|
||||
|
||||
static void
|
||||
static bool
|
||||
offload_image (const void *target_image)
|
||||
{
|
||||
void *image_start = ((void **) target_image)[0];
|
||||
|
@ -306,8 +310,8 @@ offload_image (const void *target_image)
|
|||
+ image_size);
|
||||
if (!image)
|
||||
{
|
||||
fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
|
||||
exit (1);
|
||||
GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
|
||||
return false;
|
||||
}
|
||||
|
||||
image->size = image_size;
|
||||
|
@ -322,13 +326,14 @@ offload_image (const void *target_image)
|
|||
|
||||
/* Receive tables for target_image from all devices. */
|
||||
DevAddrVect dev_table;
|
||||
bool ret = true;
|
||||
for (int dev = 0; dev < num_devices; dev++)
|
||||
{
|
||||
int num_funcs = 0;
|
||||
int num_vars = 0;
|
||||
void **table = NULL;
|
||||
|
||||
get_target_table (dev, num_funcs, num_vars, table);
|
||||
ret &= get_target_table (dev, num_funcs, num_vars, table);
|
||||
|
||||
AddrVect curr_dev_table;
|
||||
|
||||
|
@ -357,6 +362,7 @@ offload_image (const void *target_image)
|
|||
|
||||
address_table->insert (std::make_pair (target_image, dev_table));
|
||||
image_descriptors->insert (std::make_pair (target_image, image));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Return the libgomp version number we're compatible with. There is
|
||||
|
@ -375,22 +381,29 @@ GOMP_OFFLOAD_load_image (int device, const unsigned version,
|
|||
TRACE ("(device = %d, target_image = %p)", device, target_image);
|
||||
|
||||
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
|
||||
GOMP_PLUGIN_fatal ("Offload data incompatible with intelmic plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
|
||||
{
|
||||
GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* If target_image is already present in address_table, then there is no need
|
||||
to offload it. */
|
||||
if (address_table->count (target_image) == 0)
|
||||
offload_image (target_image);
|
||||
{
|
||||
/* If fail, return -1 as error code. */
|
||||
if (!offload_image (target_image))
|
||||
return -1;
|
||||
}
|
||||
|
||||
AddrVect *curr_dev_table = &(*address_table)[target_image][device];
|
||||
int table_size = curr_dev_table->size ();
|
||||
addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
|
||||
if (table == NULL)
|
||||
{
|
||||
fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
|
||||
exit (1);
|
||||
GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
|
||||
|
@ -398,12 +411,17 @@ GOMP_OFFLOAD_load_image (int device, const unsigned version,
|
|||
return table_size;
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_unload_image (int device, unsigned version,
|
||||
const void *target_image)
|
||||
{
|
||||
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
|
||||
return;
|
||||
{
|
||||
GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
|
||||
" (expected %u, received %u)",
|
||||
GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
|
||||
return false;
|
||||
}
|
||||
|
||||
TRACE ("(device = %d, target_image = %p)", device, target_image);
|
||||
|
||||
|
@ -417,6 +435,7 @@ GOMP_OFFLOAD_unload_image (int device, unsigned version,
|
|||
address_table->erase (target_image);
|
||||
image_descriptors->erase (target_image);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
extern "C" void *
|
||||
|
@ -431,12 +450,14 @@ GOMP_OFFLOAD_alloc (int device, size_t size)
|
|||
vd[1].ptr = &tgt_ptr;
|
||||
vd[1].size = sizeof (void *);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd, NULL);
|
||||
if (!offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2,
|
||||
vd, NULL))
|
||||
return NULL;
|
||||
|
||||
return tgt_ptr;
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_free (int device, void *tgt_ptr)
|
||||
{
|
||||
TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr);
|
||||
|
@ -445,17 +466,18 @@ GOMP_OFFLOAD_free (int device, void *tgt_ptr)
|
|||
vd.ptr = &tgt_ptr;
|
||||
vd.size = sizeof (void *);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd, NULL);
|
||||
return offload (__FILE__, __LINE__, device, "__offload_target_free", 1,
|
||||
&vd, NULL);
|
||||
}
|
||||
|
||||
extern "C" void *
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
|
||||
size_t size)
|
||||
{
|
||||
TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %d)",
|
||||
device, tgt_ptr, host_ptr, size);
|
||||
if (!size)
|
||||
return tgt_ptr;
|
||||
return true;
|
||||
|
||||
VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
|
||||
vd1[0].ptr = &tgt_ptr;
|
||||
|
@ -463,27 +485,26 @@ GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
|
|||
vd1[1].ptr = &size;
|
||||
vd1[1].size = sizeof (size);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2, vd1,
|
||||
NULL);
|
||||
if (!offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
|
||||
vd1, NULL))
|
||||
return false;
|
||||
|
||||
VarDesc vd2 = vd_host2tgt;
|
||||
vd2.ptr = (void *) host_ptr;
|
||||
vd2.size = size;
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1, &vd2,
|
||||
NULL);
|
||||
|
||||
return tgt_ptr;
|
||||
return offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
|
||||
&vd2, NULL);
|
||||
}
|
||||
|
||||
extern "C" void *
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
|
||||
size_t size)
|
||||
{
|
||||
TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %d)",
|
||||
device, host_ptr, tgt_ptr, size);
|
||||
if (!size)
|
||||
return host_ptr;
|
||||
return true;
|
||||
|
||||
VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
|
||||
vd1[0].ptr = &tgt_ptr;
|
||||
|
@ -491,27 +512,26 @@ GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
|
|||
vd1[1].ptr = &size;
|
||||
vd1[1].size = sizeof (size);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2, vd1,
|
||||
NULL);
|
||||
if (!offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
|
||||
vd1, NULL))
|
||||
return false;
|
||||
|
||||
VarDesc vd2 = vd_tgt2host;
|
||||
vd2.ptr = (void *) host_ptr;
|
||||
vd2.size = size;
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1, &vd2,
|
||||
NULL);
|
||||
|
||||
return host_ptr;
|
||||
return offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
|
||||
&vd2, NULL);
|
||||
}
|
||||
|
||||
extern "C" void *
|
||||
extern "C" bool
|
||||
GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
|
||||
size_t size)
|
||||
{
|
||||
TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %d)",
|
||||
device, dst_ptr, src_ptr, size);
|
||||
if (!size)
|
||||
return dst_ptr;
|
||||
return true;
|
||||
|
||||
VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
|
||||
vd[0].ptr = &dst_ptr;
|
||||
|
@ -521,9 +541,8 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
|
|||
vd[2].ptr = &size;
|
||||
vd[2].size = sizeof (size);
|
||||
|
||||
offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd, NULL);
|
||||
|
||||
return dst_ptr;
|
||||
return offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3,
|
||||
vd, NULL);
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
|
|
Loading…
Add table
Reference in a new issue