Commit a51df54e by Ilya Verbin Committed by Ilya Verbin

libgomp: rework initialization of offloading

gcc/
	* config/i386/intelmic-mkoffload.c (generate_host_descr_file): Call
	GOMP_offload_unregister from the destructor.
libgomp/
	* libgomp-plugin.h (struct mapping_table): Replace with addr_pair.
	* libgomp.h (struct gomp_memory_mapping): Remove.
	(struct target_mem_desc): Change type of mem_map from
	gomp_memory_mapping * to splay_tree_s *.
	(struct gomp_device_descr): Remove register_image_func, get_table_func.
	Add load_image_func, unload_image_func.
	Change type of mem_map from gomp_memory_mapping to splay_tree_s.
	Remove offload_regions_registered.
	(gomp_init_tables): Remove.
	(gomp_free_memmap): Change type of argument from gomp_memory_mapping *
	to splay_tree_s *.
	* libgomp.map (GOMP_4.0.1): Add GOMP_offload_unregister.
	* oacc-host.c (host_dispatch): Do not initialize register_image_func,
	get_table_func, mem_map.is_initialized, mem_map.splay_tree.root,
	offload_regions_registered.
	Initialize load_image_func, unload_image_func, mem_map.root.
	(goacc_host_init): Do not initialize host_dispatch.mem_map.lock.
	* oacc-init.c (lazy_open): Don't call gomp_init_tables.
	(acc_shutdown_1): Use dev's lock and splay_tree instead of mem_map's.
	* oacc-mem.c (lookup_host): Get gomp_device_descr *dev instead of
	gomp_memory_mapping *.  Use dev's lock and splay_tree.
	(lookup_dev): Use dev's lock.
	(acc_deviceptr): Pass dev to lookup_host instead of mem_map.
	(acc_is_present): Likewise.
	(acc_map_data): Likewise.
	(acc_unmap_data): Likewise.  Use dev's lock.
	(present_create_copy): Likewise.
	(delete_copyout): Pass dev to lookup_host instead of mem_map.
	(update_dev_host): Likewise.
	(gomp_acc_remove_pointer): Likewise.  Use dev's lock.
	* oacc-parallel.c (GOACC_parallel): Use dev's lock and splay_tree.
	* plugin/plugin-host.c (GOMP_OFFLOAD_register_image): Remove.
	(GOMP_OFFLOAD_get_table): Remove.
	(GOMP_OFFLOAD_load_image): New function.
	(GOMP_OFFLOAD_unload_image): New function.
	* target.c (register_lock): New mutex for offload image registration.
	(num_devices): Do not guard with PLUGIN_SUPPORT.
	(gomp_realloc_unlock): New static function.
	(gomp_map_vars_existing): Add device descriptor argument.  Unlock mutex
	before gomp_fatal.
	(gomp_map_vars): Use dev's lock and splay_tree instead of mem_map's.
	Pass devicep to gomp_map_vars_existing.  Unlock mutex before gomp_fatal.
	(gomp_copy_from_async): Use dev's lock and splay_tree instead of
	mem_map's.
	(gomp_unmap_vars): Likewise.
	(gomp_update): Remove gomp_memory_mapping argument.  Use dev's lock and
	splay_tree instead of mm's.  Unlock mutex before gomp_fatal.
	(gomp_offload_image_to_device): New static function.
	(GOMP_offload_register): Add mutex lock.
	Call gomp_offload_image_to_device for all initialized devices.
	Replace gomp_realloc with gomp_realloc_unlock.
	(GOMP_offload_unregister): New function.
	(gomp_init_tables): Replace with gomp_init_device.  Replace a call to
	get_table_func from the plugin with calls to init_device_func and
	gomp_offload_image_to_device.
	(gomp_free_memmap): Change type of argument from gomp_memory_mapping *
	to splay_tree_s *.
	(GOMP_target): Do not call gomp_init_tables.  Use dev's lock and
	splay_tree instead of mem_map's.  Unlock mutex before gomp_fatal.
	(GOMP_target_data): Do not call gomp_init_tables.
	(GOMP_target_update): Likewise.  Remove argument from gomp_update.
	(gomp_load_plugin_for_device): Replace register_image and get_table
	with load_image and unload_image in DLSYM ().
	(gomp_register_images_for_device): Remove function.
	(gomp_target_init): Do not initialize current_device.mem_map.*,
	current_device.offload_regions_registered.
	Remove call to gomp_register_images_for_device.
	Do not free offload_images and num_offload_images.
liboffloadmic/
	* plugin/libgomp-plugin-intelmic.cpp: Include map.
	(AddrVect, DevAddrVect, ImgDevAddrMap): New typedefs.
	(num_devices, num_images, address_table): New static vars.
	(num_libraries, lib_descrs): Remove static vars.
	(set_mic_lib_path): Rename to ...
	(init): ... this.  Allocate address_table and get num_devices.
	(GOMP_OFFLOAD_get_num_devices): Return num_devices.
	(load_lib_and_get_table): Remove static function.
	(offload_image): New static function.
	(GOMP_OFFLOAD_get_table): Remove function.
	(GOMP_OFFLOAD_load_image, GOMP_OFFLOAD_unload_image): New functions.

From-SVN: r221878
parent 71671f5d
2015-04-06 Ilya Verbin <ilya.verbin@intel.com>
* config/i386/intelmic-mkoffload.c (generate_host_descr_file): Call
GOMP_offload_unregister from the destructor.
2015-04-06 Ilya Enkovich <ilya.enkovich@intel.com> 2015-04-06 Ilya Enkovich <ilya.enkovich@intel.com>
* ipa-chkp.c (chkp_maybe_create_clone): Reset cdtor * ipa-chkp.c (chkp_maybe_create_clone): Reset cdtor
......
...@@ -350,14 +350,27 @@ generate_host_descr_file (const char *host_compiler) ...@@ -350,14 +350,27 @@ generate_host_descr_file (const char *host_compiler)
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"extern \"C\"\n" "extern \"C\"\n"
"#endif\n" "#endif\n"
"void GOMP_offload_register (void *, int, void *);\n\n" "void GOMP_offload_register (void *, int, void *);\n"
"#ifdef __cplusplus\n"
"extern \"C\"\n"
"#endif\n"
"void GOMP_offload_unregister (void *, int, void *);\n\n"
"__attribute__((constructor))\n" "__attribute__((constructor))\n"
"static void\n" "static void\n"
"init (void)\n" "init (void)\n"
"{\n" "{\n"
" GOMP_offload_register (&__OFFLOAD_TABLE__, %d, __offload_target_data);\n" " GOMP_offload_register (&__OFFLOAD_TABLE__, %d, __offload_target_data);\n"
"}\n\n", GOMP_DEVICE_INTEL_MIC);
fprintf (src_file,
"__attribute__((destructor))\n"
"static void\n"
"fini (void)\n"
"{\n"
" GOMP_offload_unregister (&__OFFLOAD_TABLE__, %d, __offload_target_data);\n"
"}\n", GOMP_DEVICE_INTEL_MIC); "}\n", GOMP_DEVICE_INTEL_MIC);
fclose (src_file); fclose (src_file);
unsigned new_argc = 0; unsigned new_argc = 0;
......
2015-04-06 Ilya Verbin <ilya.verbin@intel.com>
* libgomp-plugin.h (struct mapping_table): Replace with addr_pair.
* libgomp.h (struct gomp_memory_mapping): Remove.
(struct target_mem_desc): Change type of mem_map from
gomp_memory_mapping * to splay_tree_s *.
(struct gomp_device_descr): Remove register_image_func, get_table_func.
Add load_image_func, unload_image_func.
Change type of mem_map from gomp_memory_mapping to splay_tree_s.
Remove offload_regions_registered.
(gomp_init_tables): Remove.
(gomp_free_memmap): Change type of argument from gomp_memory_mapping *
to splay_tree_s *.
* libgomp.map (GOMP_4.0.1): Add GOMP_offload_unregister.
* oacc-host.c (host_dispatch): Do not initialize register_image_func,
get_table_func, mem_map.is_initialized, mem_map.splay_tree.root,
offload_regions_registered.
Initialize load_image_func, unload_image_func, mem_map.root.
(goacc_host_init): Do not initialize host_dispatch.mem_map.lock.
* oacc-init.c (lazy_open): Don't call gomp_init_tables.
(acc_shutdown_1): Use dev's lock and splay_tree instead of mem_map's.
* oacc-mem.c (lookup_host): Get gomp_device_descr *dev instead of
gomp_memory_mapping *. Use dev's lock and splay_tree.
(lookup_dev): Use dev's lock.
(acc_deviceptr): Pass dev to lookup_host instead of mem_map.
(acc_is_present): Likewise.
(acc_map_data): Likewise.
(acc_unmap_data): Likewise. Use dev's lock.
(present_create_copy): Likewise.
(delete_copyout): Pass dev to lookup_host instead of mem_map.
(update_dev_host): Likewise.
(gomp_acc_remove_pointer): Likewise. Use dev's lock.
* oacc-parallel.c (GOACC_parallel): Use dev's lock and splay_tree.
* plugin/plugin-host.c (GOMP_OFFLOAD_register_image): Remove.
(GOMP_OFFLOAD_get_table): Remove.
(GOMP_OFFLOAD_load_image): New function.
(GOMP_OFFLOAD_unload_image): New function.
* target.c (register_lock): New mutex for offload image registration.
(num_devices): Do not guard with PLUGIN_SUPPORT.
(gomp_realloc_unlock): New static function.
(gomp_map_vars_existing): Add device descriptor argument. Unlock mutex
before gomp_fatal.
(gomp_map_vars): Use dev's lock and splay_tree instead of mem_map's.
Pass devicep to gomp_map_vars_existing. Unlock mutex before gomp_fatal.
(gomp_copy_from_async): Use dev's lock and splay_tree instead of
mem_map's.
(gomp_unmap_vars): Likewise.
(gomp_update): Remove gomp_memory_mapping argument. Use dev's lock and
splay_tree instead of mm's. Unlock mutex before gomp_fatal.
(gomp_offload_image_to_device): New static function.
(GOMP_offload_register): Add mutex lock.
Call gomp_offload_image_to_device for all initialized devices.
Replace gomp_realloc with gomp_realloc_unlock.
(GOMP_offload_unregister): New function.
(gomp_init_tables): Replace with gomp_init_device. Replace a call to
get_table_func from the plugin with calls to init_device_func and
gomp_offload_image_to_device.
(gomp_free_memmap): Change type of argument from gomp_memory_mapping *
to splay_tree_s *.
(GOMP_target): Do not call gomp_init_tables. Use dev's lock and
splay_tree instead of mem_map's. Unlock mutex before gomp_fatal.
(GOMP_target_data): Do not call gomp_init_tables.
(GOMP_target_update): Likewise. Remove argument from gomp_update.
(gomp_load_plugin_for_device): Replace register_image and get_table
with load_image and unload_image in DLSYM ().
(gomp_register_images_for_device): Remove function.
(gomp_target_init): Do not initialize current_device.mem_map.*,
current_device.offload_regions_registered.
Remove call to gomp_register_images_for_device.
Do not free offload_images and num_offload_images.
2015-03-30 Jakub Jelinek <jakub@redhat.com> 2015-03-30 Jakub Jelinek <jakub@redhat.com>
PR fortran/65597 PR fortran/65597
......
...@@ -51,14 +51,12 @@ enum offload_target_type ...@@ -51,14 +51,12 @@ enum offload_target_type
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6 OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
}; };
/* Auxiliary struct, used for transferring a host-target address range mapping /* Auxiliary struct, used for transferring pairs of addresses from plugin
from plugin to libgomp. */ to libgomp. */
struct mapping_table struct addr_pair
{ {
uintptr_t host_start; uintptr_t start;
uintptr_t host_end; uintptr_t end;
uintptr_t tgt_start;
uintptr_t tgt_end;
}; };
/* Miscellaneous functions. */ /* Miscellaneous functions. */
......
...@@ -224,7 +224,6 @@ struct gomp_team_state ...@@ -224,7 +224,6 @@ struct gomp_team_state
}; };
struct target_mem_desc; struct target_mem_desc;
struct gomp_memory_mapping;
/* These are the OpenMP 4.0 Internal Control Variables described in /* These are the OpenMP 4.0 Internal Control Variables described in
section 2.3.1. Those described as having one copy per task are section 2.3.1. Those described as having one copy per task are
...@@ -657,7 +656,7 @@ struct target_mem_desc { ...@@ -657,7 +656,7 @@ struct target_mem_desc {
struct gomp_device_descr *device_descr; struct gomp_device_descr *device_descr;
/* Memory mapping info for the thread that created this descriptor. */ /* Memory mapping info for the thread that created this descriptor. */
struct gomp_memory_mapping *mem_map; struct splay_tree_s *mem_map;
/* List of splay keys to remove (or decrease refcount) /* List of splay keys to remove (or decrease refcount)
at the end of region. */ at the end of region. */
...@@ -683,20 +682,6 @@ struct splay_tree_key_s { ...@@ -683,20 +682,6 @@ struct splay_tree_key_s {
#include "splay-tree.h" #include "splay-tree.h"
/* Information about mapped memory regions (per device/context). */
struct gomp_memory_mapping
{
/* Mutex for operating with the splay tree and other shared structures. */
gomp_mutex_t lock;
/* True when tables have been added to this memory map. */
bool is_initialized;
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s splay_tree;
};
typedef struct acc_dispatch_t typedef struct acc_dispatch_t
{ {
/* This is a linked list of data mapped using the /* This is a linked list of data mapped using the
...@@ -773,19 +758,18 @@ struct gomp_device_descr ...@@ -773,19 +758,18 @@ struct gomp_device_descr
unsigned int (*get_caps_func) (void); unsigned int (*get_caps_func) (void);
int (*get_type_func) (void); int (*get_type_func) (void);
int (*get_num_devices_func) (void); int (*get_num_devices_func) (void);
void (*register_image_func) (void *, void *);
void (*init_device_func) (int); void (*init_device_func) (int);
void (*fini_device_func) (int); void (*fini_device_func) (int);
int (*get_table_func) (int, struct mapping_table **); int (*load_image_func) (int, void *, struct addr_pair **);
void (*unload_image_func) (int, void *);
void *(*alloc_func) (int, size_t); void *(*alloc_func) (int, size_t);
void (*free_func) (int, void *); void (*free_func) (int, void *);
void *(*dev2host_func) (int, void *, const void *, size_t); void *(*dev2host_func) (int, void *, const void *, size_t);
void *(*host2dev_func) (int, void *, const void *, size_t); void *(*host2dev_func) (int, void *, const void *, size_t);
void (*run_func) (int, void *, void *); void (*run_func) (int, void *, void *);
/* Memory-mapping info for this device instance. */ /* Splay tree containing information about mapped memory regions. */
/* Uses a separate lock. */ struct splay_tree_s mem_map;
struct gomp_memory_mapping mem_map;
/* Mutex for the mutable data. */ /* Mutex for the mutable data. */
gomp_mutex_t lock; gomp_mutex_t lock;
...@@ -793,9 +777,6 @@ struct gomp_device_descr ...@@ -793,9 +777,6 @@ struct gomp_device_descr
/* Set to true when device is initialized. */ /* Set to true when device is initialized. */
bool is_initialized; bool is_initialized;
/* True when offload regions have been registered with this device. */
bool offload_regions_registered;
/* OpenACC-specific data and functions. */ /* OpenACC-specific data and functions. */
/* This is mutable because of its mutable data_environ and target_data /* This is mutable because of its mutable data_environ and target_data
members. */ members. */
...@@ -811,9 +792,7 @@ extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, ...@@ -811,9 +792,7 @@ extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
extern void gomp_copy_from_async (struct target_mem_desc *); extern void gomp_copy_from_async (struct target_mem_desc *);
extern void gomp_unmap_vars (struct target_mem_desc *, bool); extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_init_device (struct gomp_device_descr *); extern void gomp_init_device (struct gomp_device_descr *);
extern void gomp_init_tables (struct gomp_device_descr *, extern void gomp_free_memmap (struct splay_tree_s *);
struct gomp_memory_mapping *);
extern void gomp_free_memmap (struct gomp_memory_mapping *);
extern void gomp_fini_device (struct gomp_device_descr *); extern void gomp_fini_device (struct gomp_device_descr *);
/* work.c */ /* work.c */
......
...@@ -231,6 +231,7 @@ GOMP_4.0 { ...@@ -231,6 +231,7 @@ GOMP_4.0 {
GOMP_4.0.1 { GOMP_4.0.1 {
global: global:
GOMP_offload_register; GOMP_offload_register;
GOMP_offload_unregister;
} GOMP_4.0; } GOMP_4.0;
OACC_2.0 { OACC_2.0 {
......
...@@ -43,20 +43,18 @@ static struct gomp_device_descr host_dispatch = ...@@ -43,20 +43,18 @@ static struct gomp_device_descr host_dispatch =
.get_caps_func = GOMP_OFFLOAD_get_caps, .get_caps_func = GOMP_OFFLOAD_get_caps,
.get_type_func = GOMP_OFFLOAD_get_type, .get_type_func = GOMP_OFFLOAD_get_type,
.get_num_devices_func = GOMP_OFFLOAD_get_num_devices, .get_num_devices_func = GOMP_OFFLOAD_get_num_devices,
.register_image_func = GOMP_OFFLOAD_register_image,
.init_device_func = GOMP_OFFLOAD_init_device, .init_device_func = GOMP_OFFLOAD_init_device,
.fini_device_func = GOMP_OFFLOAD_fini_device, .fini_device_func = GOMP_OFFLOAD_fini_device,
.get_table_func = GOMP_OFFLOAD_get_table, .load_image_func = GOMP_OFFLOAD_load_image,
.unload_image_func = GOMP_OFFLOAD_unload_image,
.alloc_func = GOMP_OFFLOAD_alloc, .alloc_func = GOMP_OFFLOAD_alloc,
.free_func = GOMP_OFFLOAD_free, .free_func = GOMP_OFFLOAD_free,
.dev2host_func = GOMP_OFFLOAD_dev2host, .dev2host_func = GOMP_OFFLOAD_dev2host,
.host2dev_func = GOMP_OFFLOAD_host2dev, .host2dev_func = GOMP_OFFLOAD_host2dev,
.run_func = GOMP_OFFLOAD_run, .run_func = GOMP_OFFLOAD_run,
.mem_map.is_initialized = false, .mem_map.root = NULL,
.mem_map.splay_tree.root = NULL,
.is_initialized = false, .is_initialized = false,
.offload_regions_registered = false,
.openacc = { .openacc = {
.open_device_func = GOMP_OFFLOAD_openacc_open_device, .open_device_func = GOMP_OFFLOAD_openacc_open_device,
...@@ -94,7 +92,6 @@ static struct gomp_device_descr host_dispatch = ...@@ -94,7 +92,6 @@ static struct gomp_device_descr host_dispatch =
static __attribute__ ((constructor)) static __attribute__ ((constructor))
void goacc_host_init (void) void goacc_host_init (void)
{ {
gomp_mutex_init (&host_dispatch.mem_map.lock);
gomp_mutex_init (&host_dispatch.lock); gomp_mutex_init (&host_dispatch.lock);
goacc_register (&host_dispatch); goacc_register (&host_dispatch);
} }
...@@ -284,12 +284,6 @@ lazy_open (int ord) ...@@ -284,12 +284,6 @@ lazy_open (int ord)
= acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data); = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);
acc_dev->openacc.async_set_async_func (acc_async_sync); acc_dev->openacc.async_set_async_func (acc_async_sync);
struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
gomp_mutex_lock (&mem_map->lock);
if (!mem_map->is_initialized)
gomp_init_tables (acc_dev, mem_map);
gomp_mutex_unlock (&mem_map->lock);
} }
/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
...@@ -351,10 +345,9 @@ acc_shutdown_1 (acc_device_t d) ...@@ -351,10 +345,9 @@ acc_shutdown_1 (acc_device_t d)
walk->dev->openacc.target_data = target_data = NULL; walk->dev->openacc.target_data = target_data = NULL;
struct gomp_memory_mapping *mem_map = &walk->dev->mem_map; gomp_mutex_lock (&walk->dev->lock);
gomp_mutex_lock (&mem_map->lock); gomp_free_memmap (&walk->dev->mem_map);
gomp_free_memmap (mem_map); gomp_mutex_unlock (&walk->dev->lock);
gomp_mutex_unlock (&mem_map->lock);
walk->dev = NULL; walk->dev = NULL;
} }
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
/* Return block containing [H->S), or NULL if not contained. */ /* Return block containing [H->S), or NULL if not contained. */
static splay_tree_key static splay_tree_key
lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s) lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{ {
struct splay_tree_key_s node; struct splay_tree_key_s node;
splay_tree_key key; splay_tree_key key;
...@@ -46,11 +46,9 @@ lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s) ...@@ -46,11 +46,9 @@ lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s)
node.host_start = (uintptr_t) h; node.host_start = (uintptr_t) h;
node.host_end = (uintptr_t) h + s; node.host_end = (uintptr_t) h + s;
gomp_mutex_lock (&mem_map->lock); gomp_mutex_lock (&dev->lock);
key = splay_tree_lookup (&dev->mem_map, &node);
key = splay_tree_lookup (&mem_map->splay_tree, &node); gomp_mutex_unlock (&dev->lock);
gomp_mutex_unlock (&mem_map->lock);
return key; return key;
} }
...@@ -65,14 +63,11 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) ...@@ -65,14 +63,11 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{ {
int i; int i;
struct target_mem_desc *t; struct target_mem_desc *t;
struct gomp_memory_mapping *mem_map;
if (!tgt) if (!tgt)
return NULL; return NULL;
mem_map = tgt->mem_map; gomp_mutex_lock (&tgt->device_descr->lock);
gomp_mutex_lock (&mem_map->lock);
for (t = tgt; t != NULL; t = t->prev) for (t = tgt; t != NULL; t = t->prev)
{ {
...@@ -80,7 +75,7 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) ...@@ -80,7 +75,7 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
break; break;
} }
gomp_mutex_unlock (&mem_map->lock); gomp_mutex_unlock (&tgt->device_descr->lock);
if (!t) if (!t)
return NULL; return NULL;
...@@ -176,7 +171,7 @@ acc_deviceptr (void *h) ...@@ -176,7 +171,7 @@ acc_deviceptr (void *h)
struct goacc_thread *thr = goacc_thread (); struct goacc_thread *thr = goacc_thread ();
n = lookup_host (&thr->dev->mem_map, h, 1); n = lookup_host (thr->dev, h, 1);
if (!n) if (!n)
return NULL; return NULL;
...@@ -229,7 +224,7 @@ acc_is_present (void *h, size_t s) ...@@ -229,7 +224,7 @@ acc_is_present (void *h, size_t s)
struct goacc_thread *thr = goacc_thread (); struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev; struct gomp_device_descr *acc_dev = thr->dev;
n = lookup_host (&acc_dev->mem_map, h, s); n = lookup_host (acc_dev, h, s);
if (n && ((uintptr_t)h < n->host_start if (n && ((uintptr_t)h < n->host_start
|| (uintptr_t)h + s > n->host_end || (uintptr_t)h + s > n->host_end
...@@ -271,7 +266,7 @@ acc_map_data (void *h, void *d, size_t s) ...@@ -271,7 +266,7 @@ acc_map_data (void *h, void *d, size_t s)
gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
(void *)h, (int)s, (void *)d, (int)s); (void *)h, (int)s, (void *)d, (int)s);
if (lookup_host (&acc_dev->mem_map, h, s)) if (lookup_host (acc_dev, h, s))
gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
(int)s); (int)s);
...@@ -296,7 +291,7 @@ acc_unmap_data (void *h) ...@@ -296,7 +291,7 @@ acc_unmap_data (void *h)
/* No need to call lazy open, as the address must have been mapped. */ /* No need to call lazy open, as the address must have been mapped. */
size_t host_size; size_t host_size;
splay_tree_key n = lookup_host (&acc_dev->mem_map, h, 1); splay_tree_key n = lookup_host (acc_dev, h, 1);
struct target_mem_desc *t; struct target_mem_desc *t;
if (!n) if (!n)
...@@ -320,7 +315,7 @@ acc_unmap_data (void *h) ...@@ -320,7 +315,7 @@ acc_unmap_data (void *h)
t->tgt_end = 0; t->tgt_end = 0;
t->to_free = 0; t->to_free = 0;
gomp_mutex_lock (&acc_dev->mem_map.lock); gomp_mutex_lock (&acc_dev->lock);
for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
tp = t, t = t->prev) tp = t, t = t->prev)
...@@ -334,7 +329,7 @@ acc_unmap_data (void *h) ...@@ -334,7 +329,7 @@ acc_unmap_data (void *h)
break; break;
} }
gomp_mutex_unlock (&acc_dev->mem_map.lock); gomp_mutex_unlock (&acc_dev->lock);
} }
gomp_unmap_vars (t, true); gomp_unmap_vars (t, true);
...@@ -358,7 +353,7 @@ present_create_copy (unsigned f, void *h, size_t s) ...@@ -358,7 +353,7 @@ present_create_copy (unsigned f, void *h, size_t s)
struct goacc_thread *thr = goacc_thread (); struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev; struct gomp_device_descr *acc_dev = thr->dev;
n = lookup_host (&acc_dev->mem_map, h, s); n = lookup_host (acc_dev, h, s);
if (n) if (n)
{ {
/* Present. */ /* Present. */
...@@ -389,13 +384,13 @@ present_create_copy (unsigned f, void *h, size_t s) ...@@ -389,13 +384,13 @@ present_create_copy (unsigned f, void *h, size_t s)
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
false); false);
gomp_mutex_lock (&acc_dev->mem_map.lock); gomp_mutex_lock (&acc_dev->lock);
d = tgt->to_free; d = tgt->to_free;
tgt->prev = acc_dev->openacc.data_environ; tgt->prev = acc_dev->openacc.data_environ;
acc_dev->openacc.data_environ = tgt; acc_dev->openacc.data_environ = tgt;
gomp_mutex_unlock (&acc_dev->mem_map.lock); gomp_mutex_unlock (&acc_dev->lock);
} }
return d; return d;
...@@ -436,7 +431,7 @@ delete_copyout (unsigned f, void *h, size_t s) ...@@ -436,7 +431,7 @@ delete_copyout (unsigned f, void *h, size_t s)
struct goacc_thread *thr = goacc_thread (); struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev; struct gomp_device_descr *acc_dev = thr->dev;
n = lookup_host (&acc_dev->mem_map, h, s); n = lookup_host (acc_dev, h, s);
/* No need to call lazy open, as the data must already have been /* No need to call lazy open, as the data must already have been
mapped. */ mapped. */
...@@ -479,7 +474,7 @@ update_dev_host (int is_dev, void *h, size_t s) ...@@ -479,7 +474,7 @@ update_dev_host (int is_dev, void *h, size_t s)
struct goacc_thread *thr = goacc_thread (); struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev; struct gomp_device_descr *acc_dev = thr->dev;
n = lookup_host (&acc_dev->mem_map, h, s); n = lookup_host (acc_dev, h, s);
/* No need to call lazy open, as the data must already have been /* No need to call lazy open, as the data must already have been
mapped. */ mapped. */
...@@ -532,7 +527,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) ...@@ -532,7 +527,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
struct target_mem_desc *t; struct target_mem_desc *t;
int minrefs = (mapnum == 1) ? 2 : 3; int minrefs = (mapnum == 1) ? 2 : 3;
n = lookup_host (&acc_dev->mem_map, h, 1); n = lookup_host (acc_dev, h, 1);
if (!n) if (!n)
gomp_fatal ("%p is not a mapped block", (void *)h); gomp_fatal ("%p is not a mapped block", (void *)h);
...@@ -543,7 +538,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) ...@@ -543,7 +538,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
struct target_mem_desc *tp; struct target_mem_desc *tp;
gomp_mutex_lock (&acc_dev->mem_map.lock); gomp_mutex_lock (&acc_dev->lock);
if (t->refcount == minrefs) if (t->refcount == minrefs)
{ {
...@@ -570,7 +565,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) ...@@ -570,7 +565,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
if (force_copyfrom) if (force_copyfrom)
t->list[0]->copy_from = 1; t->list[0]->copy_from = 1;
gomp_mutex_unlock (&acc_dev->mem_map.lock); gomp_mutex_unlock (&acc_dev->lock);
/* If running synchronously, unmap immediately. */ /* If running synchronously, unmap immediately. */
if (async < acc_async_noval) if (async < acc_async_noval)
......
...@@ -144,9 +144,9 @@ GOACC_parallel (int device, void (*fn) (void *), ...@@ -144,9 +144,9 @@ GOACC_parallel (int device, void (*fn) (void *),
{ {
k.host_start = (uintptr_t) fn; k.host_start = (uintptr_t) fn;
k.host_end = k.host_start + 1; k.host_end = k.host_start + 1;
gomp_mutex_lock (&acc_dev->mem_map.lock); gomp_mutex_lock (&acc_dev->lock);
tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k); tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
gomp_mutex_unlock (&acc_dev->mem_map.lock); gomp_mutex_unlock (&acc_dev->lock);
if (tgt_fn_key == NULL) if (tgt_fn_key == NULL)
gomp_fatal ("target function wasn't mapped"); gomp_fatal ("target function wasn't mapped");
......
...@@ -95,12 +95,6 @@ GOMP_OFFLOAD_get_num_devices (void) ...@@ -95,12 +95,6 @@ GOMP_OFFLOAD_get_num_devices (void)
} }
STATIC void STATIC void
GOMP_OFFLOAD_register_image (void *host_table __attribute__ ((unused)),
void *target_data __attribute__ ((unused)))
{
}
STATIC void
GOMP_OFFLOAD_init_device (int n __attribute__ ((unused))) GOMP_OFFLOAD_init_device (int n __attribute__ ((unused)))
{ {
} }
...@@ -111,12 +105,19 @@ GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused))) ...@@ -111,12 +105,19 @@ GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused)))
} }
STATIC int STATIC int
GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)), GOMP_OFFLOAD_load_image (int n __attribute__ ((unused)),
struct mapping_table **table __attribute__ ((unused))) void *i __attribute__ ((unused)),
struct addr_pair **r __attribute__ ((unused)))
{ {
return 0; return 0;
} }
STATIC void
GOMP_OFFLOAD_unload_image (int n __attribute__ ((unused)),
void *i __attribute__ ((unused)))
{
}
STATIC void * STATIC void *
GOMP_OFFLOAD_openacc_open_device (int n) GOMP_OFFLOAD_openacc_open_device (int n)
{ {
......
2015-04-06 Ilya Verbin <ilya.verbin@intel.com>
* plugin/libgomp-plugin-intelmic.cpp: Include map.
(AddrVect, DevAddrVect, ImgDevAddrMap): New typedefs.
(num_devices, num_images, address_table): New static vars.
(num_libraries, lib_descrs): Remove static vars.
(set_mic_lib_path): Rename to ...
(init): ... this. Allocate address_table and get num_devices.
(GOMP_OFFLOAD_get_num_devices): Return num_devices.
(load_lib_and_get_table): Remove static function.
(offload_image): New static function.
(GOMP_OFFLOAD_get_table): Remove function.
(GOMP_OFFLOAD_load_image, GOMP_OFFLOAD_unload_image): New functions.
2015-01-15 Thomas Schwinge <thomas@codesourcery.com> 2015-01-15 Thomas Schwinge <thomas@codesourcery.com>
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_get_name) * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_get_name)
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <string.h> #include <string.h>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <map>
#include "libgomp-plugin.h" #include "libgomp-plugin.h"
#include "compiler_if_host.h" #include "compiler_if_host.h"
#include "main_target_image.h" #include "main_target_image.h"
...@@ -53,6 +54,29 @@ fprintf (stderr, "\n"); \ ...@@ -53,6 +54,29 @@ fprintf (stderr, "\n"); \
#endif #endif
/* Start/end addresses of functions and global variables on a device. */
typedef std::vector<addr_pair> AddrVect;
/* Addresses for one image and all devices. */
typedef std::vector<AddrVect> DevAddrVect;
/* Addresses for all images and all devices. */
typedef std::map<void *, DevAddrVect> ImgDevAddrMap;
/* Total number of available devices. */
static int num_devices;
/* Total number of shared libraries with offloading to Intel MIC. */
static int num_images;
/* Two-level table: the first key is a pointer to the image, the second key
is the device number.  Each entry is a vector of address pairs. */
static ImgDevAddrMap *address_table;
/* Thread-safe registration of the main image. */
static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
static VarDesc vd_host2tgt = { static VarDesc vd_host2tgt = {
{ 1, 1 }, /* dst, src */ { 1, 1 }, /* dst, src */
{ 1, 0 }, /* in, out */ { 1, 0 }, /* in, out */
...@@ -90,28 +114,17 @@ static VarDesc vd_tgt2host = { ...@@ -90,28 +114,17 @@ static VarDesc vd_tgt2host = {
}; };
/* Total number of shared libraries with offloading to Intel MIC. */
static int num_libraries;
/* Pointers to the descriptors, containing pointers to host-side tables and to
target images. */
static std::vector< std::pair<void *, void *> > lib_descrs;
/* Thread-safe registration of the main image. */
static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
/* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is /* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is
required by liboffloadmic. */ required by liboffloadmic. */
__attribute__((constructor)) __attribute__((constructor))
static void static void
set_mic_lib_path (void) init (void)
{ {
const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV); const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV);
const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV); const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV);
if (!ld_lib_path) if (!ld_lib_path)
return; goto out;
if (!mic_lib_path) if (!mic_lib_path)
setenv (MIC_LD_LIBRARY_PATH_ENV, ld_lib_path, 1); setenv (MIC_LD_LIBRARY_PATH_ENV, ld_lib_path, 1);
...@@ -133,6 +146,10 @@ set_mic_lib_path (void) ...@@ -133,6 +146,10 @@ set_mic_lib_path (void)
if (!use_alloca) if (!use_alloca)
free (mic_lib_path_new); free (mic_lib_path_new);
} }
out:
address_table = new ImgDevAddrMap;
num_devices = _Offload_number_of_devices ();
} }
extern "C" const char * extern "C" const char *
...@@ -162,18 +179,8 @@ GOMP_OFFLOAD_get_type (void) ...@@ -162,18 +179,8 @@ GOMP_OFFLOAD_get_type (void)
extern "C" int extern "C" int
GOMP_OFFLOAD_get_num_devices (void) GOMP_OFFLOAD_get_num_devices (void)
{ {
int res = _Offload_number_of_devices (); TRACE ("(): return %d", num_devices);
TRACE ("(): return %d", res); return num_devices;
return res;
}
/* This should be called from every shared library with offloading. */
extern "C" void
GOMP_OFFLOAD_register_image (void *host_table, void *target_image)
{
TRACE ("(host_table = %p, target_image = %p)", host_table, target_image);
lib_descrs.push_back (std::make_pair (host_table, target_image));
num_libraries++;
} }
static void static void
...@@ -196,7 +203,8 @@ register_main_image () ...@@ -196,7 +203,8 @@ register_main_image ()
__offload_register_image (&main_target_image); __offload_register_image (&main_target_image);
} }
/* Load offload_target_main on target. */ /* liboffloadmic loads and runs offload_target_main on all available devices
during a first call to offload (). */
extern "C" void extern "C" void
GOMP_OFFLOAD_init_device (int device) GOMP_OFFLOAD_init_device (int device)
{ {
...@@ -243,9 +251,11 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table) ...@@ -243,9 +251,11 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
} }
} }
/* Offload TARGET_IMAGE to all available devices and fill address_table with
corresponding target addresses. */
static void static void
load_lib_and_get_table (int device, int lib_num, mapping_table *&table, offload_image (void *target_image)
int &table_size)
{ {
struct TargetImage { struct TargetImage {
int64_t size; int64_t size;
...@@ -254,19 +264,11 @@ load_lib_and_get_table (int device, int lib_num, mapping_table *&table, ...@@ -254,19 +264,11 @@ load_lib_and_get_table (int device, int lib_num, mapping_table *&table,
char data[]; char data[];
} __attribute__ ((packed)); } __attribute__ ((packed));
void ***host_table_descr = (void ***) lib_descrs[lib_num].first; void *image_start = ((void **) target_image)[0];
void **host_func_start = host_table_descr[0]; void *image_end = ((void **) target_image)[1];
void **host_func_end = host_table_descr[1];
void **host_var_start = host_table_descr[2];
void **host_var_end = host_table_descr[3];
void **target_image_descr = (void **) lib_descrs[lib_num].second;
void *image_start = target_image_descr[0];
void *image_end = target_image_descr[1];
TRACE ("() host_table_descr { %p, %p, %p, %p }", host_func_start, TRACE ("(target_image = %p { %p, %p })",
host_func_end, host_var_start, host_var_end); target_image, image_start, image_end);
TRACE ("() target_image_descr { %p, %p }", image_start, image_end);
int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start; int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
TargetImage *image TargetImage *image
...@@ -279,92 +281,85 @@ load_lib_and_get_table (int device, int lib_num, mapping_table *&table, ...@@ -279,92 +281,85 @@ load_lib_and_get_table (int device, int lib_num, mapping_table *&table,
} }
image->size = image_size; image->size = image_size;
sprintf (image->name, "lib%010d.so", lib_num); sprintf (image->name, "lib%010d.so", num_images++);
memcpy (image->data, image_start, image->size); memcpy (image->data, image_start, image->size);
TRACE ("() __offload_register_image %s { %p, %d }", TRACE ("() __offload_register_image %s { %p, %d }",
image->name, image_start, image->size); image->name, image_start, image->size);
__offload_register_image (image); __offload_register_image (image);
int tgt_num_funcs = 0; /* Receive tables for target_image from all devices. */
int tgt_num_vars = 0; DevAddrVect dev_table;
void **tgt_table = NULL; for (int dev = 0; dev < num_devices; dev++)
get_target_table (device, tgt_num_funcs, tgt_num_vars, tgt_table);
free (image);
/* The func table contains only addresses, the var table contains addresses
and corresponding sizes. */
int host_num_funcs = host_func_end - host_func_start;
int host_num_vars = (host_var_end - host_var_start) / 2;
TRACE ("() host_num_funcs = %d, tgt_num_funcs = %d",
host_num_funcs, tgt_num_funcs);
TRACE ("() host_num_vars = %d, tgt_num_vars = %d",
host_num_vars, tgt_num_vars);
if (host_num_funcs != tgt_num_funcs)
{ {
fprintf (stderr, "%s: Can't map target functions\n", __FILE__); int num_funcs = 0;
exit (1); int num_vars = 0;
} void **table = NULL;
if (host_num_vars != tgt_num_vars)
get_target_table (dev, num_funcs, num_vars, table);
AddrVect curr_dev_table;
for (int i = 0; i < num_funcs; i++)
{ {
fprintf (stderr, "%s: Can't map target variables\n", __FILE__); addr_pair tgt_addr;
exit (1); tgt_addr.start = (uintptr_t) table[i];
tgt_addr.end = tgt_addr.start + 1;
TRACE ("() func %d:\t0x%llx..0x%llx", i,
tgt_addr.start, tgt_addr.end);
curr_dev_table.push_back (tgt_addr);
} }
table = (mapping_table *) realloc (table, (table_size + host_num_funcs for (int i = 0; i < num_vars; i++)
+ host_num_vars)
* sizeof (mapping_table));
if (table == NULL)
{ {
fprintf (stderr, "%s: Can't allocate memory\n", __FILE__); addr_pair tgt_addr;
exit (1); tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
tgt_addr.end = tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
TRACE ("() var %d:\t0x%llx..0x%llx", i, tgt_addr.start, tgt_addr.end);
curr_dev_table.push_back (tgt_addr);
} }
for (int i = 0; i < host_num_funcs; i++) dev_table.push_back (curr_dev_table);
{ }
mapping_table t;
t.host_start = (uintptr_t) host_func_start[i];
t.host_end = t.host_start + 1;
t.tgt_start = (uintptr_t) tgt_table[i];
t.tgt_end = t.tgt_start + 1;
TRACE ("() lib %d, func %d:\t0x%llx -- 0x%llx", address_table->insert (std::make_pair (target_image, dev_table));
lib_num, i, t.host_start, t.tgt_start);
table[table_size++] = t; free (image);
} }
for (int i = 0; i < host_num_vars * 2; i += 2) extern "C" int
{ GOMP_OFFLOAD_load_image (int device, void *target_image, addr_pair **result)
mapping_table t; {
t.host_start = (uintptr_t) host_var_start[i]; TRACE ("(device = %d, target_image = %p)", device, target_image);
t.host_end = t.host_start + (uintptr_t) host_var_start[i+1];
t.tgt_start = (uintptr_t) tgt_table[tgt_num_funcs+i];
t.tgt_end = t.tgt_start + (uintptr_t) tgt_table[tgt_num_funcs+i+1];
TRACE ("() lib %d, var %d:\t0x%llx (%d) -- 0x%llx (%d)", lib_num, i/2, /* If target_image is already present in address_table, then there is no need
t.host_start, t.host_end - t.host_start, to offload it. */
t.tgt_start, t.tgt_end - t.tgt_start); if (address_table->count (target_image) == 0)
offload_image (target_image);
table[table_size++] = t; AddrVect *curr_dev_table = &(*address_table)[target_image][device];
int table_size = curr_dev_table->size ();
addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
if (table == NULL)
{
fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
exit (1);
} }
delete [] tgt_table; std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
*result = table;
return table_size;
} }
extern "C" int extern "C" void
GOMP_OFFLOAD_get_table (int device, void *result) GOMP_OFFLOAD_unload_image (int device, void *target_image)
{ {
TRACE ("(num_libraries = %d)", num_libraries); TRACE ("(device = %d, target_image = %p)", device, target_image);
mapping_table *table = NULL;
int table_size = 0;
for (int i = 0; i < num_libraries; i++) /* TODO: Currently liboffloadmic doesn't support __offload_unregister_image
load_lib_and_get_table (device, i, table, table_size); for libraries. */
*(void **) result = table; address_table->erase (target_image);
return table_size;
} }
extern "C" void * extern "C" void *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment