Commit 378da98f by Julian Brown Committed by Julian Brown

OpenACC reference count overhaul

	libgomp/
	* libgomp.h (struct splay_tree_key_s): Substitute dynamic_refcount
	field for virtual_refcount.
	(enum gomp_map_vars_kind): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA.
	(gomp_free_memmap): Remove prototype.
	* oacc-init.c (acc_shutdown_1): Iteratively call gomp_remove_var
	instead of calling gomp_free_memmap.
	* oacc-mem.c (acc_map_data): Use virtual_refcount instead of
	dynamic_refcount.
	(acc_unmap_data): Open code instead of forcing target_mem_desc's
	to_free field to NULL then calling gomp_unmap_vars.  Handle
	REFCOUNT_INFINITY on target blocks.
	(goacc_enter_data): Rename to...
	(goacc_enter_datum): ...this.  Remove MAPNUM parameter and special
	handling for mapping groups.  Use virtual_refcount instead of
	dynamic_refcount.  Use GOMP_MAP_VARS_OPENACC_ENTER_DATA for
	map_map_vars_async call.  Re-do lookup for target pointer return value.
	(acc_create, acc_create_async, acc_copyin, acc_copyin_async): Call
	renamed goacc_enter_datum function.
	(goacc_exit_data): Rename to...
	(goacc_exit_datum): ...this.  Update for virtual_refcount semantics.
	(acc_delete, acc_delete_async, acc_delete_finalize,
	acc_delete_finalize_async, acc_copyout, acc_copyout_async,
	acc_copyout_finalize, acc_copyout_finalize_async): Call renamed
	goacc_exit_datum function.
	(gomp_acc_remove_pointer, find_pointer): Remove functions.
	(find_group_last, goacc_enter_data_internal, goacc_exit_data_internal):
	New functions.
	(GOACC_enter_exit_data): Use goacc_enter_data_internal and
	goacc_exit_data_internal helper functions.
	* target.c (gomp_map_vars_internal): Handle
	GOMP_MAP_VARS_OPENACC_ENTER_DATA.  Update for virtual_refcount
	semantics.
	(gomp_unmap_vars_internal): Update for virtual_refcount semantics.
	(gomp_load_image_to_device, omp_target_associate_ptr): Zero-initialise
	virtual_refcount field instead of dynamic_refcount.
	(gomp_free_memmap): Remove function.
	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: New test.
	* testsuite/libgomp.c-c++-common/unmap-infinity-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr92843-1.c: Add XFAIL.

From-SVN: r279621
parent 2a656a93
2019-12-19 Julian Brown <julian@codesourcery.com> 2019-12-19 Julian Brown <julian@codesourcery.com>
* libgomp.h (struct splay_tree_key_s): Substitute dynamic_refcount
field for virtual_refcount.
(enum gomp_map_vars_kind): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA.
(gomp_free_memmap): Remove prototype.
* oacc-init.c (acc_shutdown_1): Iteratively call gomp_remove_var
instead of calling gomp_free_memmap.
* oacc-mem.c (acc_map_data): Use virtual_refcount instead of
dynamic_refcount.
(acc_unmap_data): Open code instead of forcing target_mem_desc's
to_free field to NULL then calling gomp_unmap_vars. Handle
REFCOUNT_INFINITY on target blocks.
(goacc_enter_data): Rename to...
(goacc_enter_datum): ...this. Remove MAPNUM parameter and special
handling for mapping groups. Use virtual_refcount instead of
dynamic_refcount. Use GOMP_MAP_VARS_OPENACC_ENTER_DATA for
map_map_vars_async call. Re-do lookup for target pointer return value.
(acc_create, acc_create_async, acc_copyin, acc_copyin_async): Call
renamed goacc_enter_datum function.
(goacc_exit_data): Rename to...
(goacc_exit_datum): ...this. Update for virtual_refcount semantics.
(acc_delete, acc_delete_async, acc_delete_finalize,
acc_delete_finalize_async, acc_copyout, acc_copyout_async,
acc_copyout_finalize, acc_copyout_finalize_async): Call renamed
goacc_exit_datum function.
(gomp_acc_remove_pointer, find_pointer): Remove functions.
(find_group_last, goacc_enter_data_internal, goacc_exit_data_internal):
New functions.
(GOACC_enter_exit_data): Use goacc_enter_data_internal and
goacc_exit_data_internal helper functions.
* target.c (gomp_map_vars_internal): Handle
GOMP_MAP_VARS_OPENACC_ENTER_DATA. Update for virtual_refcount
semantics.
(gomp_unmap_vars_internal): Update for virtual_refcount semantics.
(gomp_load_image_to_device, omp_target_associate_ptr): Zero-initialise
virtual_refcount field instead of dynamic_refcount.
(gomp_free_memmap): Remove function.
* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: New test.
* testsuite/libgomp.c-c++-common/unmap-infinity-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr92843-1.c: Add XFAIL.
2019-12-19 Julian Brown <julian@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com> Thomas Schwinge <thomas@codesourcery.com>
* libgomp.h (struct splay_tree_aux): New. * libgomp.h (struct splay_tree_aux): New.
......
...@@ -1007,8 +1007,11 @@ struct splay_tree_key_s { ...@@ -1007,8 +1007,11 @@ struct splay_tree_key_s {
uintptr_t tgt_offset; uintptr_t tgt_offset;
/* Reference count. */ /* Reference count. */
uintptr_t refcount; uintptr_t refcount;
/* Dynamic reference count. */ /* Reference counts beyond those that represent genuine references in the
uintptr_t dynamic_refcount; linked splay tree key/target memory structures, e.g. for multiple OpenACC
"present increment" operations (via "acc enter data") referring to the same
host-memory block. */
uintptr_t virtual_refcount;
struct splay_tree_aux *aux; struct splay_tree_aux *aux;
}; };
...@@ -1139,6 +1142,7 @@ struct gomp_device_descr ...@@ -1139,6 +1142,7 @@ struct gomp_device_descr
enum gomp_map_vars_kind enum gomp_map_vars_kind
{ {
GOMP_MAP_VARS_OPENACC, GOMP_MAP_VARS_OPENACC,
GOMP_MAP_VARS_OPENACC_ENTER_DATA,
GOMP_MAP_VARS_TARGET, GOMP_MAP_VARS_TARGET,
GOMP_MAP_VARS_DATA, GOMP_MAP_VARS_DATA,
GOMP_MAP_VARS_ENTER_DATA GOMP_MAP_VARS_ENTER_DATA
...@@ -1168,7 +1172,6 @@ extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, ...@@ -1168,7 +1172,6 @@ extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
struct goacc_asyncqueue *); struct goacc_asyncqueue *);
extern void gomp_init_device (struct gomp_device_descr *); extern void gomp_init_device (struct gomp_device_descr *);
extern bool gomp_fini_device (struct gomp_device_descr *); extern bool gomp_fini_device (struct gomp_device_descr *);
extern void gomp_free_memmap (struct splay_tree_s *);
extern void gomp_unload_device (struct gomp_device_descr *); extern void gomp_unload_device (struct gomp_device_descr *);
extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
extern void gomp_remove_var_async (struct gomp_device_descr *, splay_tree_key, extern void gomp_remove_var_async (struct gomp_device_descr *, splay_tree_key,
......
...@@ -370,7 +370,15 @@ acc_shutdown_1 (acc_device_t d) ...@@ -370,7 +370,15 @@ acc_shutdown_1 (acc_device_t d)
if (walk->dev) if (walk->dev)
{ {
gomp_mutex_lock (&walk->dev->lock); gomp_mutex_lock (&walk->dev->lock);
gomp_free_memmap (&walk->dev->mem_map);
while (walk->dev->mem_map.root)
{
splay_tree_key k = &walk->dev->mem_map.root->key;
if (k->aux)
k->aux->link_key = NULL;
gomp_remove_var (walk->dev, k);
}
gomp_mutex_unlock (&walk->dev->lock); gomp_mutex_unlock (&walk->dev->lock);
walk->dev = NULL; walk->dev = NULL;
......
...@@ -407,7 +407,7 @@ acc_map_data (void *h, void *d, size_t s) ...@@ -407,7 +407,7 @@ acc_map_data (void *h, void *d, size_t s)
assert (tgt); assert (tgt);
splay_tree_key n = tgt->list[0].key; splay_tree_key n = tgt->list[0].key;
assert (n->refcount == 1); assert (n->refcount == 1);
assert (n->dynamic_refcount == 0); assert (n->virtual_refcount == 0);
/* Special reference counting behavior. */ /* Special reference counting behavior. */
n->refcount = REFCOUNT_INFINITY; n->refcount = REFCOUNT_INFINITY;
...@@ -435,12 +435,9 @@ acc_unmap_data (void *h) ...@@ -435,12 +435,9 @@ acc_unmap_data (void *h)
acc_api_info api_info; acc_api_info api_info;
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
size_t host_size;
gomp_mutex_lock (&acc_dev->lock); gomp_mutex_lock (&acc_dev->lock);
splay_tree_key n = lookup_host (acc_dev, h, 1); splay_tree_key n = lookup_host (acc_dev, h, 1);
struct target_mem_desc *t;
if (!n) if (!n)
{ {
...@@ -448,7 +445,7 @@ acc_unmap_data (void *h) ...@@ -448,7 +445,7 @@ acc_unmap_data (void *h)
gomp_fatal ("%p is not a mapped block", (void *)h); gomp_fatal ("%p is not a mapped block", (void *)h);
} }
host_size = n->host_end - n->host_start; size_t host_size = n->host_end - n->host_start;
if (n->host_start != (uintptr_t) h) if (n->host_start != (uintptr_t) h)
{ {
...@@ -457,7 +454,7 @@ acc_unmap_data (void *h) ...@@ -457,7 +454,7 @@ acc_unmap_data (void *h)
(void *) n->host_start, (int) host_size, (void *) h); (void *) n->host_start, (int) host_size, (void *) h);
} }
/* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating
the different 'REFCOUNT_INFINITY' cases, or simply separate the different 'REFCOUNT_INFINITY' cases, or simply separate
'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
etc.)? */ etc.)? */
...@@ -469,19 +466,22 @@ acc_unmap_data (void *h) ...@@ -469,19 +466,22 @@ acc_unmap_data (void *h)
(void *) h, (int) host_size); (void *) h, (int) host_size);
} }
t = n->tgt; splay_tree_remove (&acc_dev->mem_map, n);
struct target_mem_desc *tgt = n->tgt;
if (t->refcount == 1) if (tgt->refcount == REFCOUNT_INFINITY)
{ {
/* This is the last reference, so pull the descriptor off the gomp_mutex_unlock (&acc_dev->lock);
chain. This prevents 'gomp_unmap_tgt' via 'gomp_remove_var' from gomp_fatal ("cannot unmap target block");
freeing the device memory. */ }
t->tgt_end = 0; else if (tgt->refcount > 1)
t->to_free = 0; tgt->refcount--;
else
{
free (tgt->array);
free (tgt);
} }
bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
assert (is_tgt_unmapped);
gomp_mutex_unlock (&acc_dev->lock); gomp_mutex_unlock (&acc_dev->lock);
...@@ -493,29 +493,16 @@ acc_unmap_data (void *h) ...@@ -493,29 +493,16 @@ acc_unmap_data (void *h)
} }
/* Enter dynamic mappings. /* Enter dynamic mapping for a single datum. Return the device pointer. */
The handling for MAPNUM bigger than one is special handling for
'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. For these, only the first mapping
is considered in reference counting; the following ones implicitly follow
suit.
If there's just one mapping, return the device pointer. */
static void * static void *
goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
int async)
{ {
void *d; void *d;
splay_tree_key n; splay_tree_key n;
assert (mapnum > 0); if (!hostaddrs[0] || !sizes[0])
if (mapnum == 1
&& (!hostaddrs[0] || !sizes[0]))
gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]); gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
else if (mapnum > 1
&& !hostaddrs[0])
return /* n/a */ (void *) -1;
goacc_lazy_initialize (); goacc_lazy_initialize ();
...@@ -523,12 +510,7 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, ...@@ -523,12 +510,7 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
struct gomp_device_descr *acc_dev = thr->dev; struct gomp_device_descr *acc_dev = thr->dev;
if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
{ return hostaddrs[0];
if (mapnum == 1)
return hostaddrs[0];
else
return /* n/a */ (void *) -1;
}
acc_prof_info prof_info; acc_prof_info prof_info;
acc_api_info api_info; acc_api_info api_info;
...@@ -542,7 +524,7 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, ...@@ -542,7 +524,7 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
gomp_mutex_lock (&acc_dev->lock); gomp_mutex_lock (&acc_dev->lock);
n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
if (n && mapnum == 1) if (n)
{ {
void *h = hostaddrs[0]; void *h = hostaddrs[0];
size_t s = sizes[0]; size_t s = sizes[0];
...@@ -558,53 +540,31 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, ...@@ -558,53 +540,31 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
assert (n->refcount != REFCOUNT_LINK); assert (n->refcount != REFCOUNT_LINK);
if (n->refcount != REFCOUNT_INFINITY) if (n->refcount != REFCOUNT_INFINITY)
n->refcount++; {
n->dynamic_refcount++; n->refcount++;
n->virtual_refcount++;
gomp_mutex_unlock (&acc_dev->lock); }
}
else if (n && mapnum > 1)
{
d = /* n/a */ (void *) -1;
assert (n->refcount != REFCOUNT_INFINITY
&& n->refcount != REFCOUNT_LINK);
bool processed = false;
struct target_mem_desc *tgt = n->tgt;
for (size_t i = 0; i < tgt->list_count; i++)
if (tgt->list[i].key == n)
{
for (size_t j = 0; j < mapnum; j++)
if (i + j < tgt->list_count && tgt->list[i + j].key)
{
tgt->list[i + j].key->refcount++;
tgt->list[i + j].key->dynamic_refcount++;
}
processed = true;
}
gomp_mutex_unlock (&acc_dev->lock); gomp_mutex_unlock (&acc_dev->lock);
if (!processed)
gomp_fatal ("dynamic refcount incrementing failed for pointer/pset");
} }
else else
{ {
const size_t mapnum = 1;
gomp_mutex_unlock (&acc_dev->lock); gomp_mutex_unlock (&acc_dev->lock);
goacc_aq aq = get_goacc_asyncqueue (async); goacc_aq aq = get_goacc_asyncqueue (async);
struct target_mem_desc *tgt gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
= gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
kinds, true, GOMP_MAP_VARS_ENTER_DATA);
assert (tgt);
n = tgt->list[0].key;
assert (n->refcount == 1);
assert (n->dynamic_refcount == 0);
n->dynamic_refcount++;
d = tgt->to_free; gomp_mutex_lock (&acc_dev->lock);
n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
assert (n != NULL);
assert (n->tgt_offset == 0);
assert ((uintptr_t) hostaddrs[0] == n->host_start);
d = (void *) n->tgt->tgt_start;
gomp_mutex_unlock (&acc_dev->lock);
} }
if (profiling_p) if (profiling_p)
...@@ -620,14 +580,14 @@ void * ...@@ -620,14 +580,14 @@ void *
acc_create (void *h, size_t s) acc_create (void *h, size_t s)
{ {
unsigned short kinds[1] = { GOMP_MAP_ALLOC }; unsigned short kinds[1] = { GOMP_MAP_ALLOC };
return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync); return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
} }
void void
acc_create_async (void *h, size_t s, int async) acc_create_async (void *h, size_t s, int async)
{ {
unsigned short kinds[1] = { GOMP_MAP_ALLOC }; unsigned short kinds[1] = { GOMP_MAP_ALLOC };
goacc_enter_data (1, &h, &s, &kinds, async); goacc_enter_datum (&h, &s, &kinds, async);
} }
/* acc_present_or_create used to be what acc_create is now. */ /* acc_present_or_create used to be what acc_create is now. */
...@@ -653,14 +613,14 @@ void * ...@@ -653,14 +613,14 @@ void *
acc_copyin (void *h, size_t s) acc_copyin (void *h, size_t s)
{ {
unsigned short kinds[1] = { GOMP_MAP_TO }; unsigned short kinds[1] = { GOMP_MAP_TO };
return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync); return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
} }
void void
acc_copyin_async (void *h, size_t s, int async) acc_copyin_async (void *h, size_t s, int async)
{ {
unsigned short kinds[1] = { GOMP_MAP_TO }; unsigned short kinds[1] = { GOMP_MAP_TO };
goacc_enter_data (1, &h, &s, &kinds, async); goacc_enter_datum (&h, &s, &kinds, async);
} }
/* acc_present_or_copyin used to be what acc_copyin is now. */ /* acc_present_or_copyin used to be what acc_copyin is now. */
...@@ -683,10 +643,10 @@ acc_pcopyin (void *h, size_t s) ...@@ -683,10 +643,10 @@ acc_pcopyin (void *h, size_t s)
#endif #endif
/* Exit a dynamic mapping. */ /* Exit a dynamic mapping for a single variable. */
static void static void
goacc_exit_data (void *h, size_t s, unsigned short kind, int async) goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
{ {
/* No need to call lazy open, as the data must already have been /* No need to call lazy open, as the data must already have been
mapped. */ mapped. */
...@@ -723,28 +683,23 @@ goacc_exit_data (void *h, size_t s, unsigned short kind, int async) ...@@ -723,28 +683,23 @@ goacc_exit_data (void *h, size_t s, unsigned short kind, int async)
(void *) h, (int) s, (void *) n->host_start, (int) host_size); (void *) h, (int) s, (void *) n->host_start, (int) host_size);
} }
assert (n->refcount != REFCOUNT_LINK);
if (n->refcount != REFCOUNT_INFINITY
&& n->refcount < n->dynamic_refcount)
{
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("Dynamic reference counting assert fail\n");
}
bool finalize = (kind == GOMP_MAP_DELETE bool finalize = (kind == GOMP_MAP_DELETE
|| kind == GOMP_MAP_FORCE_FROM); || kind == GOMP_MAP_FORCE_FROM);
if (finalize) if (finalize)
{ {
if (n->refcount != REFCOUNT_INFINITY) if (n->refcount != REFCOUNT_INFINITY)
n->refcount -= n->dynamic_refcount; n->refcount -= n->virtual_refcount;
n->dynamic_refcount = 0; n->virtual_refcount = 0;
} }
else if (n->dynamic_refcount)
if (n->virtual_refcount > 0)
{ {
if (n->refcount != REFCOUNT_INFINITY) if (n->refcount != REFCOUNT_INFINITY)
n->refcount--; n->refcount--;
n->dynamic_refcount--; n->virtual_refcount--;
} }
else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
n->refcount--;
if (n->refcount == 0) if (n->refcount == 0)
{ {
...@@ -785,49 +740,49 @@ goacc_exit_data (void *h, size_t s, unsigned short kind, int async) ...@@ -785,49 +740,49 @@ goacc_exit_data (void *h, size_t s, unsigned short kind, int async)
void void
acc_delete (void *h , size_t s) acc_delete (void *h , size_t s)
{ {
goacc_exit_data (h, s, GOMP_MAP_RELEASE, acc_async_sync); goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
} }
void void
acc_delete_async (void *h , size_t s, int async) acc_delete_async (void *h , size_t s, int async)
{ {
goacc_exit_data (h, s, GOMP_MAP_RELEASE, async); goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
} }
void void
acc_delete_finalize (void *h , size_t s) acc_delete_finalize (void *h , size_t s)
{ {
goacc_exit_data (h, s, GOMP_MAP_DELETE, acc_async_sync); goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
} }
void void
acc_delete_finalize_async (void *h , size_t s, int async) acc_delete_finalize_async (void *h , size_t s, int async)
{ {
goacc_exit_data (h, s, GOMP_MAP_DELETE, async); goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
} }
void void
acc_copyout (void *h, size_t s) acc_copyout (void *h, size_t s)
{ {
goacc_exit_data (h, s, GOMP_MAP_FROM, acc_async_sync); goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
} }
void void
acc_copyout_async (void *h, size_t s, int async) acc_copyout_async (void *h, size_t s, int async)
{ {
goacc_exit_data (h, s, GOMP_MAP_FROM, async); goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
} }
void void
acc_copyout_finalize (void *h, size_t s) acc_copyout_finalize (void *h, size_t s)
{ {
goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync); goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
} }
void void
acc_copyout_finalize_async (void *h, size_t s, int async) acc_copyout_finalize_async (void *h, size_t s, int async)
{ {
goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, async); goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
} }
static void static void
...@@ -912,123 +867,145 @@ acc_update_self_async (void *h, size_t s, int async) ...@@ -912,123 +867,145 @@ acc_update_self_async (void *h, size_t s, int async)
update_dev_host (0, h, s, async); update_dev_host (0, h, s, async);
} }
/* Some types of (pointer) variables use several consecutive mappings, which
must be treated as a group for enter/exit data directives. This function
returns the last mapping in such a group (inclusive), or POS for singleton
mappings. */
/* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper static int
functions. */ find_group_last (int pos, size_t mapnum, unsigned short *kinds)
{
unsigned char kind0 = kinds[pos] & 0xff;
int first_pos = pos, last_pos = pos;
/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. if (kind0 == GOMP_MAP_TO_PSET)
{
while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
last_pos = ++pos;
/* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */
assert (last_pos > first_pos);
}
else
{
/* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
mapping. */
if (pos + 1 < mapnum
&& (kinds[pos + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER)
return pos + 1;
/* We can have one or several GOMP_MAP_POINTER mappings after a to/from
(etc.) mapping. */
while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
last_pos = ++pos;
}
Only the first mapping is considered in reference counting; the following return last_pos;
ones implicitly follow suit. Similarly, 'copyout' is done only for the }
first mapping. */
/* Map variables for OpenACC "enter data". We can't just call
gomp_map_vars_async once, because individual mapped variables might have
"exit data" called for them at different times. */
static void static void
goacc_remove_pointer (void *h, size_t s, unsigned short kind, int async) goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
void **hostaddrs, size_t *sizes,
unsigned short *kinds, goacc_aq aq)
{ {
kind &= 0xff; for (size_t i = 0; i < mapnum; i++)
{
struct goacc_thread *thr = goacc_thread (); int group_last = find_group_last (i, mapnum, kinds);
struct gomp_device_descr *acc_dev = thr->dev;
splay_tree_key n;
struct target_mem_desc *t;
if (!acc_is_present (h, s))
return;
gomp_mutex_lock (&acc_dev->lock);
n = lookup_host (acc_dev, h, 1); gomp_map_vars_async (acc_dev, aq,
(group_last - i) + 1,
&hostaddrs[i], NULL,
&sizes[i], &kinds[i], true,
GOMP_MAP_VARS_OPENACC_ENTER_DATA);
if (!n) i = group_last;
{
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("%p is not a mapped block", (void *)h);
} }
}
gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); /* Unmap variables for OpenACC "exit data". */
t = n->tgt;
assert (n->refcount != REFCOUNT_INFINITY static void
&& n->refcount != REFCOUNT_LINK); goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
if (n->refcount < n->dynamic_refcount) void **hostaddrs, size_t *sizes,
{ unsigned short *kinds, goacc_aq aq)
gomp_mutex_unlock (&acc_dev->lock); {
gomp_fatal ("Dynamic reference counting assert fail\n"); gomp_mutex_lock (&acc_dev->lock);
}
bool finalize = (kind == GOMP_MAP_DELETE for (size_t i = 0; i < mapnum; ++i)
|| kind == GOMP_MAP_FORCE_FROM);
if (finalize)
{
n->refcount -= n->dynamic_refcount;
n->dynamic_refcount = 0;
}
else if (n->dynamic_refcount)
{ {
n->refcount--; unsigned char kind = kinds[i] & 0xff;
n->dynamic_refcount--; bool copyfrom = false;
} bool finalize = false;
if (n->refcount == 0) if (kind == GOMP_MAP_FORCE_FROM
{ || kind == GOMP_MAP_DELETE)
goacc_aq aq = get_goacc_asyncqueue (async); finalize = true;
bool copyout = (kind == GOMP_MAP_FROM switch (kind)
|| kind == GOMP_MAP_FORCE_FROM);
if (copyout)
{ {
void *d = (void *) (t->tgt_start + n->tgt_offset case GOMP_MAP_FROM:
+ (uintptr_t) h - n->host_start); case GOMP_MAP_FORCE_FROM:
gomp_copy_dev2host (acc_dev, aq, h, d, s); case GOMP_MAP_ALWAYS_FROM:
} copyfrom = true;
/* Fallthrough. */
case GOMP_MAP_TO_PSET:
case GOMP_MAP_POINTER:
case GOMP_MAP_DELETE:
case GOMP_MAP_RELEASE:
{
struct splay_tree_key_s cur_node;
size_t size;
if (kind == GOMP_MAP_POINTER)
size = sizeof (void *);
else
size = sizes[i];
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + size;
splay_tree_key n
= splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (n == NULL)
continue;
if (finalize)
{
if (n->refcount != REFCOUNT_INFINITY)
n->refcount -= n->virtual_refcount;
n->virtual_refcount = 0;
}
if (aq) if (n->virtual_refcount > 0)
{ {
/* TODO The way the following code is currently implemented, we need if (n->refcount != REFCOUNT_INFINITY)
the 'is_tgt_unmapped' return value from 'gomp_remove_var', so n->refcount--;
can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt' n->virtual_refcount--;
comment in }
<http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
PR92881 -- so have to synchronize here. */ n->refcount--;
if (!acc_dev->openacc.async.synchronize_func (aq))
{ if (copyfrom
gomp_mutex_unlock (&acc_dev->lock); && (kind != GOMP_MAP_FROM || n->refcount == 0))
gomp_fatal ("synchronize failed"); gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start,
} (void *) (n->tgt->tgt_start + n->tgt_offset
} + cur_node.host_start
bool is_tgt_unmapped = false; - n->host_start),
for (size_t i = 0; i < t->list_count; i++) cur_node.host_end - cur_node.host_start);
{
is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key); if (n->refcount == 0)
if (is_tgt_unmapped) gomp_remove_var_async (acc_dev, n, aq);
break; }
break;
default:
gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
kind);
} }
assert (is_tgt_unmapped);
} }
gomp_mutex_unlock (&acc_dev->lock); gomp_mutex_unlock (&acc_dev->lock);
gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
}
/* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
'GOMP_MAP_POINTER'. */
static int
find_pointer (int pos, size_t mapnum, unsigned short *kinds)
{
if (pos + 1 >= mapnum)
return 0;
unsigned char kind = kinds[pos+1] & 0xff;
if (kind == GOMP_MAP_TO_PSET)
return 3;
else if (kind == GOMP_MAP_POINTER)
return 2;
return 0;
} }
void void
...@@ -1147,81 +1124,12 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, ...@@ -1147,81 +1124,12 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
va_end (ap); va_end (ap);
} }
/* In c, non-pointers and arrays are represented by a single data clause. goacc_aq aq = get_goacc_asyncqueue (async);
Dynamically allocated arrays and subarrays are represented by a data
clause followed by an internal GOMP_MAP_POINTER.
In fortran, scalars and not allocated arrays are represented by a
single data clause. Allocated arrays and subarrays have three mappings:
1) the original data clause, 2) a PSET 3) a pointer to the array data.
*/
if (data_enter) if (data_enter)
{ goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
for (i = 0; i < mapnum; i++)
{
/* Scan for pointers and PSETs. */
int pointer = find_pointer (i, mapnum, kinds);
if (!pointer)
{
unsigned char kind = kinds[i] & 0xff;
switch (kind)
{
case GOMP_MAP_ALLOC:
case GOMP_MAP_FORCE_ALLOC:
case GOMP_MAP_TO:
case GOMP_MAP_FORCE_TO:
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
kind);
break;
}
/* We actually have one mapping. */
pointer = 1;
}
goacc_enter_data (pointer, &hostaddrs[i], &sizes[i], &kinds[i],
async);
/* If applicable, increment 'i' further; OpenACC requires fortran
arrays to be contiguous, so each PSET is associated with
one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
one MAP_POINTER. */
i += pointer - 1;
}
}
else else
for (i = 0; i < mapnum; ++i) goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
{
int pointer = find_pointer (i, mapnum, kinds);
if (!pointer)
{
unsigned char kind = kinds[i] & 0xff;
switch (kind)
{
case GOMP_MAP_RELEASE:
case GOMP_MAP_DELETE:
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_FROM:
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
kind);
break;
}
goacc_exit_data (hostaddrs[i], sizes[i], kinds[i], async);
}
else
{
goacc_remove_pointer (hostaddrs[i], sizes[i], kinds[i], async);
/* See the above comment. */
i += pointer - 1;
}
}
out_prof: out_prof:
if (profiling_p) if (profiling_p)
......
...@@ -537,8 +537,10 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, ...@@ -537,8 +537,10 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
struct target_mem_desc *tgt struct target_mem_desc *tgt
= gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
tgt->list_count = mapnum; tgt->list_count = mapnum;
tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
|| pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
tgt->device_descr = devicep; tgt->device_descr = devicep;
tgt->prev = NULL;
struct gomp_coalesce_buf cbuf, *cbufp = NULL; struct gomp_coalesce_buf cbuf, *cbufp = NULL;
if (mapnum == 0) if (mapnum == 0)
...@@ -963,7 +965,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, ...@@ -963,7 +965,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->list[i].offset = 0; tgt->list[i].offset = 0;
tgt->list[i].length = k->host_end - k->host_start; tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1; k->refcount = 1;
k->dynamic_refcount = 0; k->virtual_refcount = 0;
tgt->refcount++; tgt->refcount++;
array->left = NULL; array->left = NULL;
array->right = NULL; array->right = NULL;
...@@ -1101,8 +1103,20 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, ...@@ -1101,8 +1103,20 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
/* If the variable from "omp target enter data" map-list was already mapped, /* If the variable from "omp target enter data" map-list was already mapped,
tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
gomp_exit_data. */ gomp_exit_data. */
if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
{ || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
&& tgt->refcount == 0)
{
/* If we're about to discard a target_mem_desc with no "structural"
references (tgt->refcount == 0), any splay keys linked in the tgt's
list must have their virtual refcount incremented to represent that
"lost" reference in order to implement the semantics of the OpenACC
"present increment" operation properly. */
if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
for (i = 0; i < tgt->list_count; i++)
if (tgt->list[i].key)
tgt->list[i].key->virtual_refcount++;
free (tgt); free (tgt);
tgt = NULL; tgt = NULL;
} }
...@@ -1240,7 +1254,14 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, ...@@ -1240,7 +1254,14 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
continue; continue;
bool do_unmap = false; bool do_unmap = false;
if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) if (k->tgt == tgt
&& k->virtual_refcount > 0
&& k->refcount != REFCOUNT_INFINITY)
{
k->virtual_refcount--;
k->refcount--;
}
else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
k->refcount--; k->refcount--;
else if (k->refcount == 1) else if (k->refcount == 1)
{ {
...@@ -1405,7 +1426,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, ...@@ -1405,7 +1426,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
k->tgt = tgt; k->tgt = tgt;
k->tgt_offset = target_table[i].start; k->tgt_offset = target_table[i].start;
k->refcount = REFCOUNT_INFINITY; k->refcount = REFCOUNT_INFINITY;
k->dynamic_refcount = 0; k->virtual_refcount = 0;
k->aux = NULL; k->aux = NULL;
array->left = NULL; array->left = NULL;
array->right = NULL; array->right = NULL;
...@@ -1438,7 +1459,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, ...@@ -1438,7 +1459,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
k->tgt = tgt; k->tgt = tgt;
k->tgt_offset = target_var->start; k->tgt_offset = target_var->start;
k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY; k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
k->dynamic_refcount = 0; k->virtual_refcount = 0;
k->aux = NULL; k->aux = NULL;
array->left = NULL; array->left = NULL;
array->right = NULL; array->right = NULL;
...@@ -1673,22 +1694,6 @@ gomp_unload_device (struct gomp_device_descr *devicep) ...@@ -1673,22 +1694,6 @@ gomp_unload_device (struct gomp_device_descr *devicep)
} }
} }
/* Free address mapping tables. MM must be locked on entry, and remains locked
on return. */
attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
while (mem_map->root)
{
struct target_mem_desc *tgt = mem_map->root->key.tgt;
splay_tree_remove (mem_map, &mem_map->root->key);
free (tgt->array);
free (tgt);
}
}
/* Host fallback for GOMP_target{,_ext} routines. */ /* Host fallback for GOMP_target{,_ext} routines. */
static void static void
...@@ -2700,7 +2705,7 @@ omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, ...@@ -2700,7 +2705,7 @@ omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
k->tgt = tgt; k->tgt = tgt;
k->tgt_offset = (uintptr_t) device_ptr + device_offset; k->tgt_offset = (uintptr_t) device_ptr + device_offset;
k->refcount = REFCOUNT_INFINITY; k->refcount = REFCOUNT_INFINITY;
k->dynamic_refcount = 0; k->virtual_refcount = 0;
k->aux = NULL; k->aux = NULL;
array->left = NULL; array->left = NULL;
array->right = NULL; array->right = NULL;
......
int foo[16];
#pragma omp declare target (foo)
__attribute__((used)) void bar (void)
{
#pragma omp target parallel for
for (int i = 0; i < 16; i++)
foo[i] = i;
}
int
main (int argc, char *argv[])
{
int *foo_copy = foo;
/* Try to trigger the unmapping of a REFCOUNT_INFINITY target block. This
does nothing at the time of writing. */
#pragma omp target exit data map(delete: foo_copy[0:16])
return 0;
}
/* Verify that 'acc_copyout' etc. is a no-op if there's still a structured /* Verify that 'acc_copyout' etc. is a no-op if there's still a structured
reference count. */ reference count. */
/* { dg-xfail-run-if "TODO PR92843" { *-*-* } } */
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ /* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
#include <assert.h> #include <assert.h>
......
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
#include <openacc.h>
int foo[16];
#pragma acc declare device_resident(foo)
int
main (int argc, char *argv[])
{
acc_init (acc_device_default);
acc_unmap_data ((void *) foo);
/* { dg-output "libgomp: cannot unmap target block" } */
return 0;
}
/* { dg-shouldfail "" } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment