Commit 4a75460b by Tom de Vries Committed by Tom de Vries

[nvptx, libgomp] Fix cuMemAlloc with size zero

Consider test-case:
...
int
main (void)
{
  #pragma acc parallel async
  ;
  #pragma acc parallel async
  ;
  #pragma acc wait

  return 0;
}
...

This fails with:
...
libgomp: cuMemAlloc error: invalid argument
Segmentation fault (core dumped)
...
The cuMemAlloc error is due to the fact that we're trying to allocate 0 bytes.

Fix this by preventing calling map_push with size zero argument in nvptx_exec.

This also has the consequence that for the abort-1.c test-case, we end up
calling cuMemFree during map_fini for the struct cuda_map allocated in
map_init, which fails because an abort happened.  Fix this by calling
cuMemFree with CUDA_CALL_NOCHECK in cuda_map_destroy.

2019-01-23  Tom de Vries  <tdevries@suse.de>

	PR target/88946
	* plugin/plugin-nvptx.c (cuda_map_destroy): Use CUDA_CALL_NOCHECK for
	cuMemFree.
	(nvptx_exec): Don't call map_push if mapnum == 0.
	* testsuite/libgomp.oacc-c-c++-common/pr88946.c: New test.

From-SVN: r268178
parent 4fef8e4d
2019-01-23 Tom de Vries <tdevries@suse.de> 2019-01-23 Tom de Vries <tdevries@suse.de>
PR target/88946
* plugin/plugin-nvptx.c (cuda_map_destroy): Use CUDA_CALL_NOCHECK for
cuMemFree.
(nvptx_exec): Don't call map_push if mapnum == 0.
* testsuite/libgomp.oacc-c-c++-common/pr88946.c: New test.
2019-01-23 Tom de Vries <tdevries@suse.de>
PR target/88941 PR target/88941
PR target/88939 PR target/88939
* plugin/plugin-nvptx.c (cuda_map_destroy): Handle map->active case. * plugin/plugin-nvptx.c (cuda_map_destroy): Handle map->active case.
......
...@@ -260,7 +260,7 @@ cuda_map_destroy (struct cuda_map *map) ...@@ -260,7 +260,7 @@ cuda_map_destroy (struct cuda_map *map)
atexit handler (PR83795). */ atexit handler (PR83795). */
; ;
else else
CUDA_CALL_ASSERT (cuMemFree, map->d); CUDA_CALL_NOCHECK (cuMemFree, map->d);
free (map); free (map);
} }
...@@ -1164,7 +1164,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, ...@@ -1164,7 +1164,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
struct ptx_stream *dev_str; struct ptx_stream *dev_str;
void *kargs[1]; void *kargs[1];
void *hp; void *hp;
CUdeviceptr dp; CUdeviceptr dp = 0;
struct nvptx_thread *nvthd = nvptx_thread (); struct nvptx_thread *nvthd = nvptx_thread ();
int warp_size = nvthd->ptx_dev->warp_size; int warp_size = nvthd->ptx_dev->warp_size;
const char *maybe_abort_msg = "(perhaps abort was called)"; const char *maybe_abort_msg = "(perhaps abort was called)";
...@@ -1361,23 +1361,27 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, ...@@ -1361,23 +1361,27 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
dims[GOMP_DIM_VECTOR]); dims[GOMP_DIM_VECTOR]);
} }
/* This reserves a chunk of a pre-allocated page of memory mapped on both if (mapnum > 0)
the host and the device. HP is a host pointer to the new chunk, and DP is {
the corresponding device pointer. */ /* This reserves a chunk of a pre-allocated page of memory mapped on both
pthread_mutex_lock (&ptx_event_lock); the host and the device. HP is a host pointer to the new chunk, and DP is
dp = map_push (dev_str, mapnum * sizeof (void *)); the corresponding device pointer. */
pthread_mutex_unlock (&ptx_event_lock); pthread_mutex_lock (&ptx_event_lock);
dp = map_push (dev_str, mapnum * sizeof (void *));
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); pthread_mutex_unlock (&ptx_event_lock);
/* Copy the array of arguments to the mapped page. */ GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
hp = alloca(sizeof(void *) * mapnum);
for (i = 0; i < mapnum; i++) /* Copy the array of arguments to the mapped page. */
((void **) hp)[i] = devaddrs[i]; hp = alloca(sizeof(void *) * mapnum);
for (i = 0; i < mapnum; i++)
((void **) hp)[i] = devaddrs[i];
/* Copy the (device) pointers to arguments to the device */
CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp,
mapnum * sizeof (void *));
}
/* Copy the (device) pointers to arguments to the device */
CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp,
mapnum * sizeof (void *));
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch" GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
" gangs=%u, workers=%u, vectors=%u\n", " gangs=%u, workers=%u, vectors=%u\n",
__FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG], __FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG],
...@@ -1422,7 +1426,8 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, ...@@ -1422,7 +1426,8 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream); CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream);
event_add (PTX_EVT_KNL, e, (void *)dev_str, 0); if (mapnum > 0)
event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
} }
#else #else
r = CUDA_CALL_NOCHECK (cuCtxSynchronize, ); r = CUDA_CALL_NOCHECK (cuCtxSynchronize, );
...@@ -1439,7 +1444,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, ...@@ -1439,7 +1444,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
#ifndef DISABLE_ASYNC #ifndef DISABLE_ASYNC
if (async < acc_async_noval) if (async < acc_async_noval)
#endif #endif
map_pop (dev_str); {
if (mapnum > 0)
map_pop (dev_str);
}
} }
void * openacc_get_current_cuda_context (void); void * openacc_get_current_cuda_context (void);
......
/* { dg-do run } */
/* Test-case for PR target/88946: two empty "async" compute regions followed
   by a wait.  An empty region has no data mappings (mapnum == 0), which
   previously made the nvptx plugin call cuMemAlloc with size zero, failing
   with "libgomp: cuMemAlloc error: invalid argument" and a segfault.  */
int
main (void)
{
  /* Empty async offloaded region — no arguments are mapped to the device.  */
#pragma acc parallel async
;
  /* A second empty async region, queued behind the first.  */
#pragma acc parallel async
;
  /* Block until both asynchronous regions have completed.  */
#pragma acc wait
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment