Commit 6103184e by Alexander Monakov Committed by Alexander Monakov

OpenMP offloading to NVPTX: libgomp changes

	* Makefile.am (libgomp_la_SOURCES): Add atomic.c, icv.c, icv-device.c.
	* Makefile.in: Regenerate.
	* configure.ac [nvptx*-*-*] (libgomp_use_pthreads): Set and use it...
	(LIBGOMP_USE_PTHREADS): ...here; new define.
	* configure: Regenerate.
	* config.h.in: Likewise.
	* config/posix/affinity.c: Move to...
	* affinity.c: ...here (new file).  Guard use of Pthreads-specific
	interface by LIBGOMP_USE_PTHREADS. 
	* critical.c: Split out GOMP_atomic_{start,end} into...
	* atomic.c: ...here (new file).
	* env.c: Split out ICV definitions into...
	* icv.c: ...here (new file) and...
	* icv-device.c: ...here. New file.
	* config/linux/lock.c (gomp_init_lock_30): Move to generic lock.c.
	(gomp_destroy_lock_30): Ditto.
	(gomp_set_lock_30): Ditto.
	(gomp_unset_lock_30): Ditto.
	(gomp_test_lock_30): Ditto.
	(gomp_init_nest_lock_30): Ditto.
	(gomp_destroy_nest_lock_30): Ditto.
	(gomp_set_nest_lock_30): Ditto.
	(gomp_unset_nest_lock_30): Ditto.
	(gomp_test_nest_lock_30): Ditto.
	* lock.c: New.
	* config/nvptx/lock.c: New.
	* config/nvptx/bar.c: New.
	* config/nvptx/bar.h: New.
	* config/nvptx/doacross.h: New.
	* config/nvptx/error.c: New.
	* config/nvptx/icv-device.c: New.
	* config/nvptx/mutex.h: New.
	* config/nvptx/pool.h: New.
	* config/nvptx/proc.c: New.
	* config/nvptx/ptrlock.h: New.
	* config/nvptx/sem.h: New.
	* config/nvptx/simple-bar.h: New.
	* config/nvptx/target.c: New.
	* config/nvptx/task.c: New.
	* config/nvptx/team.c: New.
	* config/nvptx/time.c: New.
	* config/posix/simple-bar.h: New.
	* libgomp.h: Guard pthread.h inclusion.  Include simple-bar.h.
	(gomp_num_teams_var): Declare.
	(struct gomp_thread_pool): Change threads_dock member to
	gomp_simple_barrier_t.
	[__nvptx__] (gomp_thread): New implementation.
	(gomp_thread_attr): Guard by LIBGOMP_USE_PTHREADS.
	(gomp_thread_destructor): Ditto.
	(gomp_init_thread_affinity): Ditto.
	* team.c: Guard uses of Pthreads-specific interfaces by
	LIBGOMP_USE_PTHREADS.  Adjust all uses of threads_dock.
	(gomp_free_thread) [__nvptx__]: Do not call 'free'.

	* config/nvptx/alloc.c: Delete.
	* config/nvptx/barrier.c: Ditto.
	* config/nvptx/fortran.c: Ditto.
	* config/nvptx/iter.c: Ditto.
	* config/nvptx/iter_ull.c: Ditto.
	* config/nvptx/loop.c: Ditto.
	* config/nvptx/loop_ull.c: Ditto.
	* config/nvptx/ordered.c: Ditto.
	* config/nvptx/parallel.c: Ditto.
	* config/nvptx/priority_queue.c: Ditto.
	* config/nvptx/sections.c: Ditto.
	* config/nvptx/single.c: Ditto.
	* config/nvptx/splay-tree.c: Ditto.
	* config/nvptx/work.c: Ditto.

	* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Pass
	-foffload=-lgfortran in addition to -lgfortran.
	* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags): Ditto.

	* plugin/plugin-nvptx.c: Include <limits.h>.
	(struct targ_fn_descriptor): Add new fields.
	(struct ptx_device): Ditto.  Set them...
	(nvptx_open_device): ...here.
	(nvptx_adjust_launch_bounds): New.
	(nvptx_host2dev): Allow NULL 'nvthd'.
	(nvptx_dev2host): Ditto.
	(GOMP_OFFLOAD_get_caps): Add GOMP_OFFLOAD_CAP_OPENMP_400.
	(link_ptx): Adjust log sizes.
	(nvptx_host2dev): Allow NULL 'nvthd'.
	(nvptx_dev2host): Ditto.
	(nvptx_set_clocktick): New.  Use it...
	(GOMP_OFFLOAD_load_image): ...here.  Set new targ_fn_descriptor
	fields.
	(GOMP_OFFLOAD_dev2dev): New.
	(nvptx_adjust_launch_bounds): New.
	(nvptx_stacks_size): New.
	(nvptx_stacks_alloc): New.
	(nvptx_stacks_free): New.
	(GOMP_OFFLOAD_run): New.
	(GOMP_OFFLOAD_async_run): New (stub).

Co-Authored-By: Dmitry Melnik <dm@ispras.ru>
Co-Authored-By: Jakub Jelinek <jakub@redhat.com>

From-SVN: r242789
parent 6251fe93
2016-11-23 Alexander Monakov <amonakov@ispras.ru>
Jakub Jelinek <jakub@redhat.com>
Dmitry Melnik <dm@ispras.ru>
* Makefile.am (libgomp_la_SOURCES): Add atomic.c, icv.c, icv-device.c.
* Makefile.in: Regenerate.
* configure.ac [nvptx*-*-*] (libgomp_use_pthreads): Set and use it...
(LIBGOMP_USE_PTHREADS): ...here; new define.
* configure: Regenerate.
* config.h.in: Likewise.
* config/posix/affinity.c: Move to...
* affinity.c: ...here (new file). Guard use of Pthreads-specific
interface by LIBGOMP_USE_PTHREADS.
* critical.c: Split out GOMP_atomic_{start,end} into...
* atomic.c: ...here (new file).
* env.c: Split out ICV definitions into...
* icv.c: ...here (new file) and...
* icv-device.c: ...here. New file.
* config/linux/lock.c (gomp_init_lock_30): Move to generic lock.c.
(gomp_destroy_lock_30): Ditto.
(gomp_set_lock_30): Ditto.
(gomp_unset_lock_30): Ditto.
(gomp_test_lock_30): Ditto.
(gomp_init_nest_lock_30): Ditto.
(gomp_destroy_nest_lock_30): Ditto.
(gomp_set_nest_lock_30): Ditto.
(gomp_unset_nest_lock_30): Ditto.
(gomp_test_nest_lock_30): Ditto.
* lock.c: New.
* config/nvptx/lock.c: New.
* config/nvptx/bar.c: New.
* config/nvptx/bar.h: New.
* config/nvptx/doacross.h: New.
* config/nvptx/error.c: New.
* config/nvptx/icv-device.c: New.
* config/nvptx/mutex.h: New.
* config/nvptx/pool.h: New.
* config/nvptx/proc.c: New.
* config/nvptx/ptrlock.h: New.
* config/nvptx/sem.h: New.
* config/nvptx/simple-bar.h: New.
* config/nvptx/target.c: New.
* config/nvptx/task.c: New.
* config/nvptx/team.c: New.
* config/nvptx/time.c: New.
* config/posix/simple-bar.h: New.
* libgomp.h: Guard pthread.h inclusion. Include simple-bar.h.
(gomp_num_teams_var): Declare.
(struct gomp_thread_pool): Change threads_dock member to
gomp_simple_barrier_t.
[__nvptx__] (gomp_thread): New implementation.
(gomp_thread_attr): Guard by LIBGOMP_USE_PTHREADS.
(gomp_thread_destructor): Ditto.
(gomp_init_thread_affinity): Ditto.
* team.c: Guard uses of Pthreads-specific interfaces by
LIBGOMP_USE_PTHREADS. Adjust all uses of threads_dock.
(gomp_free_thread) [__nvptx__]: Do not call 'free'.
* config/nvptx/alloc.c: Delete.
* config/nvptx/barrier.c: Ditto.
* config/nvptx/fortran.c: Ditto.
* config/nvptx/iter.c: Ditto.
* config/nvptx/iter_ull.c: Ditto.
* config/nvptx/loop.c: Ditto.
* config/nvptx/loop_ull.c: Ditto.
* config/nvptx/ordered.c: Ditto.
* config/nvptx/parallel.c: Ditto.
* config/nvptx/priority_queue.c: Ditto.
* config/nvptx/sections.c: Ditto.
* config/nvptx/single.c: Ditto.
* config/nvptx/splay-tree.c: Ditto.
* config/nvptx/work.c: Ditto.
* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Pass
-foffload=-lgfortran in addition to -lgfortran.
* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags): Ditto.
* plugin/plugin-nvptx.c: Include <limits.h>.
(struct targ_fn_descriptor): Add new fields.
(struct ptx_device): Ditto. Set them...
(nvptx_open_device): ...here.
(nvptx_adjust_launch_bounds): New.
(nvptx_host2dev): Allow NULL 'nvthd'.
(nvptx_dev2host): Ditto.
(GOMP_OFFLOAD_get_caps): Add GOMP_OFFLOAD_CAP_OPENMP_400.
(link_ptx): Adjust log sizes.
(nvptx_host2dev): Allow NULL 'nvthd'.
(nvptx_dev2host): Ditto.
(nvptx_set_clocktick): New. Use it...
(GOMP_OFFLOAD_load_image): ...here. Set new targ_fn_descriptor
fields.
(GOMP_OFFLOAD_dev2dev): New.
(nvptx_adjust_launch_bounds): New.
(nvptx_stacks_size): New.
(nvptx_stacks_alloc): New.
(nvptx_stacks_free): New.
(GOMP_OFFLOAD_run): New.
(GOMP_OFFLOAD_async_run): New (stub).
2016-11-23 Martin Jambor <mjambor@suse.cz>
* testsuite/libgomp.hsa.c/bits-insns.c: New test.
......
......@@ -58,12 +58,12 @@ libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script) \
libgomp_la_DEPENDENCIES = $(libgomp_version_dep)
libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \
oacc-plugin.c oacc-cuda.c priority_queue.c
libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
icv.c icv-device.c iter.c iter_ull.c loop.c loop_ull.c ordered.c \
parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c
include $(top_srcdir)/plugin/Makefrag.am
......
......@@ -150,14 +150,14 @@ libgomp_plugin_nvptx_la_LINK = $(LIBTOOL) --tag=CC \
@PLUGIN_NVPTX_TRUE@ $(toolexeclibdir)
libgomp_la_LIBADD =
@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
parallel.lo sections.lo single.lo task.lo team.lo work.lo \
lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
fortran.lo affinity.lo target.lo splay-tree.lo \
libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \
oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \
priority_queue.lo $(am__objects_1)
am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
env.lo error.lo icv.lo icv-device.lo iter.lo iter_ull.lo \
loop.lo loop_ull.lo ordered.lo parallel.lo sections.lo \
single.lo task.lo team.lo work.lo lock.lo mutex.lo proc.lo \
sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
oacc-plugin.lo oacc-cuda.lo priority_queue.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
......@@ -400,13 +400,14 @@ libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script) \
libgomp_la_DEPENDENCIES = $(libgomp_version_dep)
libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \
single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \
bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c $(am__append_3)
libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
error.c icv.c icv-device.c iter.c iter_ull.c loop.c loop_ull.c \
ordered.c parallel.c sections.c single.c task.c team.c work.c \
lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c \
affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
$(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
......@@ -571,12 +572,15 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/critical.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv-device.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
......
......@@ -32,12 +32,14 @@ gomp_init_affinity (void)
{
}
#ifdef LIBGOMP_USE_PTHREADS
/* Stub version of the thread-affinity hook: both ATTR and PLACE are
ignored and nothing is done.  It is only compiled when
LIBGOMP_USE_PTHREADS is defined, since its signature needs
pthread_attr_t.  */
void
gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
{
/* The casts only mark the parameters as intentionally unused.  */
(void) attr;
(void) place;
}
#endif
void **
gomp_affinity_alloc (unsigned long count, bool quiet)
......
/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file contains helpers for the ATOMIC construct. */
#include "libgomp.h"
/* This mutex is used when atomic operations don't exist for the target
in the mode requested.  The result is not globally atomic, but it works
as long as all parallel references are within #pragma omp atomic
directives.  According to responses received from omp@openmp.org, this
appears to be within spec, which makes sense since that's how several
other compilers handle this situation as well.  */
static gomp_mutex_t atomic_lock;
/* Enter the fallback ATOMIC region by acquiring the file-local
atomic_lock.  Must be paired with GOMP_atomic_end.  */
void
GOMP_atomic_start (void)
{
gomp_mutex_lock (&atomic_lock);
}
/* Leave the fallback ATOMIC region by releasing the file-local
atomic_lock taken in GOMP_atomic_start.  */
void
GOMP_atomic_end (void)
{
gomp_mutex_unlock (&atomic_lock);
}
/* Only compiled when GOMP_MUTEX_INIT_0 is zero or undefined.
NOTE(review): presumably that macro means an all-zero gomp_mutex_t is
already a valid unlocked mutex -- confirm in the target's mutex.h.  */
#if !GOMP_MUTEX_INIT_0
/* Constructor that initializes atomic_lock at load time.  */
static void __attribute__((constructor))
initialize_atomic (void)
{
gomp_mutex_init (&atomic_lock);
}
#endif
......@@ -115,6 +115,9 @@
/* Define to 1 if GNU symbol versioning is used for libgomp. */
#undef LIBGOMP_GNU_SYMBOL_VERSIONING
/* Define to 1 if libgomp should use POSIX threads. */
#undef LIBGOMP_USE_PTHREADS
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
......
......@@ -32,98 +32,8 @@
#include <sys/syscall.h>
#include "wait.h"
/* The internal gomp_mutex_t and the external non-recursive omp_lock_t
have the same form. Re-use it. */
/* Initialize LOCK by handing it straight to gomp_mutex_init.  */
void
gomp_init_lock_30 (omp_lock_t *lock)
{
gomp_mutex_init (lock);
}
/* Destroy LOCK by handing it straight to gomp_mutex_destroy.  */
void
gomp_destroy_lock_30 (omp_lock_t *lock)
{
gomp_mutex_destroy (lock);
}
/* Acquire LOCK via gomp_mutex_lock.  */
void
gomp_set_lock_30 (omp_lock_t *lock)
{
gomp_mutex_lock (lock);
}
/* Release LOCK via gomp_mutex_unlock.  */
void
gomp_unset_lock_30 (omp_lock_t *lock)
{
gomp_mutex_unlock (lock);
}
/* Try to acquire LOCK without blocking.  A single strong
   compare-and-swap attempts to move the lock word from 0 to 1; the
   result is nonzero only if that exchange succeeded.  */
int
gomp_test_lock_30 (omp_lock_t *lock)
{
  int expected = 0;
  int acquired
    = __atomic_compare_exchange_n (lock, &expected, 1, false,
                                   MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);

  return acquired;
}
/* Initialize the nestable LOCK by zero-filling the whole object.  */
void
gomp_init_nest_lock_30 (omp_nest_lock_t *lock)
{
  memset (lock, 0, sizeof *lock);
}
/* Destroying a nestable lock is a no-op in this implementation; LOCK is
not touched.  */
void
gomp_destroy_nest_lock_30 (omp_nest_lock_t *lock)
{
}
/* Acquire the nestable LOCK.  The value returned by gomp_icv (true) is
   used as the owner identity: if the caller already owns the lock only
   the nesting count is bumped, otherwise the underlying mutex is taken
   first and ownership recorded.  */
void
gomp_set_nest_lock_30 (omp_nest_lock_t *lock)
{
  void *self = gomp_icv (true);

  if (lock->owner == self)
    {
      /* Re-entrant acquisition by the current owner.  */
      lock->count++;
      return;
    }

  gomp_mutex_lock (&lock->lock);
  lock->owner = self;
  lock->count++;
}
/* Drop one nesting level of LOCK.  When the count reaches zero the owner
   is cleared and the underlying mutex is released.  */
void
gomp_unset_nest_lock_30 (omp_nest_lock_t *lock)
{
  lock->count -= 1;
  if (lock->count != 0)
    return;

  lock->owner = NULL;
  gomp_mutex_unlock (&lock->lock);
}
/* Try to acquire the nestable LOCK without blocking.  Returns the new
   nesting count on success (1 for a fresh acquisition) and 0 if the lock
   is held by someone else.  */
int
gomp_test_nest_lock_30 (omp_nest_lock_t *lock)
{
  void *self = gomp_icv (true);
  int expected = 0;

  /* Re-entry by the current owner: bump and report the new depth.  */
  if (lock->owner == self)
    {
      lock->count += 1;
      return lock->count;
    }

  /* One strong compare-and-swap of the lock word from 0 to 1.  */
  if (!__atomic_compare_exchange_n (&lock->lock, &expected, 1, false,
                                    MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
    return 0;

  lock->owner = self;
  lock->count = 1;
  return 1;
}
/* Reuse the generic implementation in terms of gomp_mutex_t. */
#include "../../lock.c"
#ifdef LIBGOMP_GNU_SYMBOL_VERSIONING
/* gomp_mutex_* can be safely locked in one thread and
......
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an NVPTX specific implementation of a barrier synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and bar.sync instruction. */
#include <limits.h>
#include "libgomp.h"
/* Second half of a plain barrier wait.  STATE is the value returned by
   gomp_barrier_wait_start.  If this thread was the last arrival, it
   re-arms the arrival counter and publishes the next generation.  Every
   caller then executes bar.sync on barrier 1 with a thread count of
   32 * bar->total.
   NOTE(review): the factor 32 is presumably the warp size -- confirm.  */
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      unsigned int next_generation;

      /* Re-arm: the next round waits for TOTAL threads again.  */
      bar->awaited = bar->total;
      next_generation = bar->generation + BAR_INCR;
      __atomic_store_n (&bar->generation, next_generation,
                        MEMMODEL_RELEASE);
    }

  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}
/* Full barrier wait on BAR: register arrival with
gomp_barrier_wait_start and pass the resulting state to
gomp_barrier_wait_end.  */
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}
/* Like gomp_barrier_wait, except that if the encountering thread
is not the last one to hit the barrier, it is allowed to return
immediately.  The intended usage is that a thread which intends to
gomp_barrier_destroy this barrier calls gomp_barrier_wait, while all
other threads call gomp_barrier_wait_last.  When gomp_barrier_wait
returns, the barrier can be safely destroyed.  */
void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
/* Deferring to gomp_barrier_wait does not use the optimization opportunity
allowed by the interface contract for all-but-last participants: here
every caller performs a full wait.  The original implementation in
config/linux/bar.c handles this better.  */
gomp_barrier_wait (bar);
}
/* Wake threads waiting on the team barrier BAR.  COUNT is ignored in
this implementation; the call always executes bar.sync on barrier 1
with a thread count of 32 * bar->total.  */
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}
/* Second half of a team barrier wait.  STATE is the value returned by
   gomp_barrier_wait_start (or gomp_barrier_wait_final_start).

   The last arriving thread re-arms the arrival counter and clears the
   team's work_share_cancelled flag.  If no tasks are outstanding it
   publishes the next generation, joins bar.sync once and returns;
   otherwise it calls gomp_barrier_handle_tasks and then waits like any
   other thread.

   Waiting threads loop over bar.sync, running pending tasks whenever
   BAR_TASK_PENDING is observed, until the published generation equals
   STATE (with BAR_CANCELLED masked off) plus BAR_INCR.

   The former local `generation' was only ever written, never read, and
   has been removed.  */
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          /* No tasks left: publish the next generation and release.  */
          state &= ~BAR_CANCELLED;
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
          asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
          return;
        }
    }

  state &= ~BAR_CANCELLED;
  do
    {
      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          gomp_barrier_handle_tasks (state);
          /* Task handling may have advanced the generation; reload.  */
          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
        }
    }
  while (gen != state + BAR_INCR);
}
/* Full team barrier wait on BAR: register arrival with
gomp_barrier_wait_start and pass the resulting state to
gomp_team_barrier_wait_end.  */
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}
/* Team barrier wait that counts arrivals on bar->awaited_final (via
   gomp_barrier_wait_final_start).  The last arrival re-arms that counter
   before the common gomp_team_barrier_wait_end path runs.  */
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  int was_last = (state & BAR_WAS_LAST) != 0;

  if (__builtin_expect (was_last, 0))
    bar->awaited_final = bar->total;

  gomp_team_barrier_wait_end (bar, state);
}
/* Cancellable variant of gomp_team_barrier_wait_end.  STATE is the value
   returned by gomp_barrier_wait_cancel_start.  Returns true if the
   barrier was cancelled (BAR_CANCELLED seen in STATE or in the published
   generation), false if the barrier completed normally.

   The former local `generation' was only ever written, never read, and
   has been removed.  */
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
                                   gomp_barrier_state_t state)
{
  unsigned int gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in state here, because
         cancellation means that at least one of the threads has been
         cancelled, thus on a cancellable barrier we should never see
         all threads to arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          /* No tasks left: publish the next generation and release.  */
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
          asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
          return false;
        }
    }

  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  do
    {
      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
        return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          gomp_barrier_handle_tasks (state);
          /* Task handling may have advanced the generation; reload.  */
          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
        }
    }
  while (gen != state + BAR_INCR);

  return false;
}
/* Cancellable team barrier wait on BAR.  Registers arrival with
gomp_barrier_wait_start and returns the result of
gomp_team_barrier_wait_cancel_end for that state.  */
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
}
/* Mark TEAM's barrier as cancelled.  The BAR_CANCELLED flag is tested
   and set under team->task_lock; only the call that actually sets the
   flag goes on to wake the barrier (after dropping the lock).  */
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  int newly_cancelled;

  gomp_mutex_lock (&team->task_lock);
  newly_cancelled = (team->barrier.generation & BAR_CANCELLED) == 0;
  if (newly_cancelled)
    team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);

  if (newly_cancelled)
    gomp_team_barrier_wake (&team->barrier, INT_MAX);
}
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an NVPTX specific implementation of a barrier synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and bar.sync instruction. */
#ifndef GOMP_BARRIER_H
#define GOMP_BARRIER_H 1
#include "mutex.h"
/* Barrier object.  gomp_barrier_init sets total, awaited and
awaited_final to the participant count and generation to 0.  */
typedef struct
{
/* Number of participants the barrier is configured for.  */
unsigned total;
/* Generation counter in the high bits plus BAR_* flag bits.  */
unsigned generation;
/* Arrivals still outstanding; decremented by gomp_barrier_wait_start.  */
unsigned awaited;
/* Like awaited, but decremented by gomp_barrier_wait_final_start.  */
unsigned awaited_final;
} gomp_barrier_t;
/* Snapshot of the generation word, possibly with BAR_WAS_LAST or'ed in,
as returned by the *_start functions.  */
typedef unsigned int gomp_barrier_state_t;
/* The generation field contains a counter in the high bits, with a few
low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
share space because WAS_LAST is never stored back to generation. */
/* Flag in generation: tasks are pending.  */
#define BAR_TASK_PENDING 1
/* Flag in a returned state only: the caller was the last arrival.  */
#define BAR_WAS_LAST 1
/* Flag in generation: the barrier is waiting for tasks.  */
#define BAR_WAITING_FOR_TASK 2
/* Flag in generation: the barrier has been cancelled.  */
#define BAR_CANCELLED 4
/* Step of the generation counter; also the lowest counter bit, so
-BAR_INCR masks off all flag bits.  */
#define BAR_INCR 8
/* Initialize BAR for COUNT participants: generation starts at zero and
   all three counters (total, awaited, awaited_final) start at COUNT.  */
static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
{
  bar->generation = 0;
  bar->total = bar->awaited = bar->awaited_final = count;
}
/* Change the participant count of BAR to COUNT.  The outstanding-arrival
   counter is adjusted atomically by the difference between the new and
   the old total (unsigned wrap-around handles a shrinking count), then
   the new total is recorded.  */
static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
{
  unsigned delta = count - bar->total;

  __atomic_add_fetch (&bar->awaited, delta, MEMMODEL_ACQ_REL);
  bar->total = count;
}
/* Destroying a barrier is a no-op in this implementation; BAR is not
touched.  */
static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
{
}
/* Out-of-line barrier entry points.  The *_end functions take the state
produced by the matching *_start inline function in this header.  */
extern void gomp_barrier_wait (gomp_barrier_t *);
extern void gomp_barrier_wait_last (gomp_barrier_t *);
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
extern void gomp_team_barrier_wait (gomp_barrier_t *);
extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
gomp_barrier_state_t);
/* The cancellable variants return a bool result.  */
extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
gomp_barrier_state_t);
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
/* Forward declaration; only a pointer to the team is needed here.  */
struct gomp_team;
extern void gomp_team_barrier_cancel (struct gomp_team *);
/* First half of a barrier wait: register this thread's arrival on BAR.
   Returns the current generation with every flag except BAR_CANCELLED
   masked off, plus BAR_WAS_LAST if this arrival brought the outstanding
   count to zero.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_start (gomp_barrier_t *bar)
{
  unsigned int state = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
  unsigned int remaining;

  state &= -BAR_INCR | BAR_CANCELLED;
  /* A memory barrier is needed before exiting from the various forms
     of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
     2.8.6 flush Construct, which says there is an implicit flush during
     a barrier region.  This is a convenient place to add the barrier,
     so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE.  */
  remaining = __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL);

  return remaining == 0 ? state | BAR_WAS_LAST : state;
}
/* Arrival step for cancellable barriers.  In this implementation it is
identical to gomp_barrier_wait_start and simply forwards to it.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
{
return gomp_barrier_wait_start (bar);
}
/* Arrival step for the gomp_team_end barrier only.  Same as
   gomp_barrier_wait_start, except that the arrival is counted on
   bar->awaited_final instead of bar->awaited.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_final_start (gomp_barrier_t *bar)
{
  unsigned int state = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
  unsigned int remaining;

  state &= -BAR_INCR | BAR_CANCELLED;
  /* ACQ_REL for the same reason as in gomp_barrier_wait_start: the
     barrier implies a flush.  */
  remaining = __atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_ACQ_REL);

  return remaining == 0 ? state | BAR_WAS_LAST : state;
}
/* Return true if STATE carries the BAR_WAS_LAST flag, i.e. the thread
   that obtained STATE was the last one to arrive.  */
static inline bool
gomp_barrier_last_thread (gomp_barrier_state_t state)
{
  return (state & BAR_WAS_LAST) != 0;
}
/* All the inlines below must be called with team->task_lock
held. */
/* Set BAR_TASK_PENDING in BAR's generation word.  Plain (non-atomic)
read-modify-write; the caller must hold team->task_lock.  */
static inline void
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
{
bar->generation |= BAR_TASK_PENDING;
}
/* Clear BAR_TASK_PENDING in BAR's generation word.  Plain (non-atomic)
read-modify-write; the caller must hold team->task_lock.  */
static inline void
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
{
bar->generation &= ~BAR_TASK_PENDING;
}
/* Set BAR_WAITING_FOR_TASK in BAR's generation word.  Plain (non-atomic)
read-modify-write; the caller must hold team->task_lock.  */
static inline void
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
{
bar->generation |= BAR_WAITING_FOR_TASK;
}
/* Return true if BAR_WAITING_FOR_TASK is set in BAR's generation
word.  */
static inline bool
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
{
return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
}
/* Return true if BAR_CANCELLED is set in BAR's generation word.  The
__builtin_expect hints that cancellation is the unlikely case.  */
static inline bool
gomp_team_barrier_cancelled (gomp_barrier_t *bar)
{
return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
}
/* Mark the barrier round described by STATE as complete: strip all flag
   bits from STATE and store the following generation into BAR.  */
static inline void
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int counter = state & -BAR_INCR;

  bar->generation = counter + BAR_INCR;
}
#endif /* GOMP_BARRIER_H */
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is the NVPTX implementation of doacross spinning. */
#ifndef GOMP_DOACROSS_H
#define GOMP_DOACROSS_H 1
#include "libgomp.h"
/* Dummy word loaded by cpu_relax.  Nothing visible here ever writes it,
so the load presumably always yields 0 -- confirm no other writer.  */
static int zero;
/* Issue one ld.cg load of `zero' and return the loaded value.  The asm
is volatile with a "memory" clobber, so the compiler can neither drop
it nor move memory accesses across it.  */
static inline int
cpu_relax (void)
{
int r;
/* Here we need a long-latency operation to make the current warp yield.
We could use ld.cv, uncached load from system (host) memory, but that
would require allocating locked memory in the plugin. Alternatively,
we can use ld.cg, which evicts from L1 and caches in L2. */
asm volatile ("ld.cg.s32 %0, [%1];" : "=r" (r) : "i" (&zero) : "memory");
return r;
}
/* Spin until the value at ADDR becomes greater than EXPECTED.  The
   incoming value of CUR is never read; the parameter is only used as
   scratch storage for the most recently loaded value.  Each iteration
   calls cpu_relax and uses its result as the index into ADDR.  */
static inline void doacross_spin (unsigned long *addr, unsigned long expected,
                                  unsigned long cur)
{
  /* Prevent compiler from optimizing based on bounds of containing
     object.  */
  asm ("" : "+r" (addr));

  for (;;)
    {
      int offset = cpu_relax ();

      cur = addr[offset];
      if (cur > expected)
        break;
    }
}
#endif /* GOMP_DOACROSS_H */
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file contains routines used to signal errors. On NVPTX, we have
one default output stream (stdout), so redirect everything there. */
#include "libgomp.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
/* Drop any existing macro definitions of these names so they can be
redefined below.  */
#undef vfprintf
#undef fputs
#undef fputc
/* Each replacement discards the STREAM argument and forwards to the
printf family, so all output goes to the default stream.  */
#define vfprintf(stream, fmt, list) vprintf (fmt, list)
#define fputs(s, stream) printf ("%s", s)
#define fputc(c, stream) printf ("%c", c)
/* Compile the generic error routines with the macros above in effect.  */
#include "../../error.c"
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines OpenMP API entry points that accelerator targets are
expected to replace. */
#include "libgomp.h"
/* Setting the default device is a no-op in this implementation;
   DEVICE_NUM is ignored.  */
void
omp_set_default_device (int device_num)
{
  (void) device_num;
}
/* Report the default device number, which is always 0 here.  */
int
omp_get_default_device (void)
{
  const int device_num = 0;

  return device_num;
}
/* Report the number of devices, which is always 0 here.  */
int
omp_get_num_devices (void)
{
  const int num_devices = 0;

  return num_devices;
}
/* Return gomp_num_teams_var plus one.
NOTE(review): the +1 suggests gomp_num_teams_var holds the number of
teams minus one -- confirm against the code that sets it.  */
int
omp_get_num_teams (void)
{
return gomp_num_teams_var + 1;
}
/* Return the team number, read from the PTX special register
%ctaid.x.  */
int
omp_get_team_num (void)
{
int ctaid;
/* "%%" is an escaped "%" in extended asm, so the instruction reads
%ctaid.x into the output operand.  */
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
return ctaid;
}
/* Report whether the caller runs on the initial (host) device.  Always
   returns 0 here.  */
int
omp_is_initial_device (void)
{
/* NVPTX is an accelerator-only target. */
return 0;
}
/* Apply the ialias macro to every entry point defined above.
   NOTE(review): ialias is defined outside this file (presumably libgomp.h)
   and presumably creates library-internal aliases -- confirm.  */
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)
/* Copyright (C) 2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an NVPTX-specific implementation of the public OpenMP locking
primitives. */
/* Reuse the generic implementation in terms of gomp_mutex_t. */
#include "../../lock.c"
/* Apply the ialias macro to each public lock entry point pulled in by the
   include above.  NOTE(review): ialias is defined outside this file and
   presumably creates library-internal aliases -- confirm.  */
ialias (omp_init_lock)
ialias (omp_init_nest_lock)
ialias (omp_destroy_lock)
ialias (omp_destroy_nest_lock)
ialias (omp_set_lock)
ialias (omp_set_nest_lock)
ialias (omp_unset_lock)
ialias (omp_unset_nest_lock)
ialias (omp_test_lock)
ialias (omp_test_nest_lock)
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an NVPTX specific implementation of a mutex synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and busy waiting. */
#ifndef GOMP_MUTEX_H
#define GOMP_MUTEX_H 1
/* A mutex is a single int: zero when free, non-zero while held.  */
typedef int gomp_mutex_t;

/* An all-zero gomp_mutex_t is already a valid, unlocked mutex, so users that
   test this macro may skip an explicit gomp_mutex_init call.  */
#define GOMP_MUTEX_INIT_0 1

/* Put MUTEX into the unlocked state.  */
static inline void
gomp_mutex_init (gomp_mutex_t *mutex)
{
  *mutex = 0;
}

/* Release resources held by MUTEX.  There are none, so this does nothing.  */
static inline void
gomp_mutex_destroy (gomp_mutex_t *mutex)
{
  (void) mutex;
}

/* Acquire MUTEX, busy-waiting until the atomic exchange observes that the
   previous value was zero (i.e. nobody else held it).  */
static inline void
gomp_mutex_lock (gomp_mutex_t *mutex)
{
  for (;;)
    {
      if (__sync_lock_test_and_set (mutex, 1) == 0)
        return;
      /* Still held by somebody else: spin.  */
    }
}

/* Release MUTEX by atomically resetting it to zero.  */
static inline void
gomp_mutex_unlock (gomp_mutex_t *mutex)
{
  __sync_lock_release (mutex);
}
#endif /* GOMP_MUTEX_H */
/* OpenACC Runtime Fortran wrapper routines
Copyright (C) 2014-2016 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
......@@ -26,15 +23,27 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Temporary hack; this will be provided by libgfortran. */
/* This is the NVPTX implementation of the thread pool management
for libgomp. This type is private to the library. */
#ifndef GOMP_POOL_H
#define GOMP_POOL_H 1
#include "libgomp.h"
/* Get the thread pool.  THR is the thread whose pool is wanted; NTHREADS is
   accepted but not referenced.  Returns THR's existing thread_pool pointer
   without allocating or modifying anything.  */
static inline struct gomp_thread_pool *
gomp_get_thread_pool (struct gomp_thread *thr, unsigned nthreads)
{
/* NVPTX is running with a fixed pool of pre-started threads. */
return thr->thread_pool;
}
/* Prototype for _gfortran_abort: takes no arguments, returns nothing.  */
extern void _gfortran_abort (void);
/* Release POOL.  This implementation performs no action; POOL is not
   touched.  */
static inline void
gomp_release_thread_pool (struct gomp_thread_pool *pool)
{
/* Do nothing. */
}
/* Emit the PTX declaration and definition of _gfortran_abort as raw
   assembly text.  The function body executes "trap" followed by "ret".  */
__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n"
".visible .func _gfortran_abort;\n"
"// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n"
".visible .func _gfortran_abort\n"
"{\n"
"trap;\n"
"ret;\n"
"}\n");
#endif /* GOMP_POOL_H */
/* Empty stub for omp task priority support. */
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file contains system specific routines related to counting
online processors and dynamic load balancing. */
#include "libgomp.h"
/* Return the upper bound on threads for dynamic adjustment.  Here that is
   simply the nthreads_var value of the ICV set returned by
   gomp_icv (false).  */
unsigned
gomp_dynamic_max_threads (void)
{
  unsigned limit = gomp_icv (false)->nthreads_var;

  return limit;
}
/* Return the processor count reported to the program.  Here that is the
   nthreads_var value of the ICV set returned by gomp_icv (false).  */
int
omp_get_num_procs (void)
{
  int nprocs = gomp_icv (false)->nthreads_var;

  return nprocs;
}
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an NVPTX specific implementation of a pointer lock (ptrlock)
synchronization mechanism for libgomp. This type is private to the library.
This implementation uses atomic instructions and busy waiting.
A ptrlock has four states:
0/NULL Initial
1 Owned by me, I get to write a pointer to ptrlock.
2 Some thread is waiting on the ptrlock (never stored by the functions
below, because waiters simply spin while the value is 1).
>2 Ptrlock contains a valid pointer.
It is not valid to gain the ptrlock and then write a NULL to it. */
#ifndef GOMP_PTRLOCK_H
#define GOMP_PTRLOCK_H 1
/* A ptrlock is a single pointer-sized word.  */
typedef void *gomp_ptrlock_t;
/* Initialize PTRLOCK so that it holds PTR, using a plain (non-atomic)
   store.  */
static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
{
*ptrlock = ptr;
}
/* Fetch the value guarded by PTRLOCK.
   - If a value greater than 2 is already stored, return it.
   - If the lock is 0 and this caller atomically changes it to 1, return
     NULL; the caller now owns the lock.
   - Otherwise busy-wait while the lock reads 1 and return whatever value
     is observed next.  */
static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
{
uintptr_t v = (uintptr_t) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
/* Fast path: a pointer has already been published.  */
if (v > 2)
return (void *) v;
/* Try to claim ownership (0 -> 1).  When the exchange fails, the builtin
   writes the value it actually observed back into V.  */
if (v == 0
&& __atomic_compare_exchange_n (ptrlock, &v, 1, false,
MEMMODEL_ACQUIRE, MEMMODEL_ACQUIRE))
return NULL;
/* Somebody else owns the lock: spin until the value changes from 1.  */
while (v == 1)
v = (uintptr_t) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
return (void *) v;
}
/* Store PTR into PTRLOCK with release ordering.  Threads spinning in
   gomp_ptrlock_get leave their loop once they observe a value other
   than 1.  */
static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
{
__atomic_store_n (ptrlock, ptr, MEMMODEL_RELEASE);
}
/* Destroy PTRLOCK.  Nothing needs to be released, so this is a no-op.  */
static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
{
}
#endif /* GOMP_PTRLOCK_H */
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an NVPTX specific implementation of a semaphore synchronization
mechanism for libgomp. This type is private to the library. This
semaphore implementation uses atomic instructions and busy waiting. */
#ifndef GOMP_SEM_H
#define GOMP_SEM_H 1
/* A semaphore is a single int holding its current count.  */
typedef int gomp_sem_t;
/* Initialize SEM with the count VALUE, using a plain (non-atomic) store.  */
static inline void
gomp_sem_init (gomp_sem_t *sem, int value)
{
*sem = value;
}
/* Destroy SEM.  Nothing needs to be released, so this is a no-op.  */
static inline void
gomp_sem_destroy (gomp_sem_t *sem)
{
}
/* Wait on SEM: busy-wait until the count is non-zero, then atomically
   decrement it by one and return.  */
static inline void
gomp_sem_wait (gomp_sem_t *sem)
{
int count = __atomic_load_n (sem, MEMMODEL_ACQUIRE);
for (;;)
{
/* Spin while no unit is available.  */
while (count == 0)
count = __atomic_load_n (sem, MEMMODEL_ACQUIRE);
/* Try to take one unit.  When the exchange fails, the builtin refreshes
   COUNT with the value it observed and the outer loop retries.  */
if (__atomic_compare_exchange_n (sem, &count, count - 1, false,
MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
return;
}
}
/* Post SEM: atomically add one to the count with release ordering.  The
   resulting value is discarded.  */
static inline void
gomp_sem_post (gomp_sem_t *sem)
{
(void) __atomic_add_fetch (sem, 1, MEMMODEL_RELEASE);
}
#endif /* GOMP_SEM_H */
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is a simplified barrier that is suitable for thread pool
synchronization. Only a subset of full barrier API (bar.h) is exposed.
Here in the NVPTX-specific implementation, we expect that thread pool
corresponds to a PTX CTA (thread block). */
#ifndef GOMP_SIMPLE_BARRIER_H
#define GOMP_SIMPLE_BARRIER_H 1
/* Barrier state: only the value handed to the PTX barrier instructions as
   their thread-count operand (see gomp_simple_barrier_init).  */
typedef struct
{
unsigned count;
} gomp_simple_barrier_t;
/* Initialize BAR for COUNT participants.  The stored value is COUNT * 32.
   NOTE(review): 32 is presumably the PTX warp size, converting a count of
   warps into the hardware thread count the barrier instructions expect --
   confirm against the PTX ISA.  */
static inline void
gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
{
bar->count = count * 32;
}
/* Unused on NVPTX.
static inline void
gomp_simple_barrier_reinit (gomp_simple_barrier_t *bar, unsigned count)
{
bar->count = count * 32;
}
*/
/* Destroy BAR.  Nothing needs to be released, so this is a no-op.  */
static inline void
gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
{
}
/* Wait on BAR by issuing "bar.sync" on PTX barrier 0 with BAR->count as the
   thread-count operand.  The "memory" clobber tells the compiler not to
   keep memory values cached in registers across the barrier.  */
static inline void
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
{
asm volatile ("bar.sync 0, %0;" : : "r" (bar->count) : "memory");
}
/* Signal arrival at BAR by issuing "bar.arrive" on PTX barrier 0 with
   BAR->count as the thread-count operand.
   NOTE(review): per the PTX ISA, bar.arrive does not block the calling
   thread, unlike bar.sync -- confirm.  */
static inline void
gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
{
asm volatile ("bar.arrive 0, %0;" : : "r" (bar->count) : "memory");
}
#endif /* GOMP_SIMPLE_BARRIER_H */
/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
Contributed by Jakub Jelinek <jakub@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
#include <limits.h>
/* Set up the teams for the calling thread block.
   THREAD_LIMIT, when non-zero, is stored into the thread_limit_var ICV
   (values above INT_MAX are stored as UINT_MAX).
   NUM_TEAMS is the requested team count; zero, or anything at least as large
   as the number of launched blocks (%nctaid.x), selects one team per block.
   Blocks whose index (%ctaid.x) is not below the chosen team count free
   their thread state and execute the PTX "exit" instruction.  The chosen
   count minus one is recorded in gomp_num_teams_var.  */
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit != 0)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      unsigned int limit = thread_limit;

      if (limit > INT_MAX)
        limit = UINT_MAX;
      icv->thread_limit_var = limit;
    }

  unsigned int grid_blocks, this_block;
  asm ("mov.u32 %0, %%nctaid.x;" : "=r" (grid_blocks));
  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (this_block));

  if (num_teams == 0 || num_teams >= grid_blocks)
    {
      /* No usable limit requested: every launched block is a team.  */
      num_teams = grid_blocks;
    }
  else if (this_block >= num_teams)
    {
      /* This block is surplus to the requested team count.  */
      gomp_free_thread (nvptx_thrs);
      asm ("exit;");
    }

  gomp_num_teams_var = num_teams - 1;
}
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file handles the maintenance of tasks in response to task
creation and termination. */
/* Everything below is compiled only when __nvptx_softstack__ is defined;
   otherwise this file contributes nothing.  */
#ifdef __nvptx_softstack__
#include "libgomp.h"
/* NVPTX is an accelerator-only target, so this should never be called.
   DATA is ignored and the body is marked unreachable rather than
   implemented.  */
bool
gomp_target_task_fn (void *data)
{
__builtin_unreachable ();
}
/* Pull in the generic task implementation.  */
#include "../../task.c"
#endif
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file handles maintenance of threads on NVPTX. */
#if defined __nvptx_softstack__ && defined __nvptx_unisimt__
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
/* Base of the array of per-thread descriptors, set up by gomp_nvptx_main.
   Declared with the "shared" and "nocommon" attributes.  */
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
/* Forward declaration: idle loop for the non-master threads, defined
   below.  */
static void gomp_thread_start (struct gomp_thread_pool *);
/* This externally visible function handles target region entry. It
sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
in the master thread or gomp_thread_start in other threads.
The name of this function is part of the interface with the compiler: for
each target region, GCC emits a PTX .kernel function that sets up soft-stack
and uniform-simt state and calls this function, passing in FN the original
function outlined for the target region. */
void
gomp_nvptx_main (void (*fn) (void *), void *fn_data)
{
int tid, ntids;
/* The thread index and thread count come from the y dimension of the PTX
   thread identifiers.  */
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
if (tid == 0)
{
/* Master thread: publish the thread count and build the pool.  */
gomp_global_icv.nthreads_var = ntids;
/* Starting additional threads is not supported. */
gomp_global_icv.dyn_var = true;
/* The thread descriptors, the pool and its thread table are all obtained
   from alloca, so they live only as long as this call frame.  */
nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
struct gomp_thread_pool *pool = alloca (sizeof (*pool));
pool->threads = alloca (ntids * sizeof (*pool->threads));
/* TID is reused as the loop index; its original value (0) is no longer
   needed in this branch.  */
for (tid = 0; tid < ntids; tid++)
pool->threads[tid] = nvptx_thrs + tid;
pool->threads_size = ntids;
pool->threads_used = ntids;
pool->threads_busy = 1;
pool->last_team = NULL;
gomp_simple_barrier_init (&pool->threads_dock, ntids);
nvptx_thrs[0].thread_pool = pool;
/* Pairs with the bar.sync in the else branch: the other threads read
   nvptx_thrs[0].thread_pool only after this point.  */
asm ("bar.sync 0;");
fn (fn_data);
gomp_free_thread (nvptx_thrs);
}
else
{
/* Wait until the master thread has finished the setup above.  */
asm ("bar.sync 0;");
gomp_thread_start (nvptx_thrs[0].thread_pool);
}
}
/* This function contains the idle loop in which a thread waits
to be called up to become part of a team.  POOL is the thread pool the
calling thread is recorded as belonging to.  */
static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
struct gomp_thread *thr = gomp_thread ();
gomp_sem_init (&thr->release, 0);
thr->thread_pool = pool;
do
{
/* Dock until gomp_team_start meets us on the same barrier.  */
gomp_simple_barrier_wait (&pool->threads_dock);
/* Woken without work assigned: go back to the dock.  In a do-while loop
   "continue" jumps to the loop condition below.  */
if (!thr->fn)
continue;
thr->fn (thr->data);
/* Clear the entry point so a later wake-up without new work is ignored.  */
thr->fn = NULL;
struct gomp_task *task = thr->task;
gomp_team_barrier_wait_final (&thr->ts.team->barrier);
gomp_finish_task (task);
}
/* Work around an NVIDIA driver bug: when generating sm_50 machine code,
it can trash stack pointer R1 in loops lacking exit edges. Add a cheap
artificial exit that the driver would not be able to optimize out. */
while (nvptx_thrs);
}
/* Launch a team.
   FN and DATA are the entry point and argument recorded for every worker
   thread (team ids 1 .. NTHREADS-1); the calling thread becomes team id 0
   and does not call FN here.  NTHREADS is the team size and TEAM the team
   descriptor all members are attached to.  FLAGS is not referenced in this
   implementation.  */
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
unsigned flags, struct gomp_team *team)
{
struct gomp_thread *thr, *nthr;
struct gomp_task *task;
struct gomp_task_icv *icv;
struct gomp_thread_pool *pool;
unsigned long nthreads_var;
thr = gomp_thread ();
pool = thr->thread_pool;
task = thr->task;
/* Take ICVs from the current task when there is one, otherwise from the
   global set.  */
icv = task ? &task->icv : &gomp_global_icv;
/* Always save the previous state, even if this isn't a nested team.
In particular, we should save any work share state from an outer
orphaned work share construct. */
team->prev_ts = thr->ts;
/* Attach the calling thread to TEAM as member 0.  */
thr->ts.team = team;
thr->ts.team_id = 0;
++thr->ts.level;
if (nthreads > 1)
++thr->ts.active_level;
thr->ts.work_share = &team->work_shares[0];
thr->ts.last_work_share = NULL;
thr->ts.single_count = 0;
thr->ts.static_trip = 0;
thr->task = &team->implicit_task[0];
nthreads_var = icv->nthreads_var;
gomp_init_task (thr->task, task, icv);
team->implicit_task[0].icv.nthreads_var = nthreads_var;
/* A team of one has no workers to set up or wake.  */
if (nthreads == 1)
return;
/* Release existing idle threads. */
for (unsigned i = 1; i < nthreads; ++i)
{
/* Give worker I its team state, implicit task and entry point.  */
nthr = pool->threads[i];
nthr->ts.team = team;
nthr->ts.work_share = &team->work_shares[0];
nthr->ts.last_work_share = NULL;
nthr->ts.team_id = i;
nthr->ts.level = team->prev_ts.level + 1;
nthr->ts.active_level = thr->ts.active_level;
nthr->ts.single_count = 0;
nthr->ts.static_trip = 0;
nthr->task = &team->implicit_task[i];
gomp_init_task (nthr->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
nthr->fn = fn;
nthr->data = data;
team->ordered_release[i] = &nthr->release;
}
/* Meet the docked threads on the barrier they wait on in
   gomp_thread_start; those that were given an entry point above run it,
   the rest return to the dock.  */
gomp_simple_barrier_wait (&pool->threads_dock);
}
#include "../../team.c"
#endif
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Dmitry Melnik <dm@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file implements timer routines for NVPTX. It uses the %clock64 cycle
counter. */
#include "libgomp.h"
/* Multiplier applied to the %clock64 cycle count by omp_get_wtime and
   returned unchanged by omp_get_wtick.  It is 0 until assigned.
   This is set from host in plugin-nvptx.c. */
double __nvptx_clocktick = 0;
double
omp_get_wtime (void)
{
uint64_t clock;
asm ("mov.u64 %0, %%clock64;" : "=r" (clock));
return clock * __nvptx_clocktick;
}
/* Return the timer resolution, which is the same __nvptx_clocktick factor
   that omp_get_wtime multiplies the cycle count by.  */
double
omp_get_wtick (void)
{
return __nvptx_clocktick;
}
/* Apply the ialias macro to both timer entry points.
   NOTE(review): ialias is defined outside this file and presumably creates
   library-internal aliases -- confirm.  */
ialias (omp_get_wtime)
ialias (omp_get_wtick)
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
Contributed by Alexander Monakov <amonakov@ispras.ru>
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is a simplified barrier that is suitable for thread pool
synchronization. Only a subset of full barrier API (bar.h) is exposed. */
#ifndef GOMP_SIMPLE_BARRIER_H
#define GOMP_SIMPLE_BARRIER_H 1
#include "bar.h"
/* The simple barrier is a thin wrapper around a full gomp_barrier_t.  */
typedef struct
{
gomp_barrier_t bar;
} gomp_simple_barrier_t;
/* Initialize BAR for COUNT participants by forwarding to
   gomp_barrier_init.  */
static inline void
gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
{
gomp_barrier_init (&bar->bar, count);
}
/* Change the participant count of BAR to COUNT by forwarding to
   gomp_barrier_reinit.  */
static inline void
gomp_simple_barrier_reinit (gomp_simple_barrier_t *bar, unsigned count)
{
gomp_barrier_reinit (&bar->bar, count);
}
/* Destroy BAR by forwarding to gomp_barrier_destroy.  */
static inline void
gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
{
gomp_barrier_destroy (&bar->bar);
}
/* Wait on BAR by forwarding to gomp_barrier_wait.  */
static inline void
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
{
gomp_barrier_wait (&bar->bar);
}
/* Forward to gomp_barrier_wait_last on the wrapped barrier.  */
static inline void
gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
{
gomp_barrier_wait_last (&bar->bar);
}
#endif /* GOMP_SIMPLE_BARRIER_H */
......@@ -15070,6 +15070,7 @@ case "$host" in
;;
nvptx*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no
;;
*)
# Check to see if -pthread or -lpthread is needed. Prefer the former.
......@@ -15115,6 +15116,12 @@ rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
esac
# Emit LIBGOMP_USE_PTHREADS into confdefs.h unless libgomp_use_pthreads
# was set to "no".
if test x$libgomp_use_pthreads != xno; then
$as_echo "#define LIBGOMP_USE_PTHREADS 1" >>confdefs.h
fi
# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*-
#
# Copyright (C) 2014-2016 Free Software Foundation, Inc.
......
......@@ -181,6 +181,7 @@ case "$host" in
;;
nvptx*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no
;;
*)
# Check to see if -pthread or -lpthread is needed. Prefer the former.
......@@ -202,6 +203,11 @@ case "$host" in
[AC_MSG_ERROR([Pthreads are required to build libgomp])])])
esac
# Define LIBGOMP_USE_PTHREADS for the C sources unless a host case above
# disabled Pthreads by setting libgomp_use_pthreads=no.
if test x$libgomp_use_pthreads != xno; then
AC_DEFINE(LIBGOMP_USE_PTHREADS, 1,
[Define to 1 if libgomp should use POSIX threads.])
fi
m4_include([plugin/configfrag.ac])
# Check for functions needed.
......
......@@ -115,33 +115,11 @@ GOMP_critical_name_end (void **pptr)
gomp_mutex_unlock (plock);
}
/* This mutex is used when atomic operations don't exist for the target
in the mode requested. The result is not globally atomic, but works so
long as all parallel references are within #pragma omp atomic directives.
According to responses received from omp@openmp.org, appears to be within
spec. Which makes sense, since that's how several other compilers
handle this situation as well. */
static gomp_mutex_t atomic_lock;
/* Begin an atomic region by acquiring the single shared atomic_lock.  */
void
GOMP_atomic_start (void)
{
gomp_mutex_lock (&atomic_lock);
}
/* End an atomic region by releasing atomic_lock, which GOMP_atomic_start
   acquired.  */
void
GOMP_atomic_end (void)
{
gomp_mutex_unlock (&atomic_lock);
}
#if !GOMP_MUTEX_INIT_0
static void __attribute__((constructor))
initialize_critical (void)
{
gomp_mutex_init (&default_lock);
gomp_mutex_init (&atomic_lock);
#ifndef HAVE_SYNC_BUILTINS
gomp_mutex_init (&create_lock_lock);
#endif
......
......@@ -23,8 +23,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines the OpenMP internal control variables, and arranges
for them to be initialized from environment variables at startup. */
/* This file arranges for OpenMP internal control variables to be initialized
from environment variables at startup. */
#include "libgomp.h"
#include "libgomp_f.h"
......@@ -55,35 +55,6 @@
# define strtoull(ptr, eptr, base) strtoul (ptr, eptr, base)
#endif
/* Global ICV set with its built-in starting values: one thread, no thread
   limit (UINT_MAX), dynamic scheduling with chunk size 1, default device 0,
   and dynamic adjustment, nesting and binding all off.  */
struct gomp_task_icv gomp_global_icv = {
.nthreads_var = 1,
.thread_limit_var = UINT_MAX,
.run_sched_var = GFS_DYNAMIC,
.run_sched_chunk_size = 1,
.default_device_var = 0,
.dyn_var = false,
.nest_var = false,
.bind_var = omp_proc_bind_false,
.target_data = NULL
};
/* Library-wide control variables that are not part of the ICV structure
   above, with their starting values.  */
unsigned long gomp_max_active_levels_var = INT_MAX;
bool gomp_cancel_var = false;
int gomp_max_task_priority_var = 0;
/* The lock below exists only when the sync builtins are unavailable.  */
#ifndef HAVE_SYNC_BUILTINS
gomp_mutex_t gomp_managed_threads_lock;
#endif
unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
/* List-valued settings, each stored as a pointer plus a length.  */
unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
char *gomp_bind_var_list;
unsigned long gomp_bind_var_list_len;
void **gomp_places_list;
unsigned long gomp_places_list_len;
int gomp_debug_var;
char *goacc_device_type;
int goacc_device_num;
/* Parse the OMP_SCHEDULE environment variable. */
static void
......@@ -1302,240 +1273,3 @@ initialize_env (void)
goacc_runtime_initialize ();
}
/* The public OpenMP API routines that access these variables. */
/* Set the nthreads_var ICV of the current ICV set to N.  Requests that are
   not positive are stored as 1.  */
void
omp_set_num_threads (int n)
{
  struct gomp_task_icv *icv = gomp_icv (true);
  int requested = n;

  if (requested <= 0)
    requested = 1;
  icv->nthreads_var = requested;
}
/* Store VAL into the dyn_var ICV of the current ICV set.  */
void
omp_set_dynamic (int val)
{
  gomp_icv (true)->dyn_var = val;
}
/* Return the dyn_var ICV of the current ICV set.  */
int
omp_get_dynamic (void)
{
  return gomp_icv (false)->dyn_var;
}
/* Store VAL into the nest_var ICV of the current ICV set.  */
void
omp_set_nested (int val)
{
  gomp_icv (true)->nest_var = val;
}
/* Return the nest_var ICV of the current ICV set.  */
int
omp_get_nested (void)
{
  return gomp_icv (false)->nest_var;
}
/* Set the run-time schedule ICVs.
   KIND selects the schedule.  CHUNK_SIZE values below 1 are stored as 0 for
   the static schedule and as 1 for the dynamic and guided schedules.  The
   auto schedule leaves the stored chunk size untouched.  Any other KIND is
   ignored and changes nothing.  */
void
omp_set_schedule (omp_sched_t kind, int chunk_size)
{
  struct gomp_task_icv *icv = gomp_icv (true);

  if (kind == omp_sched_static)
    icv->run_sched_chunk_size = chunk_size < 1 ? 0 : chunk_size;
  else if (kind == omp_sched_dynamic || kind == omp_sched_guided)
    icv->run_sched_chunk_size = chunk_size < 1 ? 1 : chunk_size;
  else if (kind != omp_sched_auto)
    return;

  icv->run_sched_var = kind;
}
/* Report the run-time schedule ICVs: the schedule kind is written to *KIND
   and the chunk size to *CHUNK_SIZE.  Both pointers are dereferenced
   unconditionally.  */
void
omp_get_schedule (omp_sched_t *kind, int *chunk_size)
{
struct gomp_task_icv *icv = gomp_icv (false);
*kind = icv->run_sched_var;
*chunk_size = icv->run_sched_chunk_size;
}
/* Return the nthreads_var ICV of the current ICV set.  */
int
omp_get_max_threads (void)
{
  return gomp_icv (false)->nthreads_var;
}
/* Return the thread_limit_var ICV, capped at INT_MAX so that it fits the
   int return type.  */
int
omp_get_thread_limit (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (icv->thread_limit_var > INT_MAX)
    return INT_MAX;
  return icv->thread_limit_var;
}
/* Set the maximum number of active nested parallel levels to MAX_LEVELS.
   Negative requests are ignored.  */
void
omp_set_max_active_levels (int max_levels)
{
  if (max_levels < 0)
    return;

  gomp_max_active_levels_var = max_levels;
}
/* Return the current value of gomp_max_active_levels_var.  */
int
omp_get_max_active_levels (void)
{
return gomp_max_active_levels_var;
}
/* Return the current value of gomp_cancel_var.  */
int
omp_get_cancellation (void)
{
return gomp_cancel_var;
}
/* Return the current value of gomp_max_task_priority_var.  */
int
omp_get_max_task_priority (void)
{
return gomp_max_task_priority_var;
}
/* Return the bind_var ICV of the current ICV set.  */
omp_proc_bind_t
omp_get_proc_bind (void)
{
  return gomp_icv (false)->bind_var;
}
/* Set the default_device_var ICV to DEVICE_NUM.  Negative device numbers
   are stored as 0.  */
void
omp_set_default_device (int device_num)
{
  struct gomp_task_icv *icv = gomp_icv (true);

  if (device_num < 0)
    device_num = 0;
  icv->default_device_var = device_num;
}
/* Return the default_device_var ICV of the current ICV set.  */
int
omp_get_default_device (void)
{
  return gomp_icv (false)->default_device_var;
}
/* Return the number of devices as reported by gomp_get_num_devices.  */
int
omp_get_num_devices (void)
{
return gomp_get_num_devices ();
}
/* Return the number of teams; this version always reports 1.  */
int
omp_get_num_teams (void)
{
/* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */
return 1;
}
/* Return the team number of the caller; this version always reports 0.  */
int
omp_get_team_num (void)
{
/* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */
return 0;
}
/* Report whether the caller runs on the initial device; this version
   always reports 1.  */
int
omp_is_initial_device (void)
{
/* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
return 1;
}
/* Return the device number of the initial device, which is the constant
   GOMP_DEVICE_HOST_FALLBACK.  */
int
omp_get_initial_device (void)
{
return GOMP_DEVICE_HOST_FALLBACK;
}
/* Return the number of places, i.e. gomp_places_list_len.  */
int
omp_get_num_places (void)
{
return gomp_places_list_len;
}
/* Return the place number of the calling thread, or -1 when no places list
   exists.  The thread's PLACE field is stored one-based, with 0 meaning
   "not yet initialized"; in that case affinity is initialized first.  */
int
omp_get_place_num (void)
{
  if (gomp_places_list != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();

      if (thr->place == 0)
        gomp_init_affinity ();
      return (int) thr->place - 1;
    }

  return -1;
}
/* Return the number of places in the calling thread's place partition, or
   0 when no places list exists.  Affinity is initialized first if the
   thread's PLACE field is still 0.  */
int
omp_get_partition_num_places (void)
{
  if (gomp_places_list != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();

      if (thr->place == 0)
        gomp_init_affinity ();
      return thr->ts.place_partition_len;
    }

  return 0;
}
/* Fill PLACE_NUMS with the place numbers of the calling thread's place
   partition: place_partition_len consecutive values starting at
   place_partition_off.  Nothing is written when no places list exists.
   Affinity is initialized first if the thread's PLACE field is still 0.  */
void
omp_get_partition_place_nums (int *place_nums)
{
  if (gomp_places_list == NULL)
    return;

  struct gomp_thread *thr = gomp_thread ();
  if (thr->place == 0)
    gomp_init_affinity ();

  for (unsigned int idx = 0; idx < thr->ts.place_partition_len; idx++)
    place_nums[idx] = thr->ts.place_partition_off + idx;
}
/* Apply the ialias macro to every public entry point defined above.
   NOTE(review): ialias is defined outside this file and presumably creates
   library-internal aliases -- confirm.  */
ialias (omp_set_dynamic)
ialias (omp_set_nested)
ialias (omp_set_num_threads)
ialias (omp_get_dynamic)
ialias (omp_get_nested)
ialias (omp_set_schedule)
ialias (omp_get_schedule)
ialias (omp_get_max_threads)
ialias (omp_get_thread_limit)
ialias (omp_set_max_active_levels)
ialias (omp_get_max_active_levels)
ialias (omp_get_cancellation)
ialias (omp_get_proc_bind)
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)
ialias (omp_get_initial_device)
ialias (omp_get_max_task_priority)
ialias (omp_get_num_places)
ialias (omp_get_place_num)
ialias (omp_get_partition_num_places)
ialias (omp_get_partition_place_nums)
/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines OpenMP API entry points that accelerator targets are
expected to replace. */
#include "libgomp.h"
/* Store DEVICE_NUM in the default_device_var field of the ICV block
   returned by gomp_icv (true).  Negative values are stored as 0.  */
void
omp_set_default_device (int device_num)
{
  struct gomp_task_icv *task_icv = gomp_icv (true);
  int chosen = 0;

  if (device_num >= 0)
    chosen = device_num;
  task_icv->default_device_var = chosen;
}
int
omp_get_default_device (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->default_device_var;
}
/* Return the device count reported by gomp_get_num_devices.  */
int
omp_get_num_devices (void)
{
  int device_count = gomp_get_num_devices ();

  return device_count;
}
/* Return the number of teams.  This generic version always answers 1
   (host, MIC, HSAIL?); a PTX-specific replacement may return a variable
   value.  */
int
omp_get_num_teams (void)
{
  const int fixed_team_count = 1;

  return fixed_team_count;
}
/* Return the team number.  This generic version always answers 0
   (host, MIC, HSAIL?); a PTX-specific replacement may return a variable
   value.  */
int
omp_get_team_num (void)
{
  const int fixed_team_index = 0;

  return fixed_team_index;
}
/* Report whether the caller runs on the initial device.  This generic
   version always answers 1 (host); replacements for MIC, HSAIL and PTX
   should answer 0.  */
int
omp_is_initial_device (void)
{
  const int on_host = 1;

  return on_host;
}
/* Apply the ialias macro to each entry point defined in this file.
NOTE(review): ialias is defined outside this view; presumably it creates a
library-internal alias for each symbol -- confirm against libgomp.h. */
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)
/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines the OpenMP internal control variables and associated
OpenMP API entry points. */
#include "libgomp.h"
#include "gomp-constants.h"
#include <limits.h>
/* Global ICV block and its initial values: one thread, a thread limit of
UINT_MAX, dynamic schedule with chunk size 1, default device 0, dynamic
adjustment and nesting disabled, proc_bind false, and no target data. */
struct gomp_task_icv gomp_global_icv = {
.nthreads_var = 1,
.thread_limit_var = UINT_MAX,
.run_sched_var = GFS_DYNAMIC,
.run_sched_chunk_size = 1,
.default_device_var = 0,
.dyn_var = false,
.nest_var = false,
.bind_var = omp_proc_bind_false,
.target_data = NULL
};
/* Global variables holding ICVs and runtime state that live outside
struct gomp_task_icv. */
/* Set by omp_set_max_active_levels, returned by omp_get_max_active_levels. */
unsigned long gomp_max_active_levels_var = INT_MAX;
/* Returned by omp_get_cancellation. */
bool gomp_cancel_var = false;
/* Returned by omp_get_max_task_priority. */
int gomp_max_task_priority_var = 0;
/* Mutex used to update gomp_managed_threads when the __sync builtins are
not available. */
#ifndef HAVE_SYNC_BUILTINS
gomp_mutex_t gomp_managed_threads_lock;
#endif
unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
/* Note: only gomp_nthreads_var_list is a pointer; the _len variable declared
on the same line is a plain unsigned long. */
unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
char *gomp_bind_var_list;
unsigned long gomp_bind_var_list_len;
/* Place list and its length. omp_get_num_places returns the length, and the
place query routines treat a NULL list as "no places". */
void **gomp_places_list;
unsigned long gomp_places_list_len;
int gomp_debug_var;
unsigned int gomp_num_teams_var;
char *goacc_device_type;
int goacc_device_num;
/* Store N in the nthreads_var field of the ICV block returned by
   gomp_icv (true).  Values below 1 are stored as 1.  */
void
omp_set_num_threads (int n)
{
  struct gomp_task_icv *task_icv = gomp_icv (true);

  if (n <= 0)
    n = 1;
  task_icv->nthreads_var = n;
}
void
omp_set_dynamic (int val)
{
struct gomp_task_icv *icv = gomp_icv (true);
icv->dyn_var = val;
}
int
omp_get_dynamic (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->dyn_var;
}
void
omp_set_nested (int val)
{
struct gomp_task_icv *icv = gomp_icv (true);
icv->nest_var = val;
}
int
omp_get_nested (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->nest_var;
}
/* Set the run-time schedule kind and chunk size in the ICV block returned
   by gomp_icv (true).

   KIND selects the schedule:
     - omp_sched_static: a CHUNK_SIZE below 1 is stored as 0;
     - omp_sched_dynamic, omp_sched_guided: a CHUNK_SIZE below 1 is stored
       as 1;
     - omp_sched_auto: the stored chunk size is left untouched;
     - any other value: nothing is modified.  */
void
omp_set_schedule (omp_sched_t kind, int chunk_size)
{
  /* Fetched up front, even for an unrecognized KIND, exactly as before.  */
  struct gomp_task_icv *task_icv = gomp_icv (true);
  int fallback_chunk;

  switch (kind)
    {
    case omp_sched_auto:
      /* Only the kind changes; the chunk size is not touched.  */
      task_icv->run_sched_var = kind;
      return;
    case omp_sched_static:
      fallback_chunk = 0;
      break;
    case omp_sched_dynamic:
    case omp_sched_guided:
      fallback_chunk = 1;
      break;
    default:
      return;
    }

  task_icv->run_sched_chunk_size
    = chunk_size < 1 ? fallback_chunk : chunk_size;
  task_icv->run_sched_var = kind;
}
/* Copy the run_sched_var and run_sched_chunk_size fields of the ICV block
   returned by gomp_icv (false) into *KIND and *CHUNK_SIZE respectively.  */
void
omp_get_schedule (omp_sched_t *kind, int *chunk_size)
{
  struct gomp_task_icv *task_icv;

  task_icv = gomp_icv (false);
  *kind = task_icv->run_sched_var;
  *chunk_size = task_icv->run_sched_chunk_size;
}
int
omp_get_max_threads (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->nthreads_var;
}
/* Return the thread_limit_var field of the ICV block returned by
   gomp_icv (false), clamped to INT_MAX so it fits the int return type.  */
int
omp_get_thread_limit (void)
{
  struct gomp_task_icv *task_icv = gomp_icv (false);

  if (task_icv->thread_limit_var > INT_MAX)
    return INT_MAX;
  return task_icv->thread_limit_var;
}
/* Store MAX_LEVELS in gomp_max_active_levels_var.  Negative values are
   ignored and leave the variable unchanged.  */
void
omp_set_max_active_levels (int max_levels)
{
  if (max_levels < 0)
    return;

  gomp_max_active_levels_var = max_levels;
}
/* Return gomp_max_active_levels_var converted to int.  */
int
omp_get_max_active_levels (void)
{
  return (int) gomp_max_active_levels_var;
}
/* Return gomp_cancel_var converted to int.  */
int
omp_get_cancellation (void)
{
  return (int) gomp_cancel_var;
}
/* Return the current value of gomp_max_task_priority_var.  */
int
omp_get_max_task_priority (void)
{
  int max_priority = gomp_max_task_priority_var;

  return max_priority;
}
omp_proc_bind_t
omp_get_proc_bind (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->bind_var;
}
/* Return the device number used for the initial device, which is the
   constant GOMP_DEVICE_HOST_FALLBACK.  */
int
omp_get_initial_device (void)
{
  int initial_device = GOMP_DEVICE_HOST_FALLBACK;

  return initial_device;
}
/* Return gomp_places_list_len converted to int.  */
int
omp_get_num_places (void)
{
  return (int) gomp_places_list_len;
}
/* Return the zero-based place number of the calling thread, or -1 when
   gomp_places_list is NULL.  The thread's PLACE field is one-based; when it
   is still 0, gomp_init_affinity is called before the field is read.  */
int
omp_get_place_num (void)
{
  struct gomp_thread *self;

  if (gomp_places_list == NULL)
    return -1;

  self = gomp_thread ();
  if (self->place == 0)
    gomp_init_affinity ();

  return (int) self->place - 1;
}
/* Return the calling thread's ts.place_partition_len, or 0 when
   gomp_places_list is NULL.  When the thread's PLACE field is still 0,
   gomp_init_affinity is called before the length is read.  */
int
omp_get_partition_num_places (void)
{
  struct gomp_thread *self;

  if (gomp_places_list == NULL)
    return 0;

  self = gomp_thread ();
  if (self->place == 0)
    gomp_init_affinity ();

  return self->ts.place_partition_len;
}
/* Fill PLACE_NUMS with the place numbers of the calling thread's place
   partition: ts.place_partition_len consecutive values starting at
   ts.place_partition_off.  Nothing is written when gomp_places_list is
   NULL.  When the thread's PLACE field is still 0, gomp_init_affinity is
   called first.  */
void
omp_get_partition_place_nums (int *place_nums)
{
  struct gomp_thread *self;
  unsigned int idx;

  if (gomp_places_list == NULL)
    return;

  self = gomp_thread ();
  if (self->place == 0)
    gomp_init_affinity ();

  for (idx = 0; idx < self->ts.place_partition_len; idx++)
    place_nums[idx] = self->ts.place_partition_off + idx;
}
/* Apply the ialias macro to each entry point defined in this file.
NOTE(review): ialias is defined outside this view; presumably it creates a
library-internal alias for each symbol -- confirm against libgomp.h. */
ialias (omp_set_dynamic)
ialias (omp_set_nested)
ialias (omp_set_num_threads)
ialias (omp_get_dynamic)
ialias (omp_get_nested)
ialias (omp_set_schedule)
ialias (omp_get_schedule)
ialias (omp_get_max_threads)
ialias (omp_get_thread_limit)
ialias (omp_set_max_active_levels)
ialias (omp_get_max_active_levels)
ialias (omp_get_cancellation)
ialias (omp_get_proc_bind)
ialias (omp_get_initial_device)
ialias (omp_get_max_task_priority)
ialias (omp_get_num_places)
ialias (omp_get_place_num)
ialias (omp_get_partition_num_places)
ialias (omp_get_partition_place_nums)
......@@ -45,7 +45,9 @@
#include "gstdint.h"
#include "libgomp-plugin.h"
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#endif
#include <stdbool.h>
#include <stdlib.h>
#include <stdarg.h>
......@@ -122,6 +124,7 @@ struct htab;
#include "sem.h"
#include "mutex.h"
#include "bar.h"
#include "simple-bar.h"
#include "ptrlock.h"
......@@ -360,6 +363,7 @@ extern char *gomp_bind_var_list;
extern unsigned long gomp_bind_var_list_len;
extern void **gomp_places_list;
extern unsigned long gomp_places_list_len;
extern unsigned int gomp_num_teams_var;
extern int gomp_debug_var;
extern int goacc_device_num;
extern char *goacc_device_type;
......@@ -626,8 +630,8 @@ struct gomp_thread_pool
/* Number of threads running in this contention group. */
unsigned long threads_busy;
/* This barrier holds and releases threads waiting in threads. */
gomp_barrier_t threads_dock;
/* This barrier holds and releases threads waiting in thread pools. */
gomp_simple_barrier_t threads_dock;
};
enum gomp_cancel_kind
......@@ -642,7 +646,15 @@ enum gomp_cancel_kind
/* ... and here is that TLS data. */
#if defined HAVE_TLS || defined USE_EMUTLS
#if defined __nvptx__
extern struct gomp_thread *nvptx_thrs __attribute__((shared));
/* nvptx variant: return the calling thread's entry in the nvptx_thrs array,
selected by the value of the PTX %tid.y special register. */
static inline struct gomp_thread *gomp_thread (void)
{
int tid;
/* Copy the %tid.y special register into TID. */
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
return nvptx_thrs + tid;
}
#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct gomp_thread gomp_tls_data;
static inline struct gomp_thread *gomp_thread (void)
{
......@@ -671,17 +683,21 @@ static inline struct gomp_task_icv *gomp_icv (bool write)
return &gomp_global_icv;
}
#ifdef LIBGOMP_USE_PTHREADS
/* The attributes to be used during thread creation. */
extern pthread_attr_t gomp_thread_attr;
extern pthread_key_t gomp_thread_destructor;
#endif
/* Function prototypes. */
/* affinity.c */
extern void gomp_init_affinity (void);
#ifdef LIBGOMP_USE_PTHREADS
extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int);
#endif
extern void **gomp_affinity_alloc (unsigned long, bool);
extern void gomp_affinity_init_place (void *);
extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long,
......
/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is a generic implementation of the public OpenMP locking primitives in
terms of internal gomp_mutex_t. It is not meant to be compiled on its own.
It is #include'd from config/{linux,nvptx}/lock.c. */
#include <string.h>
#include "libgomp.h"
/* The internal gomp_mutex_t and the external non-recursive omp_lock_t
have the same form. Re-use it. */
/* Initialize the simple lock LOCK by handing it to gomp_mutex_init.  */
void
gomp_init_lock_30 (omp_lock_t *lock)
{
  gomp_mutex_init (lock);
}
/* Destroy the simple lock LOCK by handing it to gomp_mutex_destroy.  */
void
gomp_destroy_lock_30 (omp_lock_t *lock)
{
  gomp_mutex_destroy (lock);
}
/* Acquire the simple lock LOCK through gomp_mutex_lock.  */
void
gomp_set_lock_30 (omp_lock_t *lock)
{
  gomp_mutex_lock (lock);
}
/* Release the simple lock LOCK through gomp_mutex_unlock.  */
void
gomp_unset_lock_30 (omp_lock_t *lock)
{
  gomp_mutex_unlock (lock);
}
/* Try to acquire the simple lock LOCK without blocking.  A single strong
   compare-and-exchange attempts to change *LOCK from 0 to 1 (acquire
   ordering on success, relaxed on failure).  Return nonzero if the exchange
   succeeded and zero otherwise.  */
int
gomp_test_lock_30 (omp_lock_t *lock)
{
  int expected = 0;
  int acquired
    = __atomic_compare_exchange_n (lock, &expected, 1, false,
				   MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);

  return acquired;
}
/* Initialize the nestable lock LOCK by clearing every byte of it.  */
void
gomp_init_nest_lock_30 (omp_nest_lock_t *lock)
{
  memset (lock, 0, sizeof *lock);
}
/* Destroy the nestable lock LOCK.  This implementation has nothing to
   release, so the function is intentionally empty.  */
void
gomp_destroy_nest_lock_30 (omp_nest_lock_t *lock)
{
  /* Nothing to do.  */
}
/* Acquire the nestable lock LOCK.  The pointer returned by gomp_icv (true)
   identifies the owner: if it differs from LOCK->owner, the underlying mutex
   is taken and ownership recorded.  The nesting count is incremented in
   either case.  */
void
gomp_set_nest_lock_30 (omp_nest_lock_t *lock)
{
  void *self = gomp_icv (true);

  if (lock->owner != self)
    {
      /* First acquisition by this owner: take the real mutex.  */
      gomp_mutex_lock (&lock->lock);
      lock->owner = self;
    }

  lock->count += 1;
}
/* Release one nesting level of the nestable lock LOCK.  When the count
   drops to zero, the owner is cleared and the underlying mutex is
   unlocked.  */
void
gomp_unset_nest_lock_30 (omp_nest_lock_t *lock)
{
  lock->count -= 1;
  if (lock->count != 0)
    return;

  /* Outermost release: give up ownership before unlocking.  */
  lock->owner = NULL;
  gomp_mutex_unlock (&lock->lock);
}
/* Try to acquire the nestable lock LOCK without blocking.  The pointer
   returned by gomp_icv (true) identifies the owner.  Return the new nesting
   count when that owner already holds the lock, 1 when the lock was free
   and has now been taken, and 0 when the compare-and-exchange on the
   underlying lock word fails.  */
int
gomp_test_nest_lock_30 (omp_nest_lock_t *lock)
{
  void *self = gomp_icv (true);
  int expected = 0;

  /* Re-entrant acquisition: just bump the count.  */
  if (lock->owner == self)
    return ++lock->count;

  if (!__atomic_compare_exchange_n (&lock->lock, &expected, 1, false,
				    MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
    return 0;

  lock->owner = self;
  lock->count = 1;
  return 1;
}
......@@ -31,6 +31,7 @@
#include <stdlib.h>
#include <string.h>
#ifdef LIBGOMP_USE_PTHREADS
/* This attribute contains PTHREAD_CREATE_DETACHED. */
pthread_attr_t gomp_thread_attr;
......@@ -110,7 +111,7 @@ gomp_thread_start (void *xdata)
{
pool->threads[thr->ts.team_id] = thr;
gomp_barrier_wait (&pool->threads_dock);
gomp_simple_barrier_wait (&pool->threads_dock);
do
{
struct gomp_team *team = thr->ts.team;
......@@ -120,7 +121,7 @@ gomp_thread_start (void *xdata)
gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
gomp_barrier_wait (&pool->threads_dock);
gomp_simple_barrier_wait (&pool->threads_dock);
local_fn = thr->fn;
local_data = thr->data;
......@@ -134,6 +135,7 @@ gomp_thread_start (void *xdata)
thr->task = NULL;
return NULL;
}
#endif
static inline struct gomp_team *
get_last_team (unsigned nthreads)
......@@ -224,11 +226,17 @@ gomp_free_pool_helper (void *thread_pool)
struct gomp_thread *thr = gomp_thread ();
struct gomp_thread_pool *pool
= (struct gomp_thread_pool *) thread_pool;
gomp_barrier_wait_last (&pool->threads_dock);
gomp_simple_barrier_wait_last (&pool->threads_dock);
gomp_sem_destroy (&thr->release);
thr->thread_pool = NULL;
thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
pthread_exit (NULL);
#elif defined(__nvptx__)
asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
/* Free a thread pool and release its threads. */
......@@ -250,12 +258,12 @@ gomp_free_thread (void *arg __attribute__((unused)))
nthr->data = pool;
}
/* This barrier undocks threads docked on pool->threads_dock. */
gomp_barrier_wait (&pool->threads_dock);
gomp_simple_barrier_wait (&pool->threads_dock);
/* And this waits till all threads have called gomp_simple_barrier_wait_last
in gomp_free_pool_helper. */
gomp_barrier_wait (&pool->threads_dock);
gomp_simple_barrier_wait (&pool->threads_dock);
/* Now it is safe to destroy the barrier and free the pool. */
gomp_barrier_destroy (&pool->threads_dock);
gomp_simple_barrier_destroy (&pool->threads_dock);
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads,
......@@ -266,10 +274,12 @@ gomp_free_thread (void *arg __attribute__((unused)))
gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
}
free (pool->threads);
if (pool->last_team)
free_team (pool->last_team);
#ifndef __nvptx__
free (pool->threads);
free (pool);
#endif
thr->thread_pool = NULL;
}
if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
......@@ -284,6 +294,7 @@ gomp_free_thread (void *arg __attribute__((unused)))
/* Launch a team. */
#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
unsigned flags, struct gomp_team *team)
......@@ -429,7 +440,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
else if (old_threads_used == 0)
{
n = 0;
gomp_barrier_init (&pool->threads_dock, nthreads);
gomp_simple_barrier_init (&pool->threads_dock, nthreads);
}
else
{
......@@ -437,7 +448,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
/* Increase the barrier threshold to make sure all new
threads arrive before the team is released. */
gomp_barrier_reinit (&pool->threads_dock, nthreads);
gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
}
/* Not true yet, but soon will be. We're going to release all
......@@ -670,8 +681,8 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
threads and all the threads we're going to let die
arrive before the team is released. */
if (affinity_count)
gomp_barrier_reinit (&pool->threads_dock,
nthreads + affinity_count);
gomp_simple_barrier_reinit (&pool->threads_dock,
nthreads + affinity_count);
}
}
......@@ -812,7 +823,10 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
pthread_attr_destroy (&thread_attr);
do_release:
gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
if (nested)
gomp_barrier_wait (&team->barrier);
else
gomp_simple_barrier_wait (&pool->threads_dock);
/* Decrease the barrier threshold to match the number of threads
that should arrive back at the end of this team. The extra
......@@ -830,7 +844,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
if (affinity_count)
diff = -affinity_count;
gomp_barrier_reinit (&pool->threads_dock, nthreads);
gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads, diff);
......@@ -844,6 +858,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
&& team->prev_ts.place_partition_len > 64)
free (affinity_thr);
}
#endif
/* Terminate the current team. This is only to be called by the master
......@@ -919,6 +934,7 @@ gomp_team_end (void)
}
}
#ifdef LIBGOMP_USE_PTHREADS
/* Constructors for this file. */
......@@ -943,6 +959,7 @@ team_destructor (void)
crashes. */
pthread_key_delete (gomp_thread_destructor);
}
#endif
struct gomp_task_icv *
gomp_new_icv (void)
......@@ -951,6 +968,8 @@ gomp_new_icv (void)
struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
gomp_init_task (task, NULL, &gomp_global_icv);
thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
pthread_setspecific (gomp_thread_destructor, thr);
#endif
return &task->icv;
}
......@@ -7,7 +7,7 @@ global ALWAYS_CFLAGS
set shlib_ext [get_shlib_extension]
set lang_library_path "../libgfortran/.libs"
set lang_link_flags "-lgfortran"
set lang_link_flags "-lgfortran -foffload=-lgfortran"
if [info exists lang_include_flags] then {
unset lang_include_flags
}
......
......@@ -9,7 +9,7 @@ global ALWAYS_CFLAGS
set shlib_ext [get_shlib_extension]
set lang_library_path "../libgfortran/.libs"
set lang_link_flags "-lgfortran"
set lang_link_flags "-lgfortran -foffload=-lgfortran"
if [info exists lang_include_flags] then {
unset lang_include_flags
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment