Commit fa499995 by Andrew Stubbs Committed by Andrew Stubbs

GCN libgomp port

2019-11-13  Andrew Stubbs  <ams@codesourcery.com>
	    Kwok Cheung Yeung  <kcy@codesourcery.com>
	    Julian Brown  <julian@codesourcery.com>
	    Tom de Vries  <tom@codesourcery.com>

	include/
	* gomp-constants.h (GOMP_DEVICE_GCN): Define.
	(GOMP_VERSION_GCN): Define.

	libgomp/
	* Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
	* Makefile.in: Regenerate.
	* config.h.in (PLUGIN_GCN): Add new undef.
	* config/accel/openacc.f90 (acc_device_gcn): New parameter.
	* config/gcn/affinity-fmt.c: New file.
	* config/gcn/bar.c: New file.
	* config/gcn/bar.h: New file.
	* config/gcn/doacross.h: New file.
	* config/gcn/icv-device.c: New file.
	* config/gcn/oacc-target.c: New file.
	* config/gcn/simple-bar.h: New file.
	* config/gcn/target.c: New file.
	* config/gcn/task.c: New file.
	* config/gcn/team.c: New file.
	* config/gcn/time.c: New file.
	* configure.ac: Add amdgcn*-*-*.
	* configure: Regenerate.
	* configure.tgt: Add amdgcn*-*-*.
	* libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
	* libgomp.h (gcn_thrs): Add amdgcn variant.
	(set_gcn_thrs): Likewise.
	(gomp_thread): Likewise.
	* oacc-int.h (goacc_thread): Likewise.
	* oacc-target.c: New file.
	* openacc.f90 (acc_device_gcn): New parameter.
	* openacc.h (acc_device_t): Add acc_device_gcn.
	* team.c (gomp_free_pool_helper): Add amdgcn support.

Co-Authored-By: Julian Brown <julian@codesourcery.com>
Co-Authored-By: Kwok Cheung Yeung <kcy@codesourcery.com>
Co-Authored-By: Tom de Vries <tom@codesourcery.com>

From-SVN: r278135
parent d2903ce0
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Kwok Cheung Yeung <kcy@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
* gomp-constants.h (GOMP_DEVICE_GCN): Define.
(GOMP_VERSION_GCN): Define.
2019-08-08 Martin Liska <mliska@suse.cz> 2019-08-08 Martin Liska <mliska@suse.cz>
PR bootstrap/91352 PR bootstrap/91352
......
...@@ -174,6 +174,7 @@ enum gomp_map_kind ...@@ -174,6 +174,7 @@ enum gomp_map_kind
#define GOMP_DEVICE_NVIDIA_PTX 5 #define GOMP_DEVICE_NVIDIA_PTX 5
#define GOMP_DEVICE_INTEL_MIC 6 #define GOMP_DEVICE_INTEL_MIC 6
#define GOMP_DEVICE_HSA 7 #define GOMP_DEVICE_HSA 7
#define GOMP_DEVICE_GCN 8
#define GOMP_DEVICE_ICV -1 #define GOMP_DEVICE_ICV -1
#define GOMP_DEVICE_HOST_FALLBACK -2 #define GOMP_DEVICE_HOST_FALLBACK -2
...@@ -215,6 +216,7 @@ enum gomp_map_kind ...@@ -215,6 +216,7 @@ enum gomp_map_kind
#define GOMP_VERSION_NVIDIA_PTX 1 #define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_INTEL_MIC 0 #define GOMP_VERSION_INTEL_MIC 0
#define GOMP_VERSION_HSA 0 #define GOMP_VERSION_HSA 0
#define GOMP_VERSION_GCN 1
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) #define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
......
2019-11-13 Andrew Stubbs <ams@codesourcery.com> 2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Kwok Cheung Yeung <kcy@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
* Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
* Makefile.in: Regenerate.
* config.h.in (PLUGIN_GCN): Add new undef.
* config/accel/openacc.f90 (acc_device_gcn): New parameter.
* config/gcn/affinity-fmt.c: New file.
* config/gcn/bar.c: New file.
* config/gcn/bar.h: New file.
* config/gcn/doacross.h: New file.
* config/gcn/icv-device.c: New file.
* config/gcn/oacc-target.c: New file.
* config/gcn/simple-bar.h: New file.
* config/gcn/target.c: New file.
* config/gcn/task.c: New file.
* config/gcn/team.c: New file.
* config/gcn/time.c: New file.
* configure.ac: Add amdgcn*-*-*.
* configure: Regenerate.
* configure.tgt: Add amdgcn*-*-*.
* libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
* libgomp.h (gcn_thrs): Add amdgcn variant.
(set_gcn_thrs): Likewise.
(gomp_thread): Likewise.
* oacc-int.h (goacc_thread): Likewise.
* oacc-target.c: New file.
* openacc.f90 (acc_device_gcn): New parameter.
* openacc.h (acc_device_t): Add acc_device_gcn.
* team.c (gomp_free_pool_helper): Add amdgcn support.
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Julian Brown <julian@codesourcery.com> Julian Brown <julian@codesourcery.com>
* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_construct): Add int * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_construct): Add int
......
...@@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ ...@@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c oacc-profiling.c affinity-fmt.c teams.c oacc-profiling.c oacc-target.c
include $(top_srcdir)/plugin/Makefrag.am include $(top_srcdir)/plugin/Makefrag.am
......
...@@ -217,7 +217,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ ...@@ -217,7 +217,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \ oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
teams.lo oacc-profiling.lo $(am__objects_1) teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@) AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
...@@ -552,7 +552,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ ...@@ -552,7 +552,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
affinity.c target.c splay-tree.c libgomp-plugin.c \ affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c oacc-profiling.c $(am__append_3) affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \
$(am__append_3)
# Nvidia PTX OpenACC plugin. # Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
...@@ -755,6 +756,7 @@ distclean-compile: ...@@ -755,6 +756,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
......
...@@ -170,6 +170,9 @@ ...@@ -170,6 +170,9 @@
/* Define to the version of this package. */ /* Define to the version of this package. */
#undef PACKAGE_VERSION #undef PACKAGE_VERSION
/* Define to 1 if the GCN plugin is built, 0 if not. */
#undef PLUGIN_GCN
/* Define to 1 if the HSA plugin is built, 0 if not. */ /* Define to 1 if the HSA plugin is built, 0 if not. */
#undef PLUGIN_HSA #undef PLUGIN_HSA
......
...@@ -51,6 +51,7 @@ module openacc_kinds ...@@ -51,6 +51,7 @@ module openacc_kinds
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5 integer (acc_device_kind), parameter :: acc_device_nvidia = 5
integer (acc_device_kind), parameter :: acc_device_gcn = 8
end module end module
......
/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_INTTYPES_H
# include <inttypes.h> /* For PRIx64. */
#endif
#ifdef HAVE_UNAME
#include <sys/utsname.h>
#endif
/* NOTE(review): the wording below names nvptx although this is the GCN
variant of the file -- presumably the same configure-test mismatch applies
to the GCN newlib; confirm.
The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for nvptx,
while the nvptx newlib implementation does not support those functions.
Override the configure test results here, so that the generic
affinity-fmt.c included after these overrides does not see the macros. */
#undef HAVE_GETPID
#undef HAVE_GETHOSTNAME
/* The GCN newlib implementation does not support fwrite, but it does support
write. Map fwrite to write.
The replacement ignores its STREAM argument and always writes to file
descriptor 1. It evaluates to the result of write (a byte count, or -1 on
error) rather than to the item count that fwrite would report. */
#undef fwrite
#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
#include "../../affinity-fmt.c"
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an AMD GCN specific implementation of a barrier synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and s_barrier instruction. It
uses MEMMODEL_RELAXED here because barriers are within workgroups and
therefore don't need to flush caches. */
#include <limits.h>
#include "libgomp.h"
/* Complete a plain barrier wait on BAR.  STATE is the value obtained from
   gomp_barrier_wait_start.  The thread whose STATE carries BAR_WAS_LAST
   re-arms the barrier and advances the generation counter; every thread
   then executes the hardware s_barrier instruction.  */
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect ((state & BAR_WAS_LAST) != 0, 0))
    {
      unsigned int next_generation;

      /* Re-arm the arrival counter for the next use of the barrier.  */
      bar->awaited = bar->total;

      /* Step the generation counter; the low flag bits are untouched.  */
      next_generation = bar->generation + BAR_INCR;
      __atomic_store_n (&bar->generation, next_generation, MEMMODEL_RELAXED);
    }

  asm ("s_barrier" ::: "memory");
}
/* Wait on BAR: register this thread's arrival, then complete the wait.  */
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  gomp_barrier_wait_end (bar, arrival_state);
}
/* Interface contract: behaves like gomp_barrier_wait, except that a thread
   which is not the last to arrive is allowed to return immediately.  The
   intended use is that the thread which will call gomp_barrier_destroy
   uses gomp_barrier_wait while all other threads use
   gomp_barrier_wait_last; once gomp_barrier_wait returns, the barrier may
   be destroyed safely.  */
void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* This port simply performs a full wait, so the early-return
     optimization that the contract permits for all-but-last participants
     is not exploited.  The implementation in config/linux/bar.c handles
     this better.  */
  gomp_barrier_wait (bar);
}
/* Wake threads waiting on a team barrier.  Neither BAR nor COUNT is
   consulted here: the function just executes the hardware s_barrier
   instruction.  */
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  (void) bar;
  (void) count;

  asm ("s_barrier" ::: "memory");
}
/* Complete a team barrier wait on BAR. STATE is the value returned by
gomp_barrier_wait_start (or gomp_barrier_wait_final_start); it holds the
generation observed on arrival, plus BAR_WAS_LAST for the final arriver.
The last arriver re-arms the barrier and either runs pending tasks or
publishes the next generation and returns. Every other thread (and the
last arriver when tasks were pending) loops on s_barrier until the
generation has advanced by BAR_INCR, running tasks when BAR_TASK_PENDING
is observed. The loop calls abort after 100 rounds without a match. */
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
/* NOTE(review): GENERATION is assigned and or-ed into below but its value
is never read in this function. */
unsigned int generation, gen;
if (__builtin_expect (state & BAR_WAS_LAST, 0))
{
/* Next time we'll be awaiting TOTAL threads again. */
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
bar->awaited = bar->total;
team->work_share_cancelled = 0;
if (__builtin_expect (team->task_count, 0))
{
/* Tasks are outstanding: process them, then fall through to the wait
loop as an ordinary (not-last) participant. */
gomp_barrier_handle_tasks (state);
state &= ~BAR_WAS_LAST;
}
else
{
/* No tasks: drop the cancel flag, clear the WAS_LAST bit and step the
generation counter in a single addition, publish the result, and
release the other wavefronts through s_barrier. */
state &= ~BAR_CANCELLED;
state += BAR_INCR - BAR_WAS_LAST;
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
asm ("s_barrier" ::: "memory");
return;
}
}
generation = state;
state &= ~BAR_CANCELLED;
/* Upper bound on the number of s_barrier rounds before giving up. */
int retry = 100;
do
{
if (retry-- == 0)
{
/* It really shouldn't happen that barriers get out of sync, but
if they do then this will loop until they realign, so we need
to avoid an infinite loop where the thread just isn't there. */
const char msg[] = ("Barrier sync failed (another thread died?);"
" aborting.");
/* The message goes to file descriptor 2, without the terminating NUL. */
write (2, msg, sizeof (msg)-1);
abort();
}
asm ("s_barrier" ::: "memory");
/* These loads use MEMMODEL_ACQUIRE, whereas the corresponding loads in
gomp_team_barrier_wait_cancel_end use MEMMODEL_RELAXED.
NOTE(review): confirm the difference is intentional. */
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
{
gomp_barrier_handle_tasks (state);
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
}
generation |= gen & BAR_WAITING_FOR_TASK;
}
/* Finished once the generation is exactly one step past the arrival value. */
while (gen != state + BAR_INCR);
}
/* Team barrier wait on BAR: register this thread's arrival, then complete
   the wait with task handling.  */
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  gomp_team_barrier_wait_end (bar, arrival_state);
}
/* Team barrier wait that counts arrivals on BAR->awaited_final instead of
   BAR->awaited.  The last thread to arrive re-arms awaited_final before
   the common team-barrier completion runs.  */
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_final_start (bar);

  if (__builtin_expect ((arrival_state & BAR_WAS_LAST) != 0, 0))
    {
      bar->awaited_final = bar->total;
    }

  gomp_team_barrier_wait_end (bar, arrival_state);
}
/* Cancellable variant of gomp_team_barrier_wait_end. STATE is the value
returned by gomp_barrier_wait_cancel_start. Returns true when
BAR_CANCELLED is seen, either in STATE or in a later load of the
generation, and false when the barrier completed normally. The wait
loop calls abort after 100 rounds without the generation advancing. */
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
gomp_barrier_state_t state)
{
/* NOTE(review): GENERATION is assigned and or-ed into below but its value
is never read in this function. */
unsigned int generation, gen;
if (__builtin_expect (state & BAR_WAS_LAST, 0))
{
/* Next time we'll be awaiting TOTAL threads again. */
/* BAR_CANCELLED should never be set in state here, because
cancellation means that at least one of the threads has been
cancelled, thus on a cancellable barrier we should never see
all threads to arrive. */
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
bar->awaited = bar->total;
team->work_share_cancelled = 0;
if (__builtin_expect (team->task_count, 0))
{
/* Tasks are outstanding: process them, then fall through to the wait
loop as an ordinary (not-last) participant. */
gomp_barrier_handle_tasks (state);
state &= ~BAR_WAS_LAST;
}
else
{
/* No tasks: clear the WAS_LAST bit and step the generation counter in a
single addition, publish it, and release the other wavefronts. */
state += BAR_INCR - BAR_WAS_LAST;
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
asm ("s_barrier" ::: "memory");
return false;
}
}
/* Cancellation was already flagged when this thread arrived. */
if (__builtin_expect (state & BAR_CANCELLED, 0))
return true;
generation = state;
/* Upper bound on the number of s_barrier rounds before giving up. */
int retry = 100;
do
{
if (retry-- == 0)
{
/* It really shouldn't happen that barriers get out of sync, but
if they do then this will loop until they realign, so we need
to avoid an infinite loop where the thread just isn't there. */
const char msg[] = ("Barrier sync failed (another thread died?);"
" aborting.");
/* The message goes to file descriptor 2, without the terminating NUL. */
write (2, msg, sizeof (msg)-1);
abort();
}
asm ("s_barrier" ::: "memory");
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
/* Cancellation raised while waiting ends the wait immediately. */
if (__builtin_expect (gen & BAR_CANCELLED, 0))
return true;
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
{
gomp_barrier_handle_tasks (state);
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
}
generation |= gen & BAR_WAITING_FOR_TASK;
}
/* Finished once the generation is exactly one step past the arrival value. */
while (gen != state + BAR_INCR);
return false;
}
/* Cancellable team barrier wait on BAR.  Returns the result of
   gomp_team_barrier_wait_cancel_end: true if the barrier was cancelled,
   false if it completed.  */
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  return gomp_team_barrier_wait_cancel_end (bar, arrival_state);
}
/* Flag TEAM's barrier as cancelled.  The BAR_CANCELLED bit is tested and
   set while holding TEAM->task_lock.  If the bit was already set nothing
   further happens; otherwise, after the lock is released, the barrier is
   woken with a count of INT_MAX.  */
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  int already_cancelled;

  gomp_mutex_lock (&team->task_lock);
  already_cancelled = (team->barrier.generation & BAR_CANCELLED) != 0;
  if (!already_cancelled)
    team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);

  if (!already_cancelled)
    gomp_team_barrier_wake (&team->barrier, INT_MAX);
}
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an AMD GCN specific implementation of a barrier synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and s_barrier instruction. It
uses MEMMODEL_RELAXED here because barriers are within workgroups and
therefore don't need to flush caches. */
#ifndef GOMP_BARRIER_H
#define GOMP_BARRIER_H 1
#include "mutex.h"
/* Barrier object used by the AMD GCN port.  */
typedef struct
{
  /* Value that AWAITED and AWAITED_FINAL are reset to (the COUNT given to
     gomp_barrier_init or gomp_barrier_reinit).  */
  unsigned int total;
  /* Generation counter in the high bits, BAR_* flags in the low bits.  */
  unsigned int generation;
  /* Arrival countdown decremented by gomp_barrier_wait_start.  */
  unsigned int awaited;
  /* Arrival countdown decremented by gomp_barrier_wait_final_start.  */
  unsigned int awaited_final;
} gomp_barrier_t;

/* Snapshot of a barrier's generation word, possibly or-ed with
   BAR_WAS_LAST, passed from the *_start to the *_end functions.  */
typedef unsigned int gomp_barrier_state_t;
/* The generation field contains a counter in the high bits, with a few
low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
share space because WAS_LAST is never stored back to generation. */
/* Stored in generation: set and cleared by
gomp_team_barrier_{set,clear}_task_pending. */
#define BAR_TASK_PENDING 1
/* Only ever present in a gomp_barrier_state_t: marks the thread whose
arrival brought the awaited count to zero. */
#define BAR_WAS_LAST 1
/* Stored in generation: set by gomp_team_barrier_set_waiting_for_tasks. */
#define BAR_WAITING_FOR_TASK 2
/* Stored in generation: set by gomp_team_barrier_cancel. */
#define BAR_CANCELLED 4
/* Step of the generation counter; -BAR_INCR masks off all flag bits. */
#define BAR_INCR 8
/* Initialize BAR for COUNT participants: generation starts at zero and
   all three counters start at COUNT.  */
static inline void
gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
{
  bar->generation = 0;
  bar->total = bar->awaited = bar->awaited_final = count;
}
/* Change the number of participants of BAR to COUNT.  The pending arrival
   count is adjusted atomically by the difference between the new and the
   old total (unsigned arithmetic, so a smaller COUNT wraps to a
   decrement), then the total is updated.  */
static inline void
gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
{
  const unsigned delta = count - bar->total;

  __atomic_add_fetch (&bar->awaited, delta, MEMMODEL_RELAXED);
  bar->total = count;
}
/* Destroy BAR.  Nothing is done in this implementation.  */
static inline void
gomp_barrier_destroy (gomp_barrier_t *bar)
{
  (void) bar;
}
/* Plain barrier entry points, implemented out of line. */
extern void gomp_barrier_wait (gomp_barrier_t *);
extern void gomp_barrier_wait_last (gomp_barrier_t *);
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
/* Team barrier entry points; the *_end forms take the state returned by
the matching gomp_barrier_wait*_start inline. */
extern void gomp_team_barrier_wait (gomp_barrier_t *);
extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
gomp_barrier_state_t);
/* Cancellable forms; they return true when the barrier was cancelled. */
extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
gomp_barrier_state_t);
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
/* Forward declaration only; struct gomp_team is defined elsewhere. */
struct gomp_team;
extern void gomp_team_barrier_cancel (struct gomp_team *);
/* Register the calling thread's arrival at BAR.  Returns the generation
   observed on arrival with every flag except BAR_CANCELLED masked off,
   and with BAR_WAS_LAST or-ed in when this arrival brought BAR->awaited
   down to zero.

   Both atomic operations use MEMMODEL_RELAXED, in line with this file's
   header note that barriers are within workgroups.  OpenMP API version 3.1
   section 2.8.6 (flush Construct) requires an implicit flush during a
   barrier region; the relaxed atomics here do not provide one themselves.
   NOTE(review): presumably the s_barrier executed by the *_end functions
   covers that requirement -- confirm.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_start (gomp_barrier_t *bar)
{
  unsigned int observed = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
  unsigned int arrival_state = observed & (-BAR_INCR | BAR_CANCELLED);
  unsigned int remaining
    = __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_RELAXED);

  if (remaining == 0)
    arrival_state |= BAR_WAS_LAST;

  return arrival_state;
}
/* Arrival step for cancellable barriers.  In this implementation it is
   identical to gomp_barrier_wait_start.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  return arrival_state;
}
/* Same as gomp_barrier_wait_start, but the arrival is counted on
   BAR->awaited_final instead of BAR->awaited.  Intended for the
   gomp_team_end barrier only.  Both atomic operations use
   MEMMODEL_RELAXED.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_final_start (gomp_barrier_t *bar)
{
  unsigned int observed = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
  unsigned int arrival_state = observed & (-BAR_INCR | BAR_CANCELLED);
  unsigned int remaining
    = __atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_RELAXED);

  if (remaining == 0)
    arrival_state |= BAR_WAS_LAST;

  return arrival_state;
}
/* Return true if STATE carries BAR_WAS_LAST, i.e. it belongs to the thread
   that arrived last at the barrier.  */
static inline bool
gomp_barrier_last_thread (gomp_barrier_state_t state)
{
  return (state & BAR_WAS_LAST) != 0;
}
/* All the inlines below must be called with team->task_lock
   held.  */

/* Raise BAR_TASK_PENDING in BAR's generation word (plain, non-atomic
   read-modify-write).  */
static inline void
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
{
  bar->generation = bar->generation | BAR_TASK_PENDING;
}
/* Clear BAR_TASK_PENDING in BAR's generation word (plain, non-atomic
   read-modify-write).  */
static inline void
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
{
  bar->generation = bar->generation & ~BAR_TASK_PENDING;
}
/* Raise BAR_WAITING_FOR_TASK in BAR's generation word (plain, non-atomic
   read-modify-write).  */
static inline void
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
{
  bar->generation = bar->generation | BAR_WAITING_FOR_TASK;
}
/* Return true if BAR_WAITING_FOR_TASK is set in BAR's generation word.  */
static inline bool
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
{
  unsigned int waiting_bit = bar->generation & BAR_WAITING_FOR_TASK;

  return waiting_bit != 0;
}
/* Return true if BAR_CANCELLED is set in BAR's generation word.  The
   result is hinted to the compiler as normally false.  */
static inline bool
gomp_team_barrier_cancelled (gomp_barrier_t *bar)
{
  unsigned int cancel_bit = bar->generation & BAR_CANCELLED;

  return __builtin_expect (cancel_bit != 0, 0);
}
/* Mark the barrier round described by STATE as finished: BAR's generation
   becomes STATE with all flag bits stripped, advanced by one BAR_INCR
   step.  */
static inline void
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int counter_only = state & -BAR_INCR;

  bar->generation = counter_only + BAR_INCR;
}
#endif /* GOMP_BARRIER_H */
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is the AMD GCN implementation of doacross spinning. */
#ifndef GOMP_DOACROSS_H
#define GOMP_DOACROSS_H 1
#include "libgomp.h"
/* Pause briefly inside a spin loop.  Always returns 0.

   A bare memory barrier would be sufficient, but a sleep seems like it
   should let the wavefront yield (maybe?).  The shortest possible sleep
   time is used: 1*64 cycles.  */
static inline int
cpu_relax (void)
{
  const int zero_offset = 0;

  asm volatile ("s_sleep\t1" ::: "memory");
  return zero_offset;
}
/* Spin until the value at ADDR exceeds EXPECTED.  The incoming value of
   CUR is not consulted: the location is always re-read after at least one
   cpu_relax pause.  */
static inline void
doacross_spin (unsigned long *addr, unsigned long expected,
	       unsigned long cur)
{
  /* Prevent compiler from optimizing based on bounds of containing object.  */
  asm ("" : "+r" (addr));

  for (;;)
    {
      /* An alternative implementation might use s_setprio to lower the
	 priority temporarily, and then restore it after.  */
      int offset = cpu_relax ();

      cur = addr[offset];
      if (cur > expected)
	break;
    }
}
#endif /* GOMP_DOACROSS_H */
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines OpenMP API entry points that accelerator targets are
expected to replace. */
#include "libgomp.h"
/* OpenMP API entry point: set the default device.  The request is ignored
   in this accelerator-side implementation.  */
void
omp_set_default_device (int device_num __attribute__((unused)))
{
  return;
}
/* OpenMP API entry point: report the default device number, which is
   always 0 in this accelerator-side implementation.  */
int
omp_get_default_device (void)
{
  const int default_device = 0;

  return default_device;
}
/* OpenMP API entry point: report the number of target devices, which is
   always 0 in this accelerator-side implementation.  */
int
omp_get_num_devices (void)
{
  const int device_count = 0;

  return device_count;
}
/* OpenMP API entry point: report the number of teams.
   gomp_num_teams_var stores the team count minus one (see GOMP_teams),
   so one is added back here.  */
int
omp_get_num_teams (void)
{
  return 1 + gomp_num_teams_var;
}
/* OpenMP API entry point: report the calling thread's team number, taken
   from the position along dimension 0 as reported by
   __builtin_gcn_dim_pos.  The function is compiled with the O2 optimize
   attribute.  */
int __attribute__ ((__optimize__ ("O2")))
omp_get_team_num (void)
{
  int team_index = __builtin_gcn_dim_pos (0);

  return team_index;
}
/* OpenMP API entry point: report whether the caller runs on the initial
   (host) device.  AMD GCN is an accelerator-only target, so the answer is
   always 0.  */
int
omp_is_initial_device (void)
{
  const int on_initial_device = 0;

  return on_initial_device;
}
/* One ialias per entry point defined in this file. The macro is defined
outside this view. NOTE(review): presumably it creates a library-internal
alias for each symbol -- confirm against libgomp.h. */
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)
/* Oversized reductions lock variable
Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Graphics.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Global lock word for reductions on objects wider than 64 bits.  A
   single lock is shared by all such reductions; this suffices until and
   unless contention between different reductions is shown to be a
   problem.  It starts out as zero.  */
volatile unsigned int __reduction_lock = 0;
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is a simplified barrier that is suitable for thread pool
synchronization. Only a subset of the full barrier API (bar.h) is exposed.
Here in the AMD GCN-specific implementation, we expect that the thread pool
corresponds to the wavefronts within a work group. */
#ifndef GOMP_SIMPLE_BARRIER_H
#define GOMP_SIMPLE_BARRIER_H 1
/* Placeholder type: the AMD GCN simple barrier keeps no state, so a plain
   int stands in for the barrier object.  */
typedef int gomp_simple_barrier_t;
/* Initialize BAR.  This is a no-op: GCN barriers block all wavefronts, so
   the participant COUNT is not interesting.  */
static inline void
gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
{
  (void) bar;
  (void) count;
}
/* Destroy BAR.  Nothing is done in this implementation.  */
static inline void
gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
{
  (void) bar;
}
/* Wait at the simple barrier.  BAR is not consulted; the function just
   executes the hardware s_barrier instruction.  */
static inline void
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
{
  (void) bar;

  asm volatile ("s_barrier" ::: "memory");
}
/* "Last participant" form of the simple barrier wait.  GCN has no way to
   signal a barrier without waiting, so this executes the same s_barrier
   instruction as gomp_simple_barrier_wait.  BAR is not consulted.  */
static inline void
gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
{
  (void) bar;

  asm volatile ("s_barrier" ::: "memory");
}
#endif /* GOMP_SIMPLE_BARRIER_H */
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
#include <limits.h>
/* Set up the teams construct on the device.

   A non-zero THREAD_LIMIT is recorded in the current ICV's
   thread_limit_var; values above INT_MAX are stored as UINT_MAX.

   The team count is limited to the size of dimension 0 as reported by
   __builtin_gcn_dim_size; a NUM_TEAMS of zero selects that size.  A
   workgroup whose position along dimension 0 is not below the team count
   frees its thread data and exits with status 0.  Otherwise the team
   count minus one is stored in gomp_num_teams_var.  */
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit != 0)
    {
      struct gomp_task_icv *icv = gomp_icv (true);

      if (thread_limit > INT_MAX)
	icv->thread_limit_var = UINT_MAX;
      else
	icv->thread_limit_var = thread_limit;
    }

  unsigned int num_workgroups = __builtin_gcn_dim_size (0);
  unsigned int workgroup_id = __builtin_gcn_dim_pos (0);

  if (num_teams == 0 || num_teams >= num_workgroups)
    {
      num_teams = num_workgroups;
    }
  else if (workgroup_id >= num_teams)
    {
      /* This workgroup is surplus to the requested number of teams.  */
      gomp_free_thread (gcn_thrs ());
      exit (0);
    }

  gomp_num_teams_var = num_teams - 1;
}
/* OpenMP API entry point: pause resources on device DEVICE_NUM.  Both
   arguments are ignored and the call always returns -1.  */
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  const int result = -1;

  (void) device_num;
  (void) kind;
  return result;
}
/* OpenMP API entry point: pause resources on all devices.  The argument
   is ignored and the call always returns -1.  */
int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  const int result = -1;

  (void) kind;
  return result;
}
/* One ialias per omp_pause_resource* entry point defined in this file. The
macro is defined outside this view. NOTE(review): presumably it creates a
library-internal alias for each symbol -- confirm against libgomp.h. */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file handles the maintenance of tasks in response to task
creation and termination. */
#include "libgomp.h"
/* AMD GCN is an accelerator-only target, so this function should never
   be called; its body is marked unreachable.  DATA is ignored.  */

bool
gomp_target_task_fn (void *data)
{
  (void) data;
  __builtin_unreachable ();
}
#include "../../task.c"
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file handles maintenance of threads on AMD GCN. */
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
static void gomp_thread_start (struct gomp_thread_pool *);
/* This externally visible function handles target region entry. It
sets up a per-team thread pool and transfers control by returning to
the kernel in the master thread or gomp_thread_start in other threads.
The name of this function is part of the interface with the compiler: for
each OpenMP kernel the compiler configures the stack, then calls here.
Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue. */
/* Kernel prologue hook.

   The thread at position 0 in dimension 1 (the master) initializes the
   global ICVs, allocates the per-thread gomp_thread array and the thread
   pool, publishes the array with set_gcn_thrs, and then returns to the
   kernel.  Every other thread waits at the barrier and then enters
   gomp_thread_start with the pool created by the master.  */

void
gomp_gcn_enter_kernel (void)
{
  int threadid = __builtin_gcn_dim_pos (1);

  if (threadid == 0)
    {
      int numthreads = __builtin_gcn_dim_size (1);

      /* Set up the global state.
         Every team will do this, but that should be harmless.  */
      gomp_global_icv.nthreads_var = 16;
      gomp_global_icv.thread_limit_var = numthreads;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      /* Allocate and initialize the team-local-storage data.  */
      struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs)
                                                      * numthreads);
      set_gcn_thrs (thrs);

      /* Allocate and initialize a pool of threads in the team.
         The threads are already running, of course, we just need to manage
         the communication between them.  */
      struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool));
      /* Size the array by its own element type rather than by void *.  */
      pool->threads = gomp_malloc (sizeof (*pool->threads) * numthreads);
      for (int tid = 0; tid < numthreads; tid++)
        pool->threads[tid] = &thrs[tid];
      pool->threads_size = numthreads;
      pool->threads_used = numthreads;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, numthreads);
      thrs->thread_pool = pool;

      /* The other threads read the pool through gcn_thrs ()[0] only after
         their own barrier, below.  */
      asm ("s_barrier" ::: "memory");
      return;  /* Return to kernel.  */
    }
  else
    {
      asm ("s_barrier" ::: "memory");
      gomp_thread_start (gcn_thrs ()[0].thread_pool);
      /* gomp_thread_start does not return.  */
    }
}
/* Kernel epilogue hook, the counterpart of gomp_gcn_enter_kernel.
   Passes the gomp_thread array returned by gcn_thrs () to
   gomp_free_thread, then releases the array itself with free.  */
void
gomp_gcn_exit_kernel (void)
{
gomp_free_thread (gcn_thrs ());
/* gcn_thrs () is read again here rather than cached.
   NOTE(review): presumably gomp_free_thread leaves the stored pointer
   unchanged -- confirm before caching it in a local.  */
free (gcn_thrs ());
}
/* Idle loop for a thread waiting to be called up as part of a team.

   The thread initializes its release semaphore, records POOL as its
   thread pool, and then repeatedly waits on the pool's dock barrier.
   When it wakes with a function assigned it runs that function, clears
   the assignment, waits on the team's final barrier and finishes its
   task.  */

static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
  struct gomp_thread *self = gomp_thread ();

  /* How many times the thread may still wake up without any work
     before the master is presumed dead.  */
  int idle_wakeups_left = 99;

  gomp_sem_init (&self->release, 0);
  self->thread_pool = pool;

  /* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
     which contains "s_endpgm", or an infinite no-op loop is
     suspected (this happens when the thread master crashes).  */
  for (;;)
    {
      gomp_simple_barrier_wait (&pool->threads_dock);

      if (self->fn == NULL)
        {
          if (idle_wakeups_left-- > 0)
            continue;

          const char msg[] = ("team master not responding;"
                              " slave thread aborting");
          write (2, msg, sizeof (msg)-1);
          abort ();
        }

      self->fn (self->data);
      self->fn = NULL;

      /* Capture the task before the final barrier, then retire it.  */
      struct gomp_task *finished = self->task;
      gomp_team_barrier_wait_final (&self->ts.team->barrier);
      gomp_finish_task (finished);
    }
}
/* Launch a team. */
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
unsigned flags, struct gomp_team *team,
struct gomp_taskgroup *taskgroup)
{
struct gomp_thread *thr, *nthr;
struct gomp_task *task;
struct gomp_task_icv *icv;
struct gomp_thread_pool *pool;
unsigned long nthreads_var;
thr = gomp_thread ();
pool = thr->thread_pool;
task = thr->task;
icv = task ? &task->icv : &gomp_global_icv;
/* Always save the previous state, even if this isn't a nested team.
In particular, we should save any work share state from an outer
orphaned work share construct. */
team->prev_ts = thr->ts;
thr->ts.team = team;
thr->ts.team_id = 0;
++thr->ts.level;
if (nthreads > 1)
++thr->ts.active_level;
thr->ts.work_share = &team->work_shares[0];
thr->ts.last_work_share = NULL;
thr->ts.single_count = 0;
thr->ts.static_trip = 0;
thr->task = &team->implicit_task[0];
nthreads_var = icv->nthreads_var;
gomp_init_task (thr->task, task, icv);
team->implicit_task[0].icv.nthreads_var = nthreads_var;
team->implicit_task[0].taskgroup = taskgroup;
if (nthreads == 1)
return;
/* Release existing idle threads. */
for (unsigned i = 1; i < nthreads; ++i)
{
nthr = pool->threads[i];
nthr->ts.team = team;
nthr->ts.work_share = &team->work_shares[0];
nthr->ts.last_work_share = NULL;
nthr->ts.team_id = i;
nthr->ts.level = team->prev_ts.level + 1;
nthr->ts.active_level = thr->ts.active_level;
nthr->ts.single_count = 0;
nthr->ts.static_trip = 0;
nthr->task = &team->implicit_task[i];
gomp_init_task (nthr->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
team->implicit_task[i].taskgroup = taskgroup;
nthr->fn = fn;
nthr->data = data;
team->ordered_release[i] = &nthr->release;
}
gomp_simple_barrier_wait (&pool->threads_dock);
}
#include "../../team.c"
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file implements timer routines for AMD GCN. */
#include "libgomp.h"
/* According to AMD:
dGPU RTC is 27MHz
AGPU RTC is 100MHz
FIXME: DTRT on an APU. */
#define RTC_TICKS (1.0 / 27000000.0) /* 27MHz */
double
omp_get_wtime (void)
{
uint64_t clock;
asm ("s_memrealtime %0\n\t"
"s_waitcnt 0" : "=r" (clock));
return clock * RTC_TICKS;
}
/* Return the timer resolution in seconds, i.e. the RTC_TICKS factor
   that omp_get_wtime applies to the raw counter value.  */

double
omp_get_wtick (void)
{
  const double seconds_per_tick = RTC_TICKS;

  return seconds_per_tick;
}
/* Apply the ialias macro to the two timer entry points defined above.
   NOTE(review): ialias is not defined in this file; presumably it comes
   from libgomp.h and creates library-internal aliases -- confirm.  */
ialias (omp_get_wtime)
ialias (omp_get_wtick)
...@@ -14921,7 +14921,7 @@ case "$host" in ...@@ -14921,7 +14921,7 @@ case "$host" in
*-*-rtems*) *-*-rtems*)
# RTEMS supports Pthreads, but the library is not available at GCC build time. # RTEMS supports Pthreads, but the library is not available at GCC build time.
;; ;;
nvptx*-*-*) nvptx*-*-* | amdgcn*-*-*)
# NVPTX does not support Pthreads, has its own code replacement. # NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no libgomp_use_pthreads=no
# NVPTX is an accelerator-only target # NVPTX is an accelerator-only target
......
...@@ -176,7 +176,7 @@ case "$host" in ...@@ -176,7 +176,7 @@ case "$host" in
*-*-rtems*) *-*-rtems*)
# RTEMS supports Pthreads, but the library is not available at GCC build time. # RTEMS supports Pthreads, but the library is not available at GCC build time.
;; ;;
nvptx*-*-*) nvptx*-*-* | amdgcn*-*-*)
# NVPTX does not support Pthreads, has its own code replacement. # NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no libgomp_use_pthreads=no
# NVPTX is an accelerator-only target # NVPTX is an accelerator-only target
......
...@@ -164,6 +164,10 @@ case "${target}" in ...@@ -164,6 +164,10 @@ case "${target}" in
fi fi
;; ;;
amdgcn*-*-*)
config_path="gcn accel"
;;
*) *)
;; ;;
......
...@@ -50,7 +50,8 @@ enum offload_target_type ...@@ -50,7 +50,8 @@ enum offload_target_type
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
OFFLOAD_TARGET_TYPE_HSA = 7 OFFLOAD_TARGET_TYPE_HSA = 7,
OFFLOAD_TARGET_TYPE_GCN = 8
}; };
/* Opaque type to represent plugin-dependent implementation of an /* Opaque type to represent plugin-dependent implementation of an
......
...@@ -692,6 +692,24 @@ static inline struct gomp_thread *gomp_thread (void) ...@@ -692,6 +692,24 @@ static inline struct gomp_thread *gomp_thread (void)
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
return nvptx_thrs + tid; return nvptx_thrs + tid;
} }
#elif defined __AMDGCN__
static inline struct gomp_thread *gcn_thrs (void)
{
/* The value is at the bottom of LDS. */
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
return *thrs;
}
static inline void set_gcn_thrs (struct gomp_thread *val)
{
/* The value is at the bottom of LDS. */
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
*thrs = val;
}
static inline struct gomp_thread *gomp_thread (void)
{
int tid = __builtin_gcn_dim_pos(1);
return gcn_thrs () + tid;
}
#elif defined HAVE_TLS || defined USE_EMUTLS #elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct gomp_thread gomp_tls_data; extern __thread struct gomp_thread gomp_tls_data;
static inline struct gomp_thread *gomp_thread (void) static inline struct gomp_thread *gomp_thread (void)
......
...@@ -82,7 +82,14 @@ struct goacc_thread ...@@ -82,7 +82,14 @@ struct goacc_thread
void *target_tls; void *target_tls;
}; };
#if defined HAVE_TLS || defined USE_EMUTLS #ifdef __AMDGCN__
/* AMD GCN variant of goacc_thread.  Unused in the offload libgomp for
   OpenACC, so it only hands back a dummy null value.  */
static inline struct goacc_thread *
goacc_thread (void)
{
  struct goacc_thread *dummy = 0;

  return dummy;
}
#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct goacc_thread *goacc_tls_data; extern __thread struct goacc_thread *goacc_tls_data;
static inline struct goacc_thread * static inline struct goacc_thread *
goacc_thread (void) goacc_thread (void)
......
/* Nothing needed here. */
...@@ -46,6 +46,7 @@ module openacc_kinds ...@@ -46,6 +46,7 @@ module openacc_kinds
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5 integer (acc_device_kind), parameter :: acc_device_nvidia = 5
integer (acc_device_kind), parameter :: acc_device_gcn = 8
public :: acc_handle_kind public :: acc_handle_kind
......
...@@ -55,6 +55,7 @@ typedef enum acc_device_t { ...@@ -55,6 +55,7 @@ typedef enum acc_device_t {
/* acc_device_host_nonshm = 3 removed. */ /* acc_device_host_nonshm = 3 removed. */
acc_device_not_host = 4, acc_device_not_host = 4,
acc_device_nvidia = 5, acc_device_nvidia = 5,
acc_device_gcn = 8,
_ACC_device_hwm, _ACC_device_hwm,
/* Ensure enumeration is layout compatible with int. */ /* Ensure enumeration is layout compatible with int. */
_ACC_highest = __INT_MAX__, _ACC_highest = __INT_MAX__,
......
...@@ -239,6 +239,9 @@ gomp_free_pool_helper (void *thread_pool) ...@@ -239,6 +239,9 @@ gomp_free_pool_helper (void *thread_pool)
pthread_exit (NULL); pthread_exit (NULL);
#elif defined(__nvptx__) #elif defined(__nvptx__)
asm ("exit;"); asm ("exit;");
#elif defined(__AMDGCN__)
asm ("s_dcache_wb\n\t"
"s_endpgm");
#else #else
#error gomp_free_pool_helper must terminate the thread #error gomp_free_pool_helper must terminate the thread
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment