Commit fd71a9a2, authored and committed by Thomas Schwinge

OpenACC 2.5 kernels construct: num_gangs, num_workers, vector_length clauses

	gcc/c/
	* c-parser.c (OACC_KERNELS_CLAUSE_MASK): Add
	"PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS",
	"PRAGMA_OACC_CLAUSE_VECTOR_LENGTH".
	gcc/cp/
	* parser.c (OACC_KERNELS_CLAUSE_MASK): Add
	"PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS",
	"PRAGMA_OACC_CLAUSE_VECTOR_LENGTH".
	gcc/fortran/
	* openmp.c (OACC_KERNELS_CLAUSES): Add "OMP_CLAUSE_NUM_GANGS",
	"OMP_CLAUSE_NUM_WORKERS", "OMP_CLAUSE_VECTOR_LENGTH".
	gcc/
	* omp-offload.c (execute_oacc_device_lower): Remove the
	parallelism dimensions function attributes for unparallelized
	OpenACC kernels constructs.
	gcc/testsuite/
	* c-c++-common/goacc/parallel-dims-1.c: Update.
	* c-c++-common/goacc/parallel-dims-2.c: Likewise.
	* c-c++-common/goacc/routine-1.c: Likewise.
	* c-c++-common/goacc/uninit-dim-clause.c: Likewise.
	* g++.dg/goacc/template.C: Likewise.
	* gfortran.dg/goacc/kernels-tree.f95: Likewise.
	* gfortran.dg/goacc/routine-3.f90: Likewise.
	* gfortran.dg/goacc/sie.f95: Likewise.
	* gfortran.dg/goacc/uninit-dim-clause.f95: Likewise.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Update.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: Likewise.

From-SVN: r248370
parent 464d0118
2017-05-23 Thomas Schwinge <thomas@codesourcery.com>
* omp-offload.c (execute_oacc_device_lower): Remove the
parallelism dimensions function attributes for unparallelized
OpenACC kernels constructs.
2017-05-23 Martin Liska <mliska@suse.cz> 2017-05-23 Martin Liska <mliska@suse.cz>
* cgraph.c (cgraph_node::get_create): Use symtab_node::dump_{asm_,}name * cgraph.c (cgraph_node::get_create): Use symtab_node::dump_{asm_,}name
......
2017-05-23 Thomas Schwinge <thomas@codesourcery.com>
* c-parser.c (OACC_KERNELS_CLAUSE_MASK): Add
"PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS",
"PRAGMA_OACC_CLAUSE_VECTOR_LENGTH".
2017-05-23 Marek Polacek <polacek@redhat.com> 2017-05-23 Marek Polacek <polacek@redhat.com>
* c-parser.c (c_parser_compound_statement_nostart): Remove redundant * c-parser.c (c_parser_compound_statement_nostart): Remove redundant
......
...@@ -13984,11 +13984,14 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name, ...@@ -13984,11 +13984,14 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name,
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEFAULT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEFAULT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) )
#define OACC_PARALLEL_CLAUSE_MASK \ #define OACC_PARALLEL_CLAUSE_MASK \
......
2017-05-23 Thomas Schwinge <thomas@codesourcery.com>
* parser.c (OACC_KERNELS_CLAUSE_MASK): Add
"PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS",
"PRAGMA_OACC_CLAUSE_VECTOR_LENGTH".
2017-05-23 Nathan Sidwell <nathan@acm.org> 2017-05-23 Nathan Sidwell <nathan@acm.org>
* cp-tree.h (OVL_P): New. * cp-tree.h (OVL_P): New.
......
...@@ -36432,11 +36432,14 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name, ...@@ -36432,11 +36432,14 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEFAULT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEFAULT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) )
#define OACC_PARALLEL_CLAUSE_MASK \ #define OACC_PARALLEL_CLAUSE_MASK \
2017-05-23 Thomas Schwinge <thomas@codesourcery.com>
* openmp.c (OACC_KERNELS_CLAUSES): Add "OMP_CLAUSE_NUM_GANGS",
"OMP_CLAUSE_NUM_WORKERS", "OMP_CLAUSE_VECTOR_LENGTH".
2017-05-22 Janus Weil <janus@gcc.gnu.org> 2017-05-22 Janus Weil <janus@gcc.gnu.org>
PR fortran/80766 PR fortran/80766
......
...@@ -1932,7 +1932,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask, ...@@ -1932,7 +1932,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
| OMP_CLAUSE_PRESENT_OR_CREATE | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_PRIVATE \ | OMP_CLAUSE_PRESENT_OR_CREATE | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_PRIVATE \
| OMP_CLAUSE_FIRSTPRIVATE | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_WAIT) | OMP_CLAUSE_FIRSTPRIVATE | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_WAIT)
#define OACC_KERNELS_CLAUSES \ #define OACC_KERNELS_CLAUSES \
(omp_mask (OMP_CLAUSE_IF) | OMP_CLAUSE_ASYNC | OMP_CLAUSE_DEVICEPTR \ (omp_mask (OMP_CLAUSE_IF) | OMP_CLAUSE_ASYNC | OMP_CLAUSE_NUM_GANGS \
| OMP_CLAUSE_NUM_WORKERS | OMP_CLAUSE_VECTOR_LENGTH | OMP_CLAUSE_DEVICEPTR \
| OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \
| OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT | OMP_CLAUSE_PRESENT_OR_COPY \ | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT | OMP_CLAUSE_PRESENT_OR_COPY \
| OMP_CLAUSE_PRESENT_OR_COPYIN | OMP_CLAUSE_PRESENT_OR_COPYOUT \ | OMP_CLAUSE_PRESENT_OR_COPYIN | OMP_CLAUSE_PRESENT_OR_COPYOUT \
......
...@@ -1451,6 +1451,15 @@ execute_oacc_device_lower () ...@@ -1451,6 +1451,15 @@ execute_oacc_device_lower ()
= (lookup_attribute ("oacc kernels parallelized", = (lookup_attribute ("oacc kernels parallelized",
DECL_ATTRIBUTES (current_function_decl)) != NULL); DECL_ATTRIBUTES (current_function_decl)) != NULL);
/* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
kernels, so remove the parallelism dimensions function attributes
potentially set earlier on. */
if (is_oacc_kernels && !is_oacc_kernels_parallelized)
{
oacc_set_fn_attrib (current_function_decl, NULL, NULL);
attrs = oacc_get_fn_attrib (current_function_decl);
}
/* Discover, partition and process the loops. */ /* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery (); oacc_loop *loops = oacc_loop_discovery ();
int fn_level = oacc_fn_attrib_level (attrs); int fn_level = oacc_fn_attrib_level (attrs);
......
2017-05-23 Thomas Schwinge <thomas@codesourcery.com>
* c-c++-common/goacc/parallel-dims-1.c: Update.
* c-c++-common/goacc/parallel-dims-2.c: Likewise.
* c-c++-common/goacc/routine-1.c: Likewise.
* c-c++-common/goacc/uninit-dim-clause.c: Likewise.
* g++.dg/goacc/template.C: Likewise.
* gfortran.dg/goacc/kernels-tree.f95: Likewise.
* gfortran.dg/goacc/routine-3.f90: Likewise.
* gfortran.dg/goacc/sie.f95: Likewise.
* gfortran.dg/goacc/uninit-dim-clause.f95: Likewise.
2017-05-23 Nathan Sidwell <nathan@acm.org> 2017-05-23 Nathan Sidwell <nathan@acm.org>
* g++.dg/lookup/using13.C: Adjust expected error. * g++.dg/lookup/using13.C: Adjust expected error.
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
void f(int i) void f(int i)
{ {
#pragma acc kernels num_gangs(i) num_workers(i) vector_length(i)
;
#pragma acc parallel num_gangs(i) num_workers(i) vector_length(i) #pragma acc parallel num_gangs(i) num_workers(i) vector_length(i)
; ;
} }
...@@ -21,6 +21,13 @@ void seq (void) ...@@ -21,6 +21,13 @@ void seq (void)
int main () int main ()
{ {
#pragma acc kernels num_gangs (32) num_workers (32) vector_length (32)
{
gang ();
worker ();
vector ();
seq ();
}
#pragma acc parallel num_gangs (32) num_workers (32) vector_length (32) #pragma acc parallel num_gangs (32) num_workers (32) vector_length (32)
{ {
......
/* { dg-do compile } */
/* { dg-additional-options "-Wuninitialized" } */ /* { dg-additional-options "-Wuninitialized" } */
#include <stdbool.h> void acc_parallel()
int
main (void)
{ {
int i, j, k; int i, j, k;
...@@ -17,3 +13,17 @@ main (void) ...@@ -17,3 +13,17 @@ main (void)
#pragma acc parallel vector_length(k) /* { dg-warning "is used uninitialized in this function" } */ #pragma acc parallel vector_length(k) /* { dg-warning "is used uninitialized in this function" } */
; ;
} }
void acc_kernels()
{
int i, j, k;
#pragma acc kernels num_gangs(i) /* { dg-warning "is used uninitialized in this function" } */
;
#pragma acc kernels num_workers(j) /* { dg-warning "is used uninitialized in this function" } */
;
#pragma acc kernels vector_length(k) /* { dg-warning "is used uninitialized in this function" } */
;
}
...@@ -100,6 +100,10 @@ oacc_kernels_copy (T a) ...@@ -100,6 +100,10 @@ oacc_kernels_copy (T a)
float y = 3; float y = 3;
double z = 4; double z = 4;
#pragma acc kernels num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
for (int i = 0; i < 1; i++)
b = a;
#pragma acc kernels copy (w, x, y, z) #pragma acc kernels copy (w, x, y, z)
{ {
w = accDouble<char>(w); w = accDouble<char>(w);
......
...@@ -6,7 +6,8 @@ program test ...@@ -6,7 +6,8 @@ program test
integer :: q, i, j, k, m, n, o, p, r, s, t, u, v, w integer :: q, i, j, k, m, n, o, p, r, s, t, u, v, w
logical :: l = .true. logical :: l = .true.
!$acc kernels if(l) async copy(i), copyin(j), copyout(k), create(m) & !$acc kernels if(l) async num_gangs(i) num_workers(i) vector_length(i) &
!$acc copy(i), copyin(j), copyout(k), create(m) &
!$acc present(o), pcopy(p), pcopyin(r), pcopyout(s), pcreate(t) & !$acc present(o), pcopy(p), pcopyin(r), pcopyout(s), pcreate(t) &
!$acc deviceptr(u) !$acc deviceptr(u)
!$acc end kernels !$acc end kernels
...@@ -16,6 +17,9 @@ end program test ...@@ -16,6 +17,9 @@ end program test
! { dg-final { scan-tree-dump-times "if" 1 "original" } } ! { dg-final { scan-tree-dump-times "if" 1 "original" } }
! { dg-final { scan-tree-dump-times "async" 1 "original" } } ! { dg-final { scan-tree-dump-times "async" 1 "original" } }
! { dg-final { scan-tree-dump-times "num_gangs" 1 "original" } }
! { dg-final { scan-tree-dump-times "num_workers" 1 "original" } }
! { dg-final { scan-tree-dump-times "vector_length" 1 "original" } }
! { dg-final { scan-tree-dump-times "map\\(force_tofrom:i\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "map\\(force_tofrom:i\\)" 1 "original" } }
! { dg-final { scan-tree-dump-times "map\\(force_to:j\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "map\\(force_to:j\\)" 1 "original" } }
......
...@@ -4,6 +4,12 @@ CONTAINS ...@@ -4,6 +4,12 @@ CONTAINS
INTEGER :: i INTEGER :: i
REAL(KIND=8), ALLOCATABLE :: un(:), ua(:) REAL(KIND=8), ALLOCATABLE :: un(:), ua(:)
!$acc kernels num_gangs(2) num_workers(4) vector_length(32)
DO jj = 1, 100
un(i) = ua(i)
END DO
!$acc end kernels
!$acc parallel num_gangs(2) num_workers(4) vector_length(32) !$acc parallel num_gangs(2) num_workers(4) vector_length(32)
DO jj = 1, 100 DO jj = 1, 100
un(i) = ua(i) un(i) = ua(i)
......
...@@ -95,6 +95,34 @@ program test ...@@ -95,6 +95,34 @@ program test
!$acc parallel num_gangs("1") ! { dg-error "scalar INTEGER expression" } !$acc parallel num_gangs("1") ! { dg-error "scalar INTEGER expression" }
!$acc end parallel !$acc end parallel
!$acc kernels num_gangs ! { dg-error "Unclassifiable OpenACC directive" }
!$acc kernels num_gangs(3)
!$acc end kernels
!$acc kernels num_gangs(i)
!$acc end kernels
!$acc kernels num_gangs(i+1)
!$acc end kernels
!$acc kernels num_gangs(-1) ! { dg-warning "must be positive" }
!$acc end kernels
!$acc kernels num_gangs(0) ! { dg-warning "must be positive" }
!$acc end kernels
!$acc kernels num_gangs() ! { dg-error "Invalid character in name" }
!$acc kernels num_gangs(1.5) ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc kernels num_gangs(.true.) ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc kernels num_gangs("1") ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc parallel num_workers ! { dg-error "Unclassifiable OpenACC directive" } !$acc parallel num_workers ! { dg-error "Unclassifiable OpenACC directive" }
...@@ -124,6 +152,34 @@ program test ...@@ -124,6 +152,34 @@ program test
!$acc parallel num_workers("1") ! { dg-error "scalar INTEGER expression" } !$acc parallel num_workers("1") ! { dg-error "scalar INTEGER expression" }
!$acc end parallel !$acc end parallel
!$acc kernels num_workers ! { dg-error "Unclassifiable OpenACC directive" }
!$acc kernels num_workers(3)
!$acc end kernels
!$acc kernels num_workers(i)
!$acc end kernels
!$acc kernels num_workers(i+1)
!$acc end kernels
!$acc kernels num_workers(-1) ! { dg-warning "must be positive" }
!$acc end kernels
!$acc kernels num_workers(0) ! { dg-warning "must be positive" }
!$acc end kernels
!$acc kernels num_workers() ! { dg-error "Invalid character in name" }
!$acc kernels num_workers(1.5) ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc kernels num_workers(.true.) ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc kernels num_workers("1") ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc parallel vector_length ! { dg-error "Unclassifiable OpenACC directive" } !$acc parallel vector_length ! { dg-error "Unclassifiable OpenACC directive" }
...@@ -153,6 +209,34 @@ program test ...@@ -153,6 +209,34 @@ program test
!$acc parallel vector_length("1") ! { dg-error "scalar INTEGER expression" } !$acc parallel vector_length("1") ! { dg-error "scalar INTEGER expression" }
!$acc end parallel !$acc end parallel
!$acc kernels vector_length ! { dg-error "Unclassifiable OpenACC directive" }
!$acc kernels vector_length(3)
!$acc end kernels
!$acc kernels vector_length(i)
!$acc end kernels
!$acc kernels vector_length(i+1)
!$acc end kernels
!$acc kernels vector_length(-1) ! { dg-warning "must be positive" }
!$acc end kernels
!$acc kernels vector_length(0) ! { dg-warning "must be positive" }
!$acc end kernels
!$acc kernels vector_length() ! { dg-error "Invalid character in name" }
!$acc kernels vector_length(1.5) ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc kernels vector_length(.true.) ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc kernels vector_length("1") ! { dg-error "scalar INTEGER expression" }
!$acc end kernels
!$acc loop gang !$acc loop gang
do i = 1,10 do i = 1,10
...@@ -249,4 +333,4 @@ program test ...@@ -249,4 +333,4 @@ program test
do i = 1,10 do i = 1,10
enddo enddo
end program test end program test
\ No newline at end of file
! { dg-do compile }
! { dg-additional-options "-Wuninitialized" } ! { dg-additional-options "-Wuninitialized" }
program test subroutine acc_parallel
implicit none implicit none
integer :: i, j, k integer :: i, j, k
...@@ -13,5 +12,18 @@ program test ...@@ -13,5 +12,18 @@ program test
!$acc parallel vector_length(k) ! { dg-warning "is used uninitialized in this function" } !$acc parallel vector_length(k) ! { dg-warning "is used uninitialized in this function" }
!$acc end parallel !$acc end parallel
end subroutine acc_parallel
end program test subroutine acc_kernels
implicit none
integer :: i, j, k
!$acc kernels num_gangs(i) ! { dg-warning "is used uninitialized in this function" }
!$acc end kernels
!$acc kernels num_workers(j) ! { dg-warning "is used uninitialized in this function" }
!$acc end kernels
!$acc kernels vector_length(k) ! { dg-warning "is used uninitialized in this function" }
!$acc end kernels
end subroutine acc_kernels
2017-05-23 Thomas Schwinge <thomas@codesourcery.com> 2017-05-23 Thomas Schwinge <thomas@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Update.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
* testsuite/lib/libgomp.exp * testsuite/lib/libgomp.exp
(check_effective_target_openacc_nvidia_accel_configured): New (check_effective_target_openacc_nvidia_accel_configured): New
......
...@@ -14,27 +14,40 @@ main (void) ...@@ -14,27 +14,40 @@ main (void)
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
/* Parallelism dimensions: compiler/runtime decides. */
#pragma acc kernels copyout (a[0:N]) #pragma acc kernels copyout (a[0:N])
{ {
for (COUNTERTYPE i = 0; i < N; i++) for (COUNTERTYPE i = 0; i < N; i++)
a[i] = i * 2; a[i] = i * 2;
} }
#pragma acc kernels copyout (b[0:N]) /* Parallelism dimensions: variable. */
#pragma acc kernels copyout (b[0:N]) \
num_gangs (3 + a[3]) num_workers (5 + a[5]) vector_length (7 + a[7])
/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
{ {
for (COUNTERTYPE i = 0; i < N; i++) for (COUNTERTYPE i = 0; i < N; i++)
b[i] = i * 4; b[i] = i * 4;
} }
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* Parallelism dimensions: literal. */
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) \
num_gangs (3) num_workers (5) vector_length (7)
/* { dg-prune-output "using vector_length \\(32\\), ignoring 7" } */
{ {
for (COUNTERTYPE ii = 0; ii < N; ii++) for (COUNTERTYPE ii = 0; ii < N; ii++)
c[ii] = a[ii] + b[ii]; c[ii] = a[ii] + b[ii];
} }
for (COUNTERTYPE i = 0; i < N; i++) for (COUNTERTYPE i = 0; i < N; i++)
if (c[i] != a[i] + b[i]) {
abort (); if (a[i] != i * 2)
abort ();
if (b[i] != i * 4)
abort ();
if (c[i] != a[i] + b[i])
abort ();
}
free (a); free (a);
free (b); free (b);
......
...@@ -520,5 +520,40 @@ int main () ...@@ -520,5 +520,40 @@ int main ()
} }
/* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
kernels even when there are explicit num_gangs, num_workers, or
vector_length clauses. */
{
int gangs = 5;
#define WORKERS 5
#define VECTORS 13
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels \
num_gangs (gangs) \
num_workers (WORKERS) \
vector_length (VECTORS)
{
/* This is to make the OpenACC kernels construct unparallelizable. */
asm volatile ("" : : : "memory");
#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
for (int i = 100; i > -100; --i)
{
gangs_min = gangs_max = acc_gang ();
workers_min = workers_max = acc_worker ();
vectors_min = vectors_max = acc_vector ();
}
}
if (gangs_min != 0 || gangs_max != 1 - 1
|| workers_min != 0 || workers_max != 1 - 1
|| vectors_min != 0 || vectors_max != 1 - 1)
__builtin_abort ();
#undef VECTORS
#undef WORKERS
}
return 0; return 0;
} }
...@@ -6,25 +6,34 @@ program main ...@@ -6,25 +6,34 @@ program main
integer, dimension (0:n-1) :: a, b, c integer, dimension (0:n-1) :: a, b, c
integer :: i, ii integer :: i, ii
! Parallelism dimensions: compiler/runtime decides.
!$acc kernels copyout (a(0:n-1)) !$acc kernels copyout (a(0:n-1))
do i = 0, n - 1 do i = 0, n - 1
a(i) = i * 2 a(i) = i * 2
end do end do
!$acc end kernels !$acc end kernels
!$acc kernels copyout (b(0:n-1)) ! Parallelism dimensions: variable.
!$acc kernels copyout (b(0:n-1)) &
!$acc num_gangs (3 + a(3)) num_workers (5 + a(5)) vector_length (7 + a(7))
! { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" }
do i = 0, n -1 do i = 0, n -1
b(i) = i * 4 b(i) = i * 4
end do end do
!$acc end kernels !$acc end kernels
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! Parallelism dimensions: literal.
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) &
!$acc num_gangs (3) num_workers (5) vector_length (7)
! { dg-prune-output "using vector_length \\(32\\), ignoring 7" }
do ii = 0, n - 1 do ii = 0, n - 1
c(ii) = a(ii) + b(ii) c(ii) = a(ii) + b(ii)
end do end do
!$acc end kernels !$acc end kernels
do i = 0, n - 1 do i = 0, n - 1
if (a(i) .ne. i * 2) call abort
if (b(i) .ne. i * 4) call abort
if (c(i) .ne. a(i) + b(i)) call abort if (c(i) .ne. a(i) + b(i)) call abort
end do end do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment