Commit c42cfb5c by Cesar Philippidis Committed by Cesar Philippidis

re PR lto/70289 ([openacc] ICE in input_varpool_node)

	gcc/
	PR lto/70289
	PR ipa/70348
	PR tree-optimization/70373
	PR middle-end/70533
	PR middle-end/70534
	PR middle-end/70535
	* gimplify.c (gimplify_adjust_omp_clauses): Add or adjust data
	clauses for acc parallel reductions as necessary.  Error on those
	that are private.
	* omp-low.c (scan_sharing_clauses): Don't install variables which
	are used in acc parallel reductions.
	(lower_rec_input_clauses): Remove dead code.
	(lower_oacc_reductions): Add support for reference reductions.
	(lower_reduction_clauses): Remove dead code.
	(lower_omp_target): Don't remap variables appearing in acc parallel
	reductions.
	* tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.

	gcc/testsuite/
	* c-c++-common/goacc/reduction-5.c: New test.
	* c-c++-common/goacc/reduction-promotions.c: New test.
	* gfortran.dg/goacc/reduction-3.f95: New test.
	* gfortran.dg/goacc/reduction-promotions.f90: New test.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Add test
	coverage.
	* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Add test
	coverage.
	* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
	* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
	* testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
	* testsuite/libgomp.oacc-fortran/reduction-1.f90: Add test coverage.
	* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.

From-SVN: r234840
parent 51a4b0f1
2016-04-08 Cesar Philippidis <cesar@codesourcery.com>
PR lto/70289
PR ipa/70348
PR tree-optimization/70373
PR middle-end/70533
PR middle-end/70534
PR middle-end/70535
* gimplify.c (gimplify_adjust_omp_clauses): Add or adjust data
clauses for acc parallel reductions as necessary. Error on those
that are private.
* omp-low.c (scan_sharing_clauses): Don't install variables which
are used in acc parallel reductions.
(lower_rec_input_clauses): Remove dead code.
(lower_oacc_reductions): Add support for reference reductions.
(lower_reduction_clauses): Remove dead code.
(lower_omp_target): Don't remap variables appearing in acc parallel
reductions.
* tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.
2016-04-08 Jakub Jelinek <jakub@redhat.com> 2016-04-08 Jakub Jelinek <jakub@redhat.com>
PR middle-end/70593 PR middle-end/70593
......
...@@ -7987,6 +7987,34 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, ...@@ -7987,6 +7987,34 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
break; break;
} }
decl = OMP_CLAUSE_DECL (c); decl = OMP_CLAUSE_DECL (c);
/* Data clasues associated with acc parallel reductions must be
compatible with present_or_copy. Warn and adjust the clause
if that is not the case. */
if (ctx->region_type == ORT_ACC_PARALLEL)
{
tree t = DECL_P (decl) ? decl : TREE_OPERAND (decl, 0);
n = NULL;
if (DECL_P (t))
n = splay_tree_lookup (ctx->variables, (splay_tree_key) t);
if (n && (n->value & GOVD_REDUCTION))
{
enum gomp_map_kind kind = OMP_CLAUSE_MAP_KIND (c);
OMP_CLAUSE_MAP_IN_REDUCTION (c) = 1;
if ((kind & GOMP_MAP_TOFROM) != GOMP_MAP_TOFROM
&& kind != GOMP_MAP_FORCE_PRESENT
&& kind != GOMP_MAP_POINTER)
{
warning_at (OMP_CLAUSE_LOCATION (c), 0,
"incompatible data clause with reduction "
"on %qE; promoting to present_or_copy",
DECL_NAME (t));
OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM);
}
}
}
if (!DECL_P (decl)) if (!DECL_P (decl))
{ {
if ((ctx->region_type & ORT_TARGET) != 0 if ((ctx->region_type & ORT_TARGET) != 0
...@@ -8118,6 +8146,33 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, ...@@ -8118,6 +8146,33 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
case OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_REDUCTION:
decl = OMP_CLAUSE_DECL (c); decl = OMP_CLAUSE_DECL (c);
/* OpenACC reductions need a present_or_copy data clause.
Add one if necessary. Error is the reduction is private. */
if (ctx->region_type == ORT_ACC_PARALLEL)
{
n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
if (n->value & (GOVD_PRIVATE | GOVD_FIRSTPRIVATE))
error_at (OMP_CLAUSE_LOCATION (c), "invalid private "
"reduction on %qE", DECL_NAME (decl));
else if ((n->value & GOVD_MAP) == 0)
{
tree next = OMP_CLAUSE_CHAIN (c);
tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_MAP);
OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_TOFROM);
OMP_CLAUSE_DECL (nc) = decl;
OMP_CLAUSE_CHAIN (c) = nc;
lang_hooks.decls.omp_finish_clause (nc, pre_p);
while (1)
{
OMP_CLAUSE_MAP_IN_REDUCTION (nc) = 1;
if (OMP_CLAUSE_CHAIN (nc) == NULL)
break;
nc = OMP_CLAUSE_CHAIN (nc);
}
OMP_CLAUSE_CHAIN (nc) = next;
n->value |= GOVD_MAP;
}
}
if (DECL_P (decl) if (DECL_P (decl)
&& omp_shared_to_firstprivate_optimizable_decl_p (decl)) && omp_shared_to_firstprivate_optimizable_decl_p (decl))
omp_mark_stores (gimplify_omp_ctxp->outer_context, decl); omp_mark_stores (gimplify_omp_ctxp->outer_context, decl);
......
...@@ -2122,7 +2122,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, ...@@ -2122,7 +2122,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
else else
install_var_field (decl, true, 3, ctx, install_var_field (decl, true, 3, ctx,
base_pointers_restrict); base_pointers_restrict);
if (is_gimple_omp_offloaded (ctx->stmt)) if (is_gimple_omp_offloaded (ctx->stmt)
&& !OMP_CLAUSE_MAP_IN_REDUCTION (c))
install_var_local (decl, ctx); install_var_local (decl, ctx);
} }
} }
...@@ -4839,7 +4840,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -4839,7 +4840,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_assign (ptr, x, ilist); gimplify_assign (ptr, x, ilist);
} }
} }
else if (is_reference (var) && !is_oacc_parallel (ctx)) else if (is_reference (var))
{ {
/* For references that are being privatized for Fortran, /* For references that are being privatized for Fortran,
allocate new backing storage for the new pointer allocate new backing storage for the new pointer
...@@ -5575,7 +5576,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, ...@@ -5575,7 +5576,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
tree orig = OMP_CLAUSE_DECL (c); tree orig = OMP_CLAUSE_DECL (c);
tree var = maybe_lookup_decl (orig, ctx); tree var = maybe_lookup_decl (orig, ctx);
tree ref_to_res = NULL_TREE; tree ref_to_res = NULL_TREE;
tree incoming, outgoing; tree incoming, outgoing, v1, v2, v3;
bool is_private = false;
enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c); enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
if (rcode == MINUS_EXPR) if (rcode == MINUS_EXPR)
...@@ -5588,7 +5590,6 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, ...@@ -5588,7 +5590,6 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
if (!var) if (!var)
var = orig; var = orig;
gcc_assert (!is_reference (var));
incoming = outgoing = var; incoming = outgoing = var;
...@@ -5624,22 +5625,38 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, ...@@ -5624,22 +5625,38 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
for (; cls; cls = OMP_CLAUSE_CHAIN (cls)) for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
&& orig == OMP_CLAUSE_DECL (cls)) && orig == OMP_CLAUSE_DECL (cls))
goto has_outer_reduction; {
incoming = outgoing = lookup_decl (orig, probe);
goto has_outer_reduction;
}
else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
|| OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
&& orig == OMP_CLAUSE_DECL (cls))
{
is_private = true;
goto do_lookup;
}
} }
do_lookup: do_lookup:
/* This is the outermost construct with this reduction, /* This is the outermost construct with this reduction,
see if there's a mapping for it. */ see if there's a mapping for it. */
if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
&& maybe_lookup_field (orig, outer)) && maybe_lookup_field (orig, outer) && !is_private)
{ {
ref_to_res = build_receiver_ref (orig, false, outer); ref_to_res = build_receiver_ref (orig, false, outer);
if (is_reference (orig)) if (is_reference (orig))
ref_to_res = build_simple_mem_ref (ref_to_res); ref_to_res = build_simple_mem_ref (ref_to_res);
tree type = TREE_TYPE (var);
if (POINTER_TYPE_P (type))
type = TREE_TYPE (type);
outgoing = var; outgoing = var;
incoming = omp_reduction_init_op (loc, rcode, TREE_TYPE (var)); incoming = omp_reduction_init_op (loc, rcode, type);
} }
else if (ctx->outer)
incoming = outgoing = lookup_decl (orig, ctx->outer);
else else
incoming = outgoing = orig; incoming = outgoing = orig;
...@@ -5649,6 +5666,37 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, ...@@ -5649,6 +5666,37 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
if (!ref_to_res) if (!ref_to_res)
ref_to_res = integer_zero_node; ref_to_res = integer_zero_node;
if (is_reference (orig))
{
tree type = TREE_TYPE (var);
const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
if (!inner)
{
tree x = create_tmp_var (TREE_TYPE (type), id);
gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
}
v1 = create_tmp_var (type, id);
v2 = create_tmp_var (type, id);
v3 = create_tmp_var (type, id);
gimplify_assign (v1, var, fork_seq);
gimplify_assign (v2, var, fork_seq);
gimplify_assign (v3, var, fork_seq);
var = build_simple_mem_ref (var);
v1 = build_simple_mem_ref (v1);
v2 = build_simple_mem_ref (v2);
v3 = build_simple_mem_ref (v3);
outgoing = build_simple_mem_ref (outgoing);
if (TREE_CODE (incoming) != INTEGER_CST)
incoming = build_simple_mem_ref (incoming);
}
else
v1 = v2 = v3 = var;
/* Determine position in reduction buffer, which may be used /* Determine position in reduction buffer, which may be used
by target. */ by target. */
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var)); enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
...@@ -5678,20 +5726,20 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, ...@@ -5678,20 +5726,20 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, init_code, TREE_TYPE (var), 6, init_code,
unshare_expr (ref_to_res), unshare_expr (ref_to_res),
var, level, op, off); v1, level, op, off);
tree fini_call tree fini_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, fini_code, TREE_TYPE (var), 6, fini_code,
unshare_expr (ref_to_res), unshare_expr (ref_to_res),
var, level, op, off); v2, level, op, off);
tree teardown_call tree teardown_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, teardown_code, TREE_TYPE (var), 6, teardown_code,
ref_to_res, var, level, op, off); ref_to_res, v3, level, op, off);
gimplify_assign (var, setup_call, &before_fork); gimplify_assign (v1, setup_call, &before_fork);
gimplify_assign (var, init_call, &after_fork); gimplify_assign (v2, init_call, &after_fork);
gimplify_assign (var, fini_call, &before_join); gimplify_assign (v3, fini_call, &before_join);
gimplify_assign (outgoing, teardown_call, &after_join); gimplify_assign (outgoing, teardown_call, &after_join);
} }
...@@ -5933,9 +5981,6 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) ...@@ -5933,9 +5981,6 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
} }
} }
if (is_gimple_omp_oacc (ctx->stmt))
return;
stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START), stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START),
0); 0);
gimple_seq_add_stmt (stmt_seqp, stmt); gimple_seq_add_stmt (stmt_seqp, stmt);
...@@ -15829,7 +15874,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -15829,7 +15874,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (!maybe_lookup_field (var, ctx)) if (!maybe_lookup_field (var, ctx))
continue; continue;
if (offloaded) /* Don't remap oacc parallel reduction variables, because the
intermediate result must be local to each gang. */
if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
&& OMP_CLAUSE_MAP_IN_REDUCTION (c)))
{ {
x = build_receiver_ref (var, true, ctx); x = build_receiver_ref (var, true, ctx);
tree new_var = lookup_decl (var, ctx); tree new_var = lookup_decl (var, ctx);
......
2016-04-08 Cesar Philippidis <cesar@codesourcery.com>
PR lto/70289
PR ipa/70348
PR tree-optimization/70373
PR middle-end/70533
PR middle-end/70534
PR middle-end/70535
* c-c++-common/goacc/reduction-5.c: New test.
* c-c++-common/goacc/reduction-promotions.c: New test.
* gfortran.dg/goacc/reduction-3.f95: New test.
* gfortran.dg/goacc/reduction-promotions.f90: New test.
2016-04-08 Patrick Palka <ppalka@gcc.gnu.org> 2016-04-08 Patrick Palka <ppalka@gcc.gnu.org>
PR c++/70590 PR c++/70590
......
/* Integer reductions. */
#define n 1000
int
main(void)
{
int v1;
#pragma acc parallel reduction(+:v1) private(v1) /* { dg-error "appears more than once in data clauses" } */
;
#pragma acc parallel reduction(+:v1) firstprivate(v1) /* { dg-error "appears more than once in data clauses" } */
;
return 0;
}
/* Integer reductions. */
#define n 1000
int
main(void)
{
int v1, v2;
#pragma acc parallel reduction(+:v1,v2)
;
#pragma acc parallel reduction(+:v1,v2) copy(v1,v2)
;
#pragma acc parallel reduction(+:v1,v2) pcopy(v1,v2)
;
#pragma acc parallel reduction(+:v1,v2) present(v1,v2)
;
#pragma acc parallel reduction(+:v1,v2) copyin(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
;
#pragma acc parallel reduction(+:v1,v2) pcopyin(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
;
#pragma acc parallel reduction(+:v1,v2) copyout(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
;
#pragma acc parallel reduction(+:v1,v2) pcopyout(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
;
#pragma acc parallel reduction(+:v1,v2) create(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
;
#pragma acc parallel reduction(+:v1,v2) pcreate(v1,v2) /* { dg-warning "incompatible data clause with reduction" } */
;
return 0;
}
! { dg-do compile }
subroutine foo (ia1)
integer :: i1, i2
!$acc parallel reduction (+:i1) private(i1) ! { dg-error "invalid private reduction on .i1." }
!$acc end parallel
!$acc parallel reduction (+:i2) firstprivate(i2) ! { dg-error "invalid private reduction on .i2." }
!$acc end parallel
end subroutine foo
! Ensure that each parallel reduction variable as a copy or pcopy
! data clause.
! { dg-additional-options "-fdump-tree-gimple" }
program test
implicit none
integer :: v1, v2
!$acc parallel reduction(+:v1,v2)
!$acc end parallel
!$acc parallel reduction(+:v1,v2) copy(v1,v2)
!$acc end parallel
!$acc parallel reduction(+:v1,v2) pcopy(v1,v2)
!$acc end parallel
!$acc parallel reduction(+:v1,v2) present(v1,v2)
!$acc end parallel
!$acc parallel reduction(+:v1,v2) copyin(v1,v2) ! { dg-warning "incompatible data clause" }
!$acc end parallel
!$acc parallel reduction(+:v1,v2) pcopyin(v1,v2) ! { dg-warning "incompatible data clause" }
!$acc end parallel
!$acc parallel reduction(+:v1,v2) copyout(v1,v2) ! { dg-warning "incompatible data clause" }
!$acc end parallel
!$acc parallel reduction(+:v1,v2) pcopyout(v1,v2) ! { dg-warning "incompatible data clause" }
!$acc end parallel
!$acc parallel reduction(+:v1,v2) create(v1,v2) ! { dg-warning "incompatible data clause" }
!$acc end parallel
!$acc parallel reduction(+:v1,v2) pcreate(v1,v2) ! { dg-warning "incompatible data clause" }
!$acc end parallel
end program test
! { dg-final { scan-tree-dump-times "map.tofrom:v1" 8 "gimple" } }
! { dg-final { scan-tree-dump-times "map.tofrom:v2" 8 "gimple" } }
! { dg-final { scan-tree-dump-times "map.force_tofrom:v1" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map.force_tofrom:v2" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map.force_present:v1" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map.force_present:v2" 1 "gimple" } }
...@@ -1536,6 +1536,9 @@ extern void protected_set_expr_location (tree, location_t); ...@@ -1536,6 +1536,9 @@ extern void protected_set_expr_location (tree, location_t);
treatment if OMP_CLAUSE_SIZE is zero. */ treatment if OMP_CLAUSE_SIZE is zero. */
#define OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION(NODE) \ #define OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION(NODE) \
TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
/* Nonzero if this map clause is for an ACC parallel reduction variable. */
#define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
#define OMP_CLAUSE_PROC_BIND_KIND(NODE) \ #define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind) (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
......
2016-04-08 Cesar Philippidis <cesar@codesourcery.com>
PR lto/70289
PR ipa/70348
PR tree-optimization/70373
PR middle-end/70533
PR middle-end/70534
PR middle-end/70535
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Add test
coverage.
* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test.
* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Add test
coverage.
* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
* testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
* testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
* testsuite/libgomp.oacc-fortran/reduction-1.f90: Add test coverage.
* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.
2016-03-30 Thomas Schwinge <thomas@codesourcery.com> 2016-03-30 Thomas Schwinge <thomas@codesourcery.com>
James Norris <jnorris@codesourcery.com> James Norris <jnorris@codesourcery.com>
Nathan Sidwell <nathan@codesourcery.com> Nathan Sidwell <nathan@codesourcery.com>
......
/* { dg-additional-options "-w" } */
#include <assert.h>
/* Test of reduction on loop directive (gangs, non-private reduction
variable). */
int
main (int argc, char *argv[])
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
res = hres = 1;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang reduction(*:res)
for (i = 0; i < 12; i++)
res *= arr[i];
}
for (i = 0; i < 12; i++)
hres *= arr[i];
assert (res == hres);
return 0;
}
/* { dg-additional-options "-w" } */
#include <assert.h>
/* Test of reduction on loop directive (gangs and vectors, non-private
reduction variable). */
int
main (int argc, char *argv[])
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
return 0;
}
/* { dg-additional-options "-w" } */
#include <assert.h>
/* Test of reduction on loop directive (gangs and workers, non-private
reduction variable). */
int
main (int argc, char *argv[])
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang worker reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
reduction variable). */
int
main (int argc, char *argv[])
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
reduction variable: separate gang and worker/vector loops). */
int
main (int argc, char *argv[])
{
int i, j, arr[32768], res = 0, hres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang reduction(+:res)
for (j = 0; j < 32; j++)
{
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[j * 1024 + i];
}
/* "res" is non-private, and is not available until after the parallel
region. */
}
for (i = 0; i < 32768; i++)
hres += arr[i];
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
reduction variable: separate gang and worker/vector loops). */
int
main (int argc, char *argv[])
{
int i, j;
double arr[32768], res = 0, hres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copyin(arr) copy(res)
{
#pragma acc loop gang reduction(+:res)
for (j = 0; j < 32; j++)
{
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[j * 1024 + i];
}
}
for (i = 0; i < 32768; i++)
hres += arr[i];
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (gangs, workers and vectors, multiple
non-private reduction variables, float type). */
int
main (int argc, char *argv[])
{
int i, j;
float arr[32768];
float res = 0, mres = 0, hres = 0, hmres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res, mres)
{
#pragma acc loop gang reduction(+:res) reduction(max:mres)
for (j = 0; j < 32; j++)
{
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
for (i = 0; i < 1024; i++)
{
res += arr[j * 1024 + i];
if (arr[j * 1024 + i] > mres)
mres = arr[j * 1024 + i];
}
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
for (i = 0; i < 1024; i++)
{
res += arr[j * 1024 + (1023 - i)];
if (arr[j * 1024 + (1023 - i)] > mres)
mres = arr[j * 1024 + (1023 - i)];
}
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 1024; i++)
{
hres += arr[j * 1024 + i];
hres += arr[j * 1024 + (1023 - i)];
if (arr[j * 1024 + i] > hmres)
hmres = arr[j * 1024 + i];
if (arr[j * 1024 + (1023 - i)] > hmres)
hmres = arr[j * 1024 + (1023 - i)];
}
assert (res == hres);
assert (mres == hmres);
return 0;
}
/* { dg-additional-options "-w" } */
#include <assert.h>
/* Test of reduction on loop directive (vectors, private reduction
variable). */
int
main (int argc, char *argv[])
{
int i, j, arr[1024], out[32], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(res) copyout(out)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
res = 0;
#pragma acc loop vector reduction(+:res)
for (i = 0; i < 32; i++)
res += arr[j * 32 + i];
out[j] = res;
}
}
for (j = 0; j < 32; j++)
{
hres = 0;
for (i = 0; i < 32; i++)
hres += arr[j * 32 + i];
assert (out[j] == hres);
}
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (vector reduction in
gang-partitioned/worker-partitioned mode, private reduction variable). */
int
main (int argc, char *argv[])
{
int i, j, k;
double ina[1024], inb[1024], out[1024], acc;
for (j = 0; j < 32; j++)
for (i = 0; i < 32; i++)
{
ina[j * 32 + i] = (i == j) ? 2.0 : 0.0;
inb[j * 32 + i] = (double) (i + j);
}
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(acc) copyin(ina, inb) copyout(out)
{
#pragma acc loop gang worker
for (k = 0; k < 32; k++)
for (j = 0; j < 32; j++)
{
acc = 0;
#pragma acc loop vector reduction(+:acc)
for (i = 0; i < 32; i++)
acc += ina[k * 32 + i] * inb[i * 32 + j];
out[k * 32 + j] = acc;
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 32; i++)
assert (out[j * 32 + i] == (i + j) * 2);
return 0;
}
/* { dg-additional-options "-w" } */
#include <assert.h>
/* Test of reduction on loop directive (workers, private reduction
variable). */
int
main (int argc, char *argv[])
{
int i, j, arr[1024], out[32], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(res) copyout(out)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
res = 0;
#pragma acc loop worker reduction(+:res)
for (i = 0; i < 32; i++)
res += arr[j * 32 + i];
out[j] = res;
}
}
for (j = 0; j < 32; j++)
{
hres = 0;
for (i = 0; i < 32; i++)
hres += arr[j * 32 + i];
assert (out[j] == hres);
}
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (workers and vectors, private reduction
variable). */
int
main (int argc, char *argv[])
{
int i, j, arr[1024], out[32], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(res) copyout(out)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
res = 0;
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 32; i++)
res += arr[j * 32 + i];
out[j] = res;
}
}
for (j = 0; j < 32; j++)
{
hres = 0;
for (i = 0; i < 32; i++)
hres += arr[j * 32 + i];
assert (out[j] == hres);
}
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (workers and vectors, private reduction
variable). */
int
main (int argc, char *argv[])
{
int i, j, arr[32768], out[32], res = 0, hres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(res) copyout(out)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
res = j;
#pragma acc loop worker reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[j * 1024 + i];
#pragma acc loop vector reduction(+:res)
for (i = 1023; i >= 0; i--)
res += arr[j * 1024 + i];
out[j] = res;
}
}
for (j = 0; j < 32; j++)
{
hres = j;
for (i = 0; i < 1024; i++)
hres += arr[j * 1024 + i] * 2;
assert (out[j] == hres);
}
return 0;
}
#include <assert.h>
/* Test of reduction on loop directive (workers and vectors, private reduction
variable: gang-redundant mode). */
int
main (int argc, char *argv[])
{
int i, arr[1024], out[32], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i ^ 33;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(res) copyin(arr) copyout(out)
{
/* Private variables aren't initialized by default in openacc. */
res = 0;
/* "res" should be available at the end of the following loop (and should
have the same value redundantly in each gang). */
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
#pragma acc loop gang (static: 1)
for (i = 0; i < 32; i++)
out[i] = res;
}
for (i = 0; i < 1024; i++)
hres += arr[i];
for (i = 0; i < 32; i++)
assert (out[i] == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on both parallel and loop directives (worker and
vector-partitioned loops individually in gang-partitioned mode, int
type). */
int
main (int argc, char *argv[])
{
int i, j, arr[32768], res = 0, hres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
reduction(+:res) copy(res)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc loop worker reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[j * 1024 + i];
#pragma acc loop vector reduction(+:res)
for (i = 1023; i >= 0; i--)
res += arr[j * 1024 + i];
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 1024; i++)
hres += arr[j * 1024 + i] * 2;
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on both parallel and loop directives (workers and vectors
in gang-partitioned mode, int type with XOR). */
int
main (int argc, char *argv[])
{
int i, j, arr[32768], res = 0, hres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
reduction(^:res)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc loop worker vector reduction(^:res)
for (i = 0; i < 1024; i++)
res ^= arr[j * 1024 + i];
#pragma acc loop worker vector reduction(^:res)
for (i = 0; i < 1024; i++)
res ^= arr[j * 1024 + (1023 - i)];
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 1024; i++)
{
hres ^= arr[j * 1024 + i];
hres ^= arr[j * 1024 + (1023 - i)];
}
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on both parallel and loop directives (workers and vectors
together in gang-partitioned mode, float type). */
int
main (int argc, char *argv[])
{
int i, j;
float arr[32768];
float res = 0, hres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
reduction(+:res) copy(res)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[j * 1024 + i];
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[j * 1024 + (1023 - i)];
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 1024; i++)
{
hres += arr[j * 1024 + i];
hres += arr[j * 1024 + (1023 - i)];
}
assert (res == hres);
return 0;
}
#include <assert.h>
/* Test of reduction on both parallel and loop directives (workers and vectors
together in gang-partitioned mode, float type, multiple reductions). */
int
main (int argc, char *argv[])
{
int i, j;
float arr[32768];
float res = 0, mres = 0, hres = 0, hmres = 0;
for (i = 0; i < 32768; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
reduction(+:res) reduction(max:mres) copy(res, mres)
{
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
for (i = 0; i < 1024; i++)
{
res += arr[j * 1024 + i];
if (arr[j * 1024 + i] > mres)
mres = arr[j * 1024 + i];
}
#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
for (i = 0; i < 1024; i++)
{
res += arr[j * 1024 + (1023 - i)];
if (arr[j * 1024 + (1023 - i)] > mres)
mres = arr[j * 1024 + (1023 - i)];
}
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 1024; i++)
{
hres += arr[j * 1024 + i];
hres += arr[j * 1024 + (1023 - i)];
if (arr[j * 1024 + i] > hmres)
hmres = arr[j * 1024 + i];
if (arr[j * 1024 + (1023 - i)] > hmres)
hmres = arr[j * 1024 + (1023 - i)];
}
assert (res == hres);
assert (mres == hmres);
return 0;
}
/* { dg-additional-options "-w" } */
#include <assert.h> #include <assert.h>
/* Test of reduction on parallel directive. */
#define ACTUAL_GANGS 256
int int
main (int argc, char *argv[]) main (int argc, char *argv[])
{ {
int res, res2 = 0; int res, res1 = 0, res2 = 0;
#if defined(ACC_DEVICE_TYPE_host) #if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1 # define GANGS 1
#else #else
# define GANGS 256 # define GANGS 256
#endif #endif
#pragma acc parallel num_gangs(GANGS) copy(res2) #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
reduction(+:res1) copy(res2, res1)
{ {
res1 += 5;
#pragma acc atomic #pragma acc atomic
res2 += 5; res2 += 5;
} }
res = GANGS * 5; res = GANGS * 5;
assert (res == res1);
assert (res == res2); assert (res == res2);
#undef GANGS #undef GANGS
res = res2 = 1; res = res1 = res2 = 1;
#if defined(ACC_DEVICE_TYPE_host) #if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1 # define GANGS 1
#else #else
# define GANGS 8 # define GANGS 8
#endif #endif
#pragma acc parallel num_gangs(GANGS) copy(res2) #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
reduction(*:res1) copy(res1, res2)
{ {
res1 *= 5;
#pragma acc atomic #pragma acc atomic
res2 *= 5; res2 *= 5;
} }
for (int i = 0; i < GANGS; ++i) for (int i = 0; i < GANGS; ++i)
res *= 5; res *= 5;
assert (res == res1);
assert (res == res2); assert (res == res2);
#undef GANGS #undef GANGS
......
/* { dg-additional-options "-w" } */
#include <assert.h> #include <assert.h>
#include <openacc.h> #include <openacc.h>
/* Test of reduction on parallel directive (with async). */
int int
main (int argc, char *argv[]) main (int argc, char *argv[])
{ {
int res, res2 = 0; int res, res1 = 0, res2 = 0;
#if defined(ACC_DEVICE_TYPE_host) #if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1 # define GANGS 1
#else #else
# define GANGS 256 # define GANGS 256
#endif #endif
#pragma acc parallel num_gangs(GANGS) copy(res2) async(1) #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
reduction(+:res1) copy(res1, res2) async(1)
{ {
res1 += 5;
#pragma acc atomic #pragma acc atomic
res2 += 5; res2 += 5;
} }
...@@ -20,18 +27,22 @@ main (int argc, char *argv[]) ...@@ -20,18 +27,22 @@ main (int argc, char *argv[])
acc_wait (1); acc_wait (1);
assert (res == res1);
assert (res == res2); assert (res == res2);
#undef GANGS #undef GANGS
res = res2 = 1; res = res1 = res2 = 1;
#if defined(ACC_DEVICE_TYPE_host) #if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1 # define GANGS 1
#else #else
# define GANGS 8 # define GANGS 8
#endif #endif
#pragma acc parallel num_gangs(GANGS) copy(res2) async(1) #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
reduction(*:res1) copy(res1, res2) async(1)
{ {
res1 *= 5;
#pragma acc atomic #pragma acc atomic
res2 *= 5; res2 *= 5;
} }
...@@ -40,6 +51,7 @@ main (int argc, char *argv[]) ...@@ -40,6 +51,7 @@ main (int argc, char *argv[])
acc_wait (1); acc_wait (1);
assert (res == res1);
assert (res == res2); assert (res == res2);
return 0; return 0;
......
...@@ -5,12 +5,20 @@ ...@@ -5,12 +5,20 @@
int main () int main ()
{ {
int dummy[10];
#pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */ #pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */
{ {
#pragma acc loop worker
for (int i = 0; i < 10; i++)
dummy[i] = i;
} }
#pragma acc parallel vector_length (2<<20) /* { dg-error "using vector_length" } */ #pragma acc parallel vector_length (2<<20) /* { dg-error "using vector_length" } */
{ {
#pragma acc loop vector
for (int i = 0; i < 10; i++)
dummy[i] = i;
} }
return 0; return 0;
......
/* { dg-do run } */
/* { dg-additional-options "-w" } */
#include <stdlib.h>
#include <openacc.h>
#define N 10
int
main ()
{
int s1 = 0, s2 = 0;
int i;
int dummy = 0;
#pragma acc data copy (dummy)
{
#pragma acc parallel num_gangs (N) reduction (+:s1) copy(s1)
{
s1++;
}
}
if (acc_get_device_type () != acc_device_nvidia)
{
if (s1 != 1)
abort ();
}
else
{
if (s1 != N)
abort ();
}
s1 = 0;
s2 = 0;
#pragma acc parallel num_gangs (10) reduction (+:s1, s2) copy(s1, s2)
{
s1++;
s2 += N;
}
if (acc_get_device_type () != acc_device_nvidia)
{
if (s1 != 1)
abort ();
if (s2 != N)
abort ();
}
else
{
if (s1 != N)
abort ();
if (s2 != N*N)
abort ();
}
s1 = 0;
#pragma acc parallel num_gangs (10) reduction (+:s1) copy(s1)
{
#pragma acc loop gang reduction (+:s1)
for (i = 0; i < 10; i++)
s1++;
}
if (s1 != N)
abort ();
return 0;
}
int
main ()
{
int i;
static int temp;
#pragma acc parallel reduction(+:temp)
{
temp++;
}
return 0;
}
#define N 32
int
foo (unsigned int sum)
{
#pragma acc parallel reduction (+:sum)
{
sum;
}
return sum;
}
int
main (void)
{
unsigned int sum = 0;
foo (sum);
return 0;
}
/* { dg-do run } */ /* { dg-do run } */
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
/* { dg-additional-options "-foffload=-w" } */
/* Integer reductions. */ /* Integer reductions. */
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include "reduction.h"
#define vl 32 const int ng = 8;
const int nw = 4;
#define DO_PRAGMA(x) _Pragma (#x) const int vl = 32;
#define check_reduction_op(type, op, init, b) \
{ \
type res, vres; \
res = (init); \
DO_PRAGMA (acc parallel vector_length (vl) copy(res)) \
DO_PRAGMA (acc loop reduction (op:res))\
for (i = 0; i < n; i++) \
res = res op (b); \
\
vres = (init); \
for (i = 0; i < n; i++) \
vres = vres op (b); \
\
if (res != vres) \
abort (); \
}
static void static void
test_reductions_int (void) test_reductions (void)
{ {
const int n = 1000; const int n = 100;
int i; int i;
int array[n]; int array[n];
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
array[i] = i; array[i] = i+1;
check_reduction_op (int, +, 0, array[i]); /* Gang reductions. */
check_reduction_op (int, *, 1, array[i]); check_reduction_op (int, +, 0, array[i], num_gangs (ng), gang);
check_reduction_op (int, &, -1, array[i]); check_reduction_op (int, *, 1, array[i], num_gangs (ng), gang);
check_reduction_op (int, |, 0, array[i]); check_reduction_op (int, &, -1, array[i], num_gangs (ng), gang);
check_reduction_op (int, ^, 0, array[i]); check_reduction_op (int, |, 0, array[i], num_gangs (ng), gang);
check_reduction_op (int, ^, 0, array[i], num_gangs (ng), gang);
/* Worker reductions. */
check_reduction_op (int, +, 0, array[i], num_workers (nw), worker);
check_reduction_op (int, *, 1, array[i], num_workers (nw), worker);
check_reduction_op (int, &, -1, array[i], num_workers (nw), worker);
check_reduction_op (int, |, 0, array[i], num_workers (nw), worker);
check_reduction_op (int, ^, 0, array[i], num_workers (nw), worker);
/* Vector reductions. */
check_reduction_op (int, +, 0, array[i], vector_length (vl), vector);
check_reduction_op (int, *, 1, array[i], vector_length (vl), vector);
check_reduction_op (int, &, -1, array[i], vector_length (vl), vector);
check_reduction_op (int, |, 0, array[i], vector_length (vl), vector);
check_reduction_op (int, ^, 0, array[i], vector_length (vl), vector);
/* Combined reductions. */
check_reduction_op (int, +, 0, array[i], num_gangs (ng) num_workers (nw)
vector_length (vl), gang worker vector);
check_reduction_op (int, *, 1, array[i], num_gangs (ng) num_workers (nw)
vector_length (vl), gang worker vector);
check_reduction_op (int, &, -1, array[i], num_gangs (ng) num_workers (nw)
vector_length (vl), gang worker vector);
check_reduction_op (int, |, 0, array[i], num_gangs (ng) num_workers (nw)
vector_length (vl), gang worker vector);
check_reduction_op (int, ^, 0, array[i], num_gangs (ng) num_workers (nw)
vector_length (vl), gang worker vector);
} }
static void static void
...@@ -55,29 +68,31 @@ test_reductions_bool (void) ...@@ -55,29 +68,31 @@ test_reductions_bool (void)
array[i] = i; array[i] = i;
cmp_val = 5; cmp_val = 5;
check_reduction_op (bool, &&, true, (cmp_val > array[i]));
check_reduction_op (bool, ||, false, (cmp_val > array[i]));
}
#define check_reduction_macro(type, op, init, b) \ /* Gang reductions. */
{ \ check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng),
type res, vres; \ gang);
res = (init); \ check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng),
DO_PRAGMA (acc parallel vector_length (vl) copy(res))\ gang);
DO_PRAGMA (acc loop reduction (op:res))\
for (i = 0; i < n; i++) \ /* Worker reductions. */
res = op (res, (b)); \ check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_workers (nw),
\ worker);
vres = (init); \ check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_workers (nw),
for (i = 0; i < n; i++) \ worker);
vres = op (vres, (b)); \
\ /* Vector reductions. */
if (res != vres) \ check_reduction_op (int, &&, 1, (cmp_val > array[i]), vector_length (vl),
abort (); \ vector);
} check_reduction_op (int, ||, 0, (cmp_val > array[i]), vector_length (vl),
vector);
#define max(a, b) (((a) > (b)) ? (a) : (b))
#define min(a, b) (((a) < (b)) ? (a) : (b)) /* Combined reductions. */
check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker vector);
check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker vector);
}
static void static void
test_reductions_minmax (void) test_reductions_minmax (void)
...@@ -89,14 +104,32 @@ test_reductions_minmax (void) ...@@ -89,14 +104,32 @@ test_reductions_minmax (void)
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
array[i] = i; array[i] = i;
check_reduction_macro (int, min, n + 1, array[i]); /* Gang reductions. */
check_reduction_macro (int, max, -1, array[i]); check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng), gang);
check_reduction_macro (int, max, -1, array[i], num_gangs (ng), gang);
/* Worker reductions. */
check_reduction_macro (int, min, n + 1, array[i], num_workers (nw), worker);
check_reduction_macro (int, max, -1, array[i], num_workers (nw), worker);
/* Vector reductions. */
check_reduction_macro (int, min, n + 1, array[i], vector_length (vl),
vector);
check_reduction_macro (int, max, -1, array[i], vector_length (vl), vector);
/* Combined reductions. */
check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
check_reduction_macro (int, max, -1, array[i], num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
} }
int int
main (void) main (void)
{ {
test_reductions_int (); test_reductions ();
test_reductions_bool (); test_reductions_bool ();
test_reductions_minmax (); test_reductions_minmax ();
return 0; return 0;
......
/* { dg-do run } */ /* { dg-do run } */
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
/* { dg-additional-options "-foffload=-w" } */
/* float reductions. */ /* float reductions. */
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include "reduction.h"
#include <math.h>
#define vl 32 const int ng = 8;
const int nw = 4;
const int vl = 32;
int static void
main(void) test_reductions (void)
{ {
const int n = 1000; const int n = 100;
int i; int i;
float vresult, result, array[n]; float array[n];
bool lvresult, lresult;
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
array[i] = i; array[i] = i+1;
result = 0;
vresult = 0;
/* '+' reductions. */ /* Gang reductions. */
#pragma acc parallel vector_length (vl) copy(result) check_reduction_op (float, +, 0, array[i], num_gangs (ng), gang);
#pragma acc loop reduction (+:result) check_reduction_op (float, *, 1, array[i], num_gangs (ng), gang);
for (i = 0; i < n; i++)
result += array[i];
/* Verify the reduction. */ /* Worker reductions. */
for (i = 0; i < n; i++) check_reduction_op (float, +, 0, array[i], num_workers (nw), worker);
vresult += array[i]; check_reduction_op (float, *, 1, array[i], num_workers (nw), worker);
if (result != vresult)
abort ();
result = 0;
vresult = 0;
/* '*' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (*:result)
for (i = 0; i < n; i++)
result *= array[i];
/* Verify the reduction. */ /* Vector reductions. */
for (i = 0; i < n; i++) check_reduction_op (float, +, 0, array[i], vector_length (vl), vector);
vresult *= array[i]; check_reduction_op (float, *, 1, array[i], vector_length (vl), vector);
if (fabs(result - vresult) > .0001)
abort ();
result = 0;
vresult = 0;
/* 'max' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
/* Verify the reduction. */
for (i = 0; i < n; i++)
vresult = vresult > array[i] ? vresult : array[i];
if (result != vresult)
abort ();
result = 0;
vresult = 0;
/* 'min' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* Verify the reduction. */ /* Combined reductions. */
for (i = 0; i < n; i++) check_reduction_op (float, +, 0, array[i], num_gangs (ng) num_workers (nw)
vresult = vresult < array[i] ? vresult : array[i]; vector_length (vl), gang worker vector);
check_reduction_op (float, *, 1, array[i], num_gangs (ng) num_workers (nw)
if (result != vresult) vector_length (vl), gang worker vector);
abort (); }
result = 5;
vresult = 5;
lresult = false;
lvresult = false;
/* '&&' reductions. */
#pragma acc parallel vector_length (vl) copy(lresult)
#pragma acc loop reduction (&&:lresult)
for (i = 0; i < n; i++)
lresult = lresult && (result > array[i]);
/* Verify the reduction. */
for (i = 0; i < n; i++)
lvresult = lresult && (result > array[i]);
if (lresult != lvresult)
abort ();
result = 5;
vresult = 5;
lresult = false;
lvresult = false;
/* '||' reductions. */ static void
#pragma acc parallel vector_length (vl) copy(lresult) test_reductions_minmax (void)
#pragma acc loop reduction (||:lresult) {
for (i = 0; i < n; i++) const int n = 1000;
lresult = lresult || (result > array[i]); int i;
float array[n];
/* Verify the reduction. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
lvresult = lresult || (result > array[i]); array[i] = i;
if (lresult != lvresult) /* Gang reductions. */
abort (); check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng), gang);
check_reduction_macro (float, max, -1, array[i], num_gangs (ng), gang);
/* Worker reductions. */
check_reduction_macro (float, min, n + 1, array[i], num_workers (nw),
worker);
check_reduction_macro (float, max, -1, array[i], num_workers (nw), worker);
/* Vector reductions. */
check_reduction_macro (float, min, n + 1, array[i], vector_length (vl),
vector);
check_reduction_macro (float, max, -1, array[i], vector_length (vl), vector);
/* Combined reductions. */
check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
check_reduction_macro (float, max, -1, array[i], num_gangs (ng)
num_workers (nw)vector_length (vl), gang worker
vector);
}
int
main (void)
{
test_reductions ();
test_reductions_minmax ();
return 0; return 0;
} }
/* { dg-do run } */ /* { dg-do run } */
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
/* { dg-additional-options "-foffload=-w" } */
/* double reductions. */ /* double reductions. */
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include "reduction.h"
#include <math.h>
#define vl 32 const int ng = 8;
const int nw = 4;
const int vl = 32;
int static void
main(void) test_reductions (void)
{ {
const int n = 1000; const int n = 10;
int i; int i;
double vresult, result, array[n]; double array[n];
bool lvresult, lresult;
for (i = 0; i < n; i++)
array[i] = i;
result = 0;
vresult = 0;
/* 'max' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
/* Verify the reduction. */
for (i = 0; i < n; i++)
vresult = vresult > array[i] ? vresult : array[i];
if (result != vresult)
abort ();
result = 0;
vresult = 0;
/* 'min' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* Verify the reduction. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
vresult = vresult < array[i] ? vresult : array[i]; array[i] = i+1;
if (result != vresult) /* Gang reductions. */
abort (); check_reduction_op (double, +, 0, array[i], num_gangs (ng), gang);
check_reduction_op (double, *, 1, array[i], num_gangs (ng), gang);
result = 5; /* Worker reductions. */
vresult = 5; check_reduction_op (double, +, 0, array[i], num_workers (nw), worker);
check_reduction_op (double, *, 1, array[i], num_workers (nw), worker);
lresult = false; /* Vector reductions. */
lvresult = false; check_reduction_op (double, +, 0, array[i], vector_length (vl), vector);
check_reduction_op (double, *, 1, array[i], vector_length (vl), vector);
/* '&&' reductions. */ /* Combined reductions. */
#pragma acc parallel vector_length (vl) copy(lresult) check_reduction_op (double, +, 0, array[i], num_gangs (ng) num_workers (nw)
#pragma acc loop reduction (&&:lresult) vector_length (vl), gang worker vector);
for (i = 0; i < n; i++) check_reduction_op (double, *, 1, array[i], num_gangs (ng) num_workers (nw)
lresult = lresult && (result > array[i]); vector_length (vl), gang worker vector);
}
/* Verify the reduction. */
for (i = 0; i < n; i++)
lvresult = lresult && (result > array[i]);
if (lresult != lvresult)
abort ();
result = 5;
vresult = 5;
lresult = false;
lvresult = false;
/* '||' reductions. */ static void
#pragma acc parallel vector_length (vl) copy(lresult) test_reductions_minmax (void)
#pragma acc loop reduction (||:lresult) {
for (i = 0; i < n; i++) const int n = 1000;
lresult = lresult || (result > array[i]); int i;
double array[n];
/* Verify the reduction. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
lvresult = lresult || (result > array[i]); array[i] = i;
if (lresult != lvresult) /* Gang reductions. */
abort (); check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng), gang);
check_reduction_macro (double, max, -1, array[i], num_gangs (ng), gang);
/* Worker reductions. */
check_reduction_macro (double, min, n + 1, array[i], num_workers (nw),
worker);
check_reduction_macro (double, max, -1, array[i], num_workers (nw), worker);
/* Vector reductions. */
check_reduction_macro (double, min, n + 1, array[i], vector_length (vl),
vector);
check_reduction_macro (double, max, -1, array[i], vector_length (vl),
vector);
/* Combined reductions. */
check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
check_reduction_macro (double, max, -1, array[i], num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
}
int
main (void)
{
test_reductions ();
test_reductions_minmax ();
return 0; return 0;
} }
/* { dg-do run { target { ! { hppa*-*-hpux* } } } } */ /* { dg-do run { target { ! { hppa*-*-hpux* } } } } */
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
/* { dg-additional-options "-foffload=-w" } */
/* complex reductions. */ /* complex reductions. */
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <complex.h> #include <complex.h>
#include "reduction.h"
#define vl 32 const int ng = 8;
const int nw = 4;
const int vl = 32;
int static void
main(void) test_reductions (void)
{ {
const int n = 1000; const int n = 10;
int i; int i;
double _Complex vresult, result, array[n]; double _Complex array[n];
bool lvresult, lresult;
for (i = 0; i < n; i++)
array[i] = i;
result = 0;
vresult = 0;
/* '&&' reductions. */
#pragma acc parallel vector_length (vl) copy(lresult)
#pragma acc loop reduction (&&:lresult)
for (i = 0; i < n; i++)
lresult = lresult && (creal(result) > creal(array[i]));
/* Verify the reduction. */
for (i = 0; i < n; i++)
lvresult = lresult && (creal(result) > creal(array[i]));
if (lresult != lvresult)
abort ();
result = 5;
vresult = 5;
lresult = false;
lvresult = false;
/* '||' reductions. */
#pragma acc parallel vector_length (vl) copy(lresult)
#pragma acc loop reduction (||:lresult)
for (i = 0; i < n; i++)
lresult = lresult || (creal(result) > creal(array[i]));
/* Verify the reduction. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
lvresult = lresult || (creal(result) > creal(array[i])); array[i] = i+1;
if (lresult != lvresult) /* Gang reductions. */
abort (); check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng), gang);
check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng), gang);
/* Worker reductions. */
check_reduction_op (double, +, 0, creal (array[i]), num_workers (nw),
worker);
check_reduction_op (double, *, 1, creal (array[i]), num_workers (nw),
worker);
/* Vector reductions. */
check_reduction_op (double, +, 0, creal (array[i]), vector_length (vl),
vector);
check_reduction_op (double, *, 1, creal (array[i]), vector_length (vl),
vector);
/* Combined reductions. */
check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng)
num_workers (nw) vector_length (vl), gang worker
vector);
}
int
main (void)
{
test_reductions ();
return 0; return 0;
} }
/* { dg-do run } */
/* { dg-additional-options "-w" } */
/* Ignore vector_length warnings for offloaded (nvptx) targets. */
/* { dg-additional-options "-foffload=-w" } */
/* Multiple reductions. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
const int ng = 8;
const int nw = 4;
const int vl = 32;
const int n = 100;
#define DO_PRAGMA(x) _Pragma (#x)
#define check_reduction(gwv_par, gwv_loop) \
{ \
s1 = 2; s2 = 5; \
DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) \
DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2)) \
for (i = 0; i < n; i++) \
{ \
s1 = s1 + 3; \
s2 = s2 + 5; \
} \
\
if (s1 != v1 && s2 != v2) \
abort (); \
}
int int
main (void) main (void)
{ {
int s1 = 2, s2 = 5, v1 = 2, v2 = 5; int s1 = 2, s2 = 5, v1 = 2, v2 = 5;
int n = 100;
int i; int i;
#pragma acc parallel vector_length (32) copy(s1,s2)
#pragma acc loop reduction (+:s1, s2)
for (i = 0; i < n; i++)
{
s1 = s1 + 3;
s2 = s2 + 2;
}
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
{ {
v1 = v1 + 3; v1 = v1 + 3;
v2 = v2 + 2; v2 = v2 + 2;
} }
if (s1 != v1) check_reduction (num_gangs (ng), gang);
abort ();
/* Nvptx targets require a vector_length or 32 in to allow spinlocks with
if (s2 != v2) gangs. */
abort (); check_reduction (num_workers (nw) vector_length (vl), worker);
check_reduction (vector_length (vl), vector);
check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang
worker vector);
return 0; return 0;
} }
/* { dg-do run } */
/* { dg-additional-options "-w" } */
/* Test reductions on explicitly private variables. */
#include <assert.h>
int
main ()
{
int i, j, red[10];
int v;
for (i = 0; i < 10; i++)
red[i] = -1;
#pragma acc parallel copyout(red)
{
#pragma acc loop gang private(v)
for (j = 0; j < 10; j++)
{
v = j;
#pragma acc loop vector reduction (+:v)
for (i = 0; i < 100; i++)
v++;
red[j] = v;
}
}
for (i = 0; i < 10; i++)
assert (red[i] == i + 100);
return 0;
}
#ifndef REDUCTION_H
#define REDUCTION_H
#define DO_PRAGMA(x) _Pragma (#x)
#define check_reduction_op(type, op, init, b, gwv_par, gwv_loop) \
{ \
type res, vres; \
res = (init); \
DO_PRAGMA (acc parallel gwv_par copy (res)) \
DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
for (i = 0; i < n; i++) \
res = res op (b); \
\
vres = (init); \
for (i = 0; i < n; i++) \
vres = vres op (b); \
\
if (res != vres) \
abort (); \
}
#define check_reduction_macro(type, op, init, b, gwv_par, gwv_loop) \
{ \
type res, vres; \
res = (init); \
DO_PRAGMA (acc parallel gwv_par copy(res)) \
DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
for (i = 0; i < n; i++) \
res = op (res, (b)); \
\
vres = (init); \
for (i = 0; i < n; i++) \
vres = op (vres, (b)); \
\
if (res != vres) \
abort (); \
}
#define max(a, b) (((a) > (b)) ? (a) : (b))
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
! { dg-do run }
! { dg-additional-options "-w" }
program reduction
implicit none
integer, parameter :: n = 10
integer s1, s2
include "openacc_lib.h"
s1 = 0
s2 = 0
!$acc parallel reduction(+:s1,s2) num_gangs (n) copy(s1)
s1 = s1 + 1
s2 = s2 + 1
!$acc end parallel
if (acc_get_device_type () .eq. acc_device_nvidia) then
if (s1 .ne. n) call abort
if (s2 .ne. n) call abort
else
if (s1 .ne. 1) call abort
if (s2 .ne. 1) call abort
end if
! Test reductions inside subroutines
s1 = 0
s2 = 0
call redsub (s1, s2, n)
if (acc_get_device_type () .eq. acc_device_nvidia) then
if (s1 .ne. n) call abort
else
if (s2 .ne. 1) call abort
end if
end program reduction
subroutine redsub(s1, s2, n)
implicit none
integer :: s1, s2, n
!$acc parallel reduction(+:s1,s2) num_gangs (10) copy(s1)
s1 = s1 + 1
s2 = s2 + 1
!$acc end parallel
end subroutine redsub
program foo
implicit none
integer :: i
integer :: temp = 0
integer :: temp2 = 0
!$acc parallel
!$acc loop gang private(temp)
do i=1, 10000
temp = 0
enddo
!$acc end parallel
!$acc parallel reduction(+:temp2)
!$acc loop gang reduction(+:temp2)
do i=1, 10000
temp2 = 0
enddo
!$acc end parallel
end program foo
...@@ -5,50 +5,108 @@ ...@@ -5,50 +5,108 @@
program reduction_4 program reduction_4
implicit none implicit none
integer, parameter :: n = 10, vl = 32 integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
integer :: i integer :: i
complex :: vresult, result real :: vresult, rg, rw, rv, rc
complex, dimension (n) :: array complex, dimension (n) :: array
do i = 1, n do i = 1, n
array(i) = i array(i) = i
end do end do
result = 0 !
! '+' reductions
!
rg = 0
rw = 0
rv = 0
rc = 0
vresult = 0 vresult = 0
! '+' reductions !$acc parallel num_gangs(ng) copy(rg)
!$acc loop reduction(+:rg) gang
do i = 1, n
rg = rg + REAL(array(i))
end do
!$acc end parallel
!$acc parallel num_workers(nw) copy(rw)
!$acc loop reduction(+:rw) worker
do i = 1, n
rw = rw + REAL(array(i))
end do
!$acc end parallel
!$acc parallel vector_length(vl) copy(rv)
!$acc loop reduction(+:rv) vector
do i = 1, n
rv = rv + REAL(array(i))
end do
!$acc end parallel
!$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
!$acc loop reduction(+:result) !$acc loop reduction(+:rc) gang worker vector
do i = 1, n do i = 1, n
result = result + array(i) rc = rc + REAL(array(i))
end do end do
!$acc end parallel !$acc end parallel
! Verify the results ! Verify the results
do i = 1, n do i = 1, n
vresult = vresult + array(i) vresult = vresult + REAL(array(i))
end do end do
if (result .ne. vresult) call abort if (rg .ne. vresult) call abort
if (rw .ne. vresult) call abort
if (rv .ne. vresult) call abort
if (rc .ne. vresult) call abort
result = 1 !
! '*' reductions
!
rg = 1
rw = 1
rv = 1
rc = 1
vresult = 1 vresult = 1
! ! '*' reductions !$acc parallel num_gangs(ng) copy(rg)
! !$acc loop reduction(*:rg) gang
! !$acc parallel vector_length(vl) do i = 1, n
! !$acc loop reduction(*:result) rg = rg * REAL(array(i))
! do i = 1, n end do
! result = result * array(i) !$acc end parallel
! end do
! !$acc end parallel !$acc parallel num_workers(nw) copy(rw)
! !$acc loop reduction(*:rw) worker
! ! Verify the results do i = 1, n
! do i = 1, n rw = rw * REAL(array(i))
! vresult = vresult * array(i) end do
! end do !$acc end parallel
!
! if (result.ne.vresult) call abort !$acc parallel vector_length(vl) copy(rv)
!$acc loop reduction(*:rv) vector
do i = 1, n
rv = rv * REAL(array(i))
end do
!$acc end parallel
!$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
!$acc loop reduction(*:rc) gang worker vector
do i = 1, n
rc = rc * REAL(array(i))
end do
!$acc end parallel
! Verify the results
do i = 1, n
vresult = vresult * REAL(array(i))
end do
if (rg .ne. vresult) call abort
if (rw .ne. vresult) call abort
if (rv .ne. vresult) call abort
if (rc .ne. vresult) call abort
end program reduction_4 end program reduction_4
! { dg-do run } ! { dg-do run }
! { dg-additional-options "-w" }
! subroutine reduction ! subroutine reduction
program reduction program reduction
integer, parameter :: n = 40, c = 10 integer, parameter :: n = 40, c = 10
integer :: i, vsum, sum integer :: i, vsum, gs, ws, vs, cs, ns
call redsub (sum, n, c) call redsub_gang (gs, n, c)
call redsub_worker (ws, n, c)
call redsub_vector (vs, n, c)
call redsub_combined (cs, n, c)
call redsub_nested (ns, n, c)
vsum = 0 vsum = 0
...@@ -15,21 +20,80 @@ program reduction ...@@ -15,21 +20,80 @@ program reduction
vsum = vsum + c vsum = vsum + c
end do end do
if (sum.ne.vsum) call abort () if (gs .ne. vsum) call abort ()
if (ws .ne. vsum) call abort ()
if (vs .ne. vsum) call abort ()
if (cs .ne. vsum) call abort ()
if (ns .ne. vsum) call abort ()
end program reduction end program reduction
subroutine redsub(sum, n, c) subroutine redsub_gang(sum, n, c)
integer :: sum, n, c integer :: sum, n, c
integer :: s sum = 0
s = 0
!$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1) !$acc parallel copyin (n, c) num_gangs(n) copy(sum)
!$acc loop reduction(+:s) !$acc loop reduction(+:sum) gang
do i = 1, n do i = 1, n
s = s + c sum = sum + c
end do end do
!$acc end parallel !$acc end parallel
end subroutine redsub_gang
sum = s subroutine redsub_worker(sum, n, c)
end subroutine redsub integer :: sum, n, c
sum = 0
!$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum)
!$acc loop reduction(+:sum) worker
do i = 1, n
sum = sum + c
end do
!$acc end parallel
end subroutine redsub_worker
subroutine redsub_vector(sum, n, c)
integer :: sum, n, c
sum = 0
!$acc parallel copyin (n, c) vector_length(32) copy(sum)
!$acc loop reduction(+:sum) vector
do i = 1, n
sum = sum + c
end do
!$acc end parallel
end subroutine redsub_vector
subroutine redsub_combined(sum, n, c)
integer :: sum, n, c
sum = 0
!$acc parallel num_gangs (8) num_workers (4) vector_length(32) copy(sum)
!$acc loop reduction(+:sum) gang worker vector
do i = 1, n
sum = sum + c
end do
!$acc end parallel
end subroutine redsub_combined
subroutine redsub_nested(sum, n, c)
integer :: sum, n, c
integer :: ii, jj
ii = n / 10;
jj = 10;
sum = 0
!$acc parallel num_gangs (8) copy(sum)
!$acc loop reduction(+:sum) gang
do i = 1, ii
!$acc loop reduction(+:sum) vector
do j = 1, jj
sum = sum + c
end do
end do
!$acc end parallel
end subroutine redsub_nested
! { dg-do run } ! { dg-do run }
! { dg-additional-options "-cpp -w" }
program reduction program reduction
implicit none implicit none
integer, parameter :: n = 100 integer, parameter :: n = 100, n2 = 1000, chunksize = 10
integer :: i, s1, s2, vs1, vs2 integer :: i, gs1, gs2, ws1, ws2, vs1, vs2, cs1, cs2, hs1, hs2
integer :: j, red, vred
s1 = 0 gs1 = 0
s2 = 0 gs2 = 0
ws1 = 0
ws2 = 0
vs1 = 0 vs1 = 0
vs2 = 0 vs2 = 0
cs1 = 0
cs2 = 0
hs1 = 0
hs2 = 0
!$acc parallel vector_length (32) copy(s1, s2) !$acc parallel num_gangs (1000) copy(gs1, gs2)
!$acc loop reduction(+:s1, s2) !$acc loop reduction(+:gs1, gs2) gang
do i = 1, n do i = 1, n
s1 = s1 + 1 gs1 = gs1 + 1
s2 = s2 + 2 gs2 = gs2 + 2
end do end do
!$acc end parallel !$acc end parallel
! Verify the results !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
!$acc loop reduction(+:ws1, ws2) worker
do i = 1, n
ws1 = ws1 + 1
ws2 = ws2 + 2
end do
!$acc end parallel
!$acc parallel vector_length (32) copy(vs1, vs2)
!$acc loop reduction(+:vs1, vs2) vector
do i = 1, n do i = 1, n
vs1 = vs1 + 1 vs1 = vs1 + 1
vs2 = vs2 + 2 vs2 = vs2 + 2
end do end do
!$acc end parallel
!$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
!$acc loop reduction(+:cs1, cs2) gang worker vector
do i = 1, n
cs1 = cs1 + 1
cs2 = cs2 + 2
end do
!$acc end parallel
! Verify the results on the host
do i = 1, n
hs1 = hs1 + 1
hs2 = hs2 + 2
end do
if (gs1 .ne. hs1) call abort ()
if (gs2 .ne. hs2) call abort ()
if (ws1 .ne. hs1) call abort ()
if (ws2 .ne. hs2) call abort ()
if (vs1 .ne. hs1) call abort ()
if (vs2 .ne. hs2) call abort ()
if (cs1 .ne. hs1) call abort ()
if (cs2 .ne. hs2) call abort ()
! Nested reductions.
red = 0
vred = 0
!$acc parallel num_gangs(10) vector_length(32) copy(red)
!$acc loop reduction(+:red) gang
do i = 1, n/chunksize
!$acc loop reduction(+:red) vector
do j = 1, chunksize
red = red + chunksize
end do
end do
!$acc end parallel
do i = 1, n/chunksize
do j = 1, chunksize
vred = vred + chunksize
end do
end do
if (s1.ne.vs1) call abort () if (red .ne. vred) call abort ()
if (s2.ne.vs2) call abort ()
end program reduction end program reduction
! { dg-do run }
! { dg-additional-options "-w" }
! subroutine reduction with private and firstprivate variables
program reduction
integer, parameter :: n = 100
integer :: i, j, vsum, cs, arr(n)
call redsub_private (cs, n, arr)
call redsub_bogus (cs, n)
call redsub_combined (cs, n, arr)
vsum = 0
! Verify the results
do i = 1, n
vsum = i
do j = 1, n
vsum = vsum + 1;
end do
if (vsum .ne. arr(i)) call abort ()
end do
end program reduction
! This subroutine tests a reduction with an explicit private variable.
subroutine redsub_private(sum, n, arr)
integer :: sum, n, arr(n)
integer :: i, j, v
!$acc parallel copyout (arr)
!$acc loop gang private (v)
do j = 1, n
v = j
!$acc loop vector reduction (+:v)
do i = 1, 100
v = v + 1
end do
arr(j) = v
end do
!$acc end parallel
! verify the results
do i = 1, 10
if (arr(i) .ne. 100+i) call abort ()
end do
end subroutine redsub_private
! Bogus reduction on an impliclitly firstprivate variable. The results do
! survive the parallel region. The goal here is to ensure that gfortran
! doesn't ICE.
subroutine redsub_bogus(sum, n)
integer :: sum, n, arr(n)
integer :: i
!$acc parallel
!$acc loop gang worker vector reduction (+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
end subroutine redsub_bogus
! This reduction involving a firstprivate variable yields legitimate results.
subroutine redsub_combined(sum, n, arr)
integer :: sum, n, arr(n)
integer :: i, j
!$acc parallel copy (arr)
!$acc loop gang
do i = 1, n
sum = i;
!$acc loop reduction(+:sum)
do j = 1, n
sum = sum + 1
end do
arr(i) = sum
end do
!$acc end parallel
end subroutine redsub_combined
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment