Commit 2f6bb511 by Jakub Jelinek Committed by Jakub Jelinek

tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ clause.

	* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_
	clause.
	* tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of
	OMP_CLAUSE__CONDTEMP_ as range's upper bound.
	(OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define.
	* tree.c (omp_clause_num_ops, omp_clause_code_name): Add
	OMP_CLAUSE__SCANTEMP_ entry.
	(walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_.
	* tree-pretty-print.c (dump_omp_clause): Likewise.
	* tree-nested.c (convert_nonlocal_omp_clauses,
	convert_local_omp_clauses): Likewise.
	* omp-general.h (struct omp_for_data): Add have_scantemp and
	have_nonctrl_scantemp members.
	* omp-general.c (omp_extract_for_data): Initialize them.
	* omp-low.c (struct omp_context): Add scan_exclusive member.
	(scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind
	result again with GF_OMP_FOR_KIND_MASK.  Initialize also
	ctx->scan_exclusive.
	(lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead
	of !ctx->scan_inclusive.
	(lower_rec_input_clauses): Simplify gimplification of dtors using
	gimplify_and_add.  For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN
	rather than rvarp.  Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing
	loops.  Don't add barrier for reduction_omp_orig_ref if
	ctx->scan_??xclusive.
	(lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive.
	(lower_omp_scan): Use ctx->scan_exclusive instead
	of !ctx->scan_inclusive.  Handle worksharing loops with inscan
	reductions.  Use new_vard != new_var instead of repeated
	omp_is_reference calls.
	(omp_find_scan, lower_omp_for_scan): New functions.
	(lower_omp_for): Call lower_omp_for_scan for worksharing loops with
	inscan reductions.
	* omp-expand.c (expand_omp_scantemp_alloc): New function.
	(expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp
	and fd->have_scantemp.

	* c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message.
	* c-c++-common/gomp/scan-5.c (foo): Likewise.

	* testsuite/libgomp.c++/scan-1.C: New test.
	* testsuite/libgomp.c++/scan-2.C: New test.
	* testsuite/libgomp.c++/scan-3.C: New test.
	* testsuite/libgomp.c++/scan-4.C: New test.
	* testsuite/libgomp.c++/scan-5.C: New test.
	* testsuite/libgomp.c++/scan-6.C: New test.
	* testsuite/libgomp.c++/scan-7.C: New test.
	* testsuite/libgomp.c++/scan-8.C: New test.
	* testsuite/libgomp.c/scan-1.c: New test.
	* testsuite/libgomp.c/scan-2.c: New test.
	* testsuite/libgomp.c/scan-3.c: New test.
	* testsuite/libgomp.c/scan-4.c: New test.
	* testsuite/libgomp.c/scan-5.c: New test.
	* testsuite/libgomp.c/scan-6.c: New test.
	* testsuite/libgomp.c/scan-7.c: New test.
	* testsuite/libgomp.c/scan-8.c: New test.

From-SVN: r272958
parent 83eb9522
2019-07-03  Jakub Jelinek  <jakub@redhat.com>
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_
clause.
* tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of
OMP_CLAUSE__CONDTEMP_ as range's upper bound.
(OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define.
* tree.c (omp_clause_num_ops, omp_clause_code_name): Add
OMP_CLAUSE__SCANTEMP_ entry.
(walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_.
* tree-pretty-print.c (dump_omp_clause): Likewise.
* tree-nested.c (convert_nonlocal_omp_clauses,
convert_local_omp_clauses): Likewise.
* omp-general.h (struct omp_for_data): Add have_scantemp and
have_nonctrl_scantemp members.
* omp-general.c (omp_extract_for_data): Initialize them.
* omp-low.c (struct omp_context): Add scan_exclusive member.
(scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind
result again with GF_OMP_FOR_KIND_MASK. Initialize also
ctx->scan_exclusive.
(lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead
of !ctx->scan_inclusive.
(lower_rec_input_clauses): Simplify gimplification of dtors using
gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN
rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing
loops. Don't add barrier for reduction_omp_orig_ref if
ctx->scan_??xclusive.
(lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive.
(lower_omp_scan): Use ctx->scan_exclusive instead
of !ctx->scan_inclusive. Handle worksharing loops with inscan
reductions. Use new_vard != new_var instead of repeated
omp_is_reference calls.
(omp_find_scan, lower_omp_for_scan): New functions.
(lower_omp_for): Call lower_omp_for_scan for worksharing loops with
inscan reductions.
* omp-expand.c (expand_omp_scantemp_alloc): New function.
(expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp
and fd->have_scantemp.
* gimplify.c (gimplify_scan_omp_clauses): For inscan reductions
on worksharing loop propagate it as shared clause to containing
combined parallel.
......
...@@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_region *region, ...@@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_region *region,
} }
} }
/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
compute needed allocation size. If !ALLOC of team allocations,
if ALLOC of thread allocation. SZ is the initial needed size for
other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
CNT number of elements of each array, for !ALLOC this is
omp_get_num_threads (), for ALLOC number of iterations handled by the
current thread. If PTR is non-NULL, it is the start of the allocation
and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
clauses pointers to the corresponding arrays. */
static tree
expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
unsigned HOST_WIDE_INT alloc_align, tree cnt,
gimple_stmt_iterator *gsi, bool alloc)
{
tree eltsz = NULL_TREE;
unsigned HOST_WIDE_INT preval = 0;
if (ptr && sz)
ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
ptr, size_int (sz));
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
{
tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
{
unsigned HOST_WIDE_INT szl
= tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
szl = least_bit_hwi (szl);
if (szl)
al = MIN (al, szl);
}
if (ptr == NULL_TREE)
{
if (eltsz == NULL_TREE)
eltsz = TYPE_SIZE_UNIT (pointee_type);
else
eltsz = size_binop (PLUS_EXPR, eltsz,
TYPE_SIZE_UNIT (pointee_type));
}
if (preval == 0 && al <= alloc_align)
{
unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
sz += diff;
if (diff && ptr)
ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
ptr, size_int (diff));
}
else if (al > preval)
{
if (ptr)
{
ptr = fold_convert (pointer_sized_int_node, ptr);
ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
build_int_cst (pointer_sized_int_node,
al - 1));
ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
build_int_cst (pointer_sized_int_node,
-(HOST_WIDE_INT) al));
ptr = fold_convert (ptr_type_node, ptr);
}
else
sz += al - 1;
}
if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
preval = al;
else
preval = 1;
if (ptr)
{
expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
ptr = OMP_CLAUSE_DECL (c);
ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
size_binop (MULT_EXPR, cnt,
TYPE_SIZE_UNIT (pointee_type)));
}
}
if (ptr == NULL_TREE)
{
eltsz = size_binop (MULT_EXPR, eltsz, cnt);
if (sz)
eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
return eltsz;
}
else
return ptr;
}
/* A subroutine of expand_omp_for. Generate code for a parallel /* A subroutine of expand_omp_for. Generate code for a parallel
loop with static schedule and no specified chunk size. Given loop with static schedule and no specified chunk size. Given
parameters: parameters:
...@@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct omp_region *region,
struct omp_for_data *fd, struct omp_for_data *fd,
gimple *inner_stmt) gimple *inner_stmt)
{ {
tree n, q, s0, e0, e, t, tt, nthreads, threadid; tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
tree type, itype, vmain, vback; tree type, itype, vmain, vback;
basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
basic_block body_bb, cont_bb, collapse_bb = NULL; basic_block body_bb, cont_bb, collapse_bb = NULL;
basic_block fin_bb; basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
gimple_stmt_iterator gsi, gsip; gimple_stmt_iterator gsi, gsip;
edge ep; edge ep;
bool broken_loop = region->cont == NULL; bool broken_loop = region->cont == NULL;
...@@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct omp_region *region,
c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
cond_var = OMP_CLAUSE_DECL (c); cond_var = OMP_CLAUSE_DECL (c);
} }
if (fd->have_reductemp || fd->have_pointer_condtemp) if (fd->have_reductemp
|| fd->have_pointer_condtemp
|| fd->have_nonctrl_scantemp)
{ {
tree t1 = build_int_cst (long_integer_type_node, 0); tree t1 = build_int_cst (long_integer_type_node, 0);
tree t2 = build_int_cst (long_integer_type_node, 1); tree t2 = build_int_cst (long_integer_type_node, 1);
...@@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gimple_stmt_iterator gsi2 = gsi_none (); gimple_stmt_iterator gsi2 = gsi_none ();
gimple *g = NULL; gimple *g = NULL;
tree mem = null_pointer_node, memv = NULL_TREE; tree mem = null_pointer_node, memv = NULL_TREE;
unsigned HOST_WIDE_INT condtemp_sz = 0;
unsigned HOST_WIDE_INT alloc_align = 0;
if (fd->have_reductemp) if (fd->have_reductemp)
{ {
gcc_assert (!fd->have_nonctrl_scantemp);
tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
reductions = OMP_CLAUSE_DECL (c); reductions = OMP_CLAUSE_DECL (c);
gcc_assert (TREE_CODE (reductions) == SSA_NAME); gcc_assert (TREE_CODE (reductions) == SSA_NAME);
...@@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gsi2 = gsip; gsi2 = gsip;
reductions = null_pointer_node; reductions = null_pointer_node;
} }
if (fd->have_pointer_condtemp) if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
{ {
tree type = TREE_TYPE (condtemp); tree type;
if (fd->have_pointer_condtemp)
type = TREE_TYPE (condtemp);
else
type = ptr_type_node;
memv = create_tmp_var (type); memv = create_tmp_var (type);
TREE_ADDRESSABLE (memv) = 1; TREE_ADDRESSABLE (memv) = 1;
unsigned HOST_WIDE_INT sz unsigned HOST_WIDE_INT sz = 0;
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); tree size = NULL_TREE;
if (fd->have_pointer_condtemp)
{
sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
sz *= fd->lastprivate_conditional; sz *= fd->lastprivate_conditional;
expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), condtemp_sz = sz;
}
if (fd->have_nonctrl_scantemp)
{
nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
gimple *g = gimple_build_call (nthreads, 0);
nthreads = create_tmp_var (integer_type_node);
gimple_call_set_lhs (g, nthreads);
gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
nthreads = fold_convert (sizetype, nthreads);
alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
alloc_align, nthreads, NULL,
false); false);
size = fold_convert (type, size);
}
else
size = build_int_cst (type, sz);
expand_omp_build_assign (&gsi2, memv, size, false);
mem = build_fold_addr_expr (memv); mem = build_fold_addr_expr (memv);
} }
tree t tree t
...@@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct omp_region *region,
true, GSI_SAME_STMT); true, GSI_SAME_STMT);
if (fd->have_pointer_condtemp) if (fd->have_pointer_condtemp)
expand_omp_build_assign (&gsi2, condtemp, memv, false); expand_omp_build_assign (&gsi2, condtemp, memv, false);
if (fd->have_nonctrl_scantemp)
{
tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
alloc_align, nthreads, &gsi2, false);
}
if (fd->have_reductemp) if (fd->have_reductemp)
{ {
gsi_remove (&gsi2, true); gsi_remove (&gsi2, true);
...@@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gsi = gsi_last_nondebug_bb (third_bb); gsi = gsi_last_nondebug_bb (third_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
if (fd->have_nonctrl_scantemp)
{
tree clauses = gimple_omp_for_clauses (fd->for_stmt);
tree controlp = NULL_TREE, controlb = NULL_TREE;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
&& OMP_CLAUSE__SCANTEMP__CONTROL (c))
{
if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
controlb = OMP_CLAUSE_DECL (c);
else
controlp = OMP_CLAUSE_DECL (c);
if (controlb && controlp)
break;
}
gcc_assert (controlp && controlb);
tree cnt = create_tmp_var (sizetype);
gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
alloc_align, cnt, NULL, true);
tree size = create_tmp_var (sizetype);
expand_omp_build_assign (&gsi, size, sz, false);
tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
size, size_int (16384));
expand_omp_build_assign (&gsi, controlb, cmp);
g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
NULL_TREE, NULL_TREE);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
fourth_bb = split_block (third_bb, g)->dest;
gsi = gsi_last_nondebug_bb (fourth_bb);
/* FIXME: Once we have allocators, this should use allocator. */
g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
gimple_call_set_lhs (g, controlp);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
&gsi, true);
gsi_prev (&gsi);
g = gsi_stmt (gsi);
fifth_bb = split_block (fourth_bb, g)->dest;
gsi = gsi_last_nondebug_bb (fifth_bb);
g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
gimple_call_set_lhs (g, controlp);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
&& OMP_CLAUSE__SCANTEMP__ALLOC (c))
{
tree tmp = create_tmp_var (sizetype);
tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
g = gimple_build_assign (tmp, MULT_EXPR, cnt,
TYPE_SIZE_UNIT (pointee_type));
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
g = gimple_build_call (alloca_decl, 2, tmp,
size_int (TYPE_ALIGN (pointee_type)));
gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
}
sixth_bb = split_block (fifth_bb, g)->dest;
gsi = gsi_last_nondebug_bb (sixth_bb);
}
t = build2 (MULT_EXPR, itype, q, threadid); t = build2 (MULT_EXPR, itype, q, threadid);
t = build2 (PLUS_EXPR, itype, t, tt); t = build2 (PLUS_EXPR, itype, t, tt);
s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
...@@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct omp_region *region,
if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
{ {
t = gimple_omp_return_lhs (gsi_stmt (gsi)); t = gimple_omp_return_lhs (gsi_stmt (gsi));
if (fd->have_reductemp || fd->have_pointer_condtemp) if (fd->have_reductemp
|| ((fd->have_pointer_condtemp || fd->have_scantemp)
&& !fd->have_nonctrl_scantemp))
{ {
tree fn; tree fn;
if (t) if (t)
...@@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gcall *g = gimple_build_call (fn, 0); gcall *g = gimple_build_call (fn, 0);
gsi_insert_after (&gsi, g, GSI_SAME_STMT); gsi_insert_after (&gsi, g, GSI_SAME_STMT);
} }
if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
{
tree clauses = gimple_omp_for_clauses (fd->for_stmt);
tree controlp = NULL_TREE, controlb = NULL_TREE;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
&& OMP_CLAUSE__SCANTEMP__CONTROL (c))
{
if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
controlb = OMP_CLAUSE_DECL (c);
else
controlp = OMP_CLAUSE_DECL (c);
if (controlb && controlp)
break;
}
gcc_assert (controlp && controlb);
gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
NULL_TREE, NULL_TREE);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
exit1_bb = split_block (exit_bb, g)->dest;
gsi = gsi_after_labels (exit1_bb);
g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
controlp);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
exit2_bb = split_block (exit1_bb, g)->dest;
gsi = gsi_after_labels (exit2_bb);
g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
controlp);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
exit3_bb = split_block (exit2_bb, g)->dest;
gsi = gsi_after_labels (exit3_bb);
}
gsi_remove (&gsi, true); gsi_remove (&gsi, true);
/* Connect all the blocks. */ /* Connect all the blocks. */
...@@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct omp_region *region,
ep = find_edge (entry_bb, second_bb); ep = find_edge (entry_bb, second_bb);
ep->flags = EDGE_TRUE_VALUE; ep->flags = EDGE_TRUE_VALUE;
ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; if (fourth_bb)
find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; {
ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
ep->probability
= profile_probability::guessed_always ().apply_scale (1, 2);
ep = find_edge (third_bb, fourth_bb);
ep->flags = EDGE_TRUE_VALUE;
ep->probability
= profile_probability::guessed_always ().apply_scale (1, 2);
ep = find_edge (fourth_bb, fifth_bb);
redirect_edge_and_branch (ep, sixth_bb);
}
else
sixth_bb = third_bb;
find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
if (exit1_bb)
{
ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
ep->probability
= profile_probability::guessed_always ().apply_scale (1, 2);
ep = find_edge (exit_bb, exit1_bb);
ep->flags = EDGE_TRUE_VALUE;
ep->probability
= profile_probability::guessed_always ().apply_scale (1, 2);
ep = find_edge (exit1_bb, exit2_bb);
redirect_edge_and_branch (ep, exit3_bb);
}
if (!broken_loop) if (!broken_loop)
{ {
...@@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct omp_region *region, ...@@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct omp_region *region,
set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); if (fourth_bb)
{
set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
}
set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
set_immediate_dominator (CDI_DOMINATORS, body_bb, set_immediate_dominator (CDI_DOMINATORS, body_bb,
recompute_dominator (CDI_DOMINATORS, body_bb)); recompute_dominator (CDI_DOMINATORS, body_bb));
set_immediate_dominator (CDI_DOMINATORS, fin_bb, set_immediate_dominator (CDI_DOMINATORS, fin_bb,
recompute_dominator (CDI_DOMINATORS, fin_bb)); recompute_dominator (CDI_DOMINATORS, fin_bb));
if (exit1_bb)
{
set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
}
struct loop *loop = body_bb->loop_father; struct loop *loop = body_bb->loop_father;
if (loop != entry_bb->loop_father) if (loop != entry_bb->loop_father)
......
...@@ -169,6 +169,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, ...@@ -169,6 +169,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
fd->have_ordered = false; fd->have_ordered = false;
fd->have_reductemp = false; fd->have_reductemp = false;
fd->have_pointer_condtemp = false; fd->have_pointer_condtemp = false;
fd->have_scantemp = false;
fd->have_nonctrl_scantemp = false;
fd->lastprivate_conditional = 0; fd->lastprivate_conditional = 0;
fd->tiling = NULL_TREE; fd->tiling = NULL_TREE;
fd->collapse = 1; fd->collapse = 1;
...@@ -231,6 +233,12 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, ...@@ -231,6 +233,12 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
fd->have_pointer_condtemp = true; fd->have_pointer_condtemp = true;
break; break;
case OMP_CLAUSE__SCANTEMP_:
fd->have_scantemp = true;
if (!OMP_CLAUSE__SCANTEMP__ALLOC (t)
&& !OMP_CLAUSE__SCANTEMP__CONTROL (t))
fd->have_nonctrl_scantemp = true;
break;
default: default:
break; break;
} }
......
...@@ -63,7 +63,7 @@ struct omp_for_data ...@@ -63,7 +63,7 @@ struct omp_for_data
int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */ int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */
int ordered; int ordered;
bool have_nowait, have_ordered, simd_schedule, have_reductemp; bool have_nowait, have_ordered, simd_schedule, have_reductemp;
bool have_pointer_condtemp; bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp;
int lastprivate_conditional; int lastprivate_conditional;
unsigned char sched_modifiers; unsigned char sched_modifiers;
enum omp_clause_schedule_kind sched_kind; enum omp_clause_schedule_kind sched_kind;
......
...@@ -144,6 +144,9 @@ struct omp_context ...@@ -144,6 +144,9 @@ struct omp_context
/* True if there is nested scan context with inclusive clause. */ /* True if there is nested scan context with inclusive clause. */
bool scan_inclusive; bool scan_inclusive;
/* True if there is nested scan context with exclusive clause. */
bool scan_exclusive;
}; };
static splay_tree all_contexts; static splay_tree all_contexts;
...@@ -3316,8 +3319,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, ...@@ -3316,8 +3319,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
break; break;
case GIMPLE_OMP_FOR: case GIMPLE_OMP_FOR:
if (((gimple_omp_for_kind (as_a <gomp_for *> (stmt)) if ((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
& GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD) == GF_OMP_FOR_KIND_SIMD)
&& omp_maybe_offloaded_ctx (ctx) && omp_maybe_offloaded_ctx (ctx)
&& omp_max_simt_vf ()) && omp_max_simt_vf ())
scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx); scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx);
...@@ -3335,8 +3338,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, ...@@ -3335,8 +3338,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
case GIMPLE_OMP_SCAN: case GIMPLE_OMP_SCAN:
if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt))) if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
{
if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE) if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
ctx->scan_inclusive = true; ctx->scan_inclusive = true;
else if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_EXCLUSIVE)
ctx->scan_exclusive = true;
}
/* FALLTHRU */ /* FALLTHRU */
case GIMPLE_OMP_SECTION: case GIMPLE_OMP_SECTION:
case GIMPLE_OMP_MASTER: case GIMPLE_OMP_MASTER:
...@@ -3769,7 +3776,7 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, ...@@ -3769,7 +3776,7 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
sctx->lastlane, NULL_TREE, NULL_TREE); sctx->lastlane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar) = 1; TREE_THIS_NOTRAP (*rvar) = 1;
if (!ctx->scan_inclusive) if (ctx->scan_exclusive)
{ {
/* And for exclusive scan yet another one, which will /* And for exclusive scan yet another one, which will
hold the value during the scan phase. */ hold the value during the scan phase. */
...@@ -3854,7 +3861,7 @@ static void ...@@ -3854,7 +3861,7 @@ static void
lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
omp_context *ctx, struct omp_for_data *fd) omp_context *ctx, struct omp_for_data *fd)
{ {
tree c, dtor, copyin_seq, x, ptr; tree c, copyin_seq, x, ptr;
bool copyin_by_ref = false; bool copyin_by_ref = false;
bool lastprivate_firstprivate = false; bool lastprivate_firstprivate = false;
bool reduction_omp_orig_ref = false; bool reduction_omp_orig_ref = false;
...@@ -4541,12 +4548,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -4541,12 +4548,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
x = lang_hooks.decls.omp_clause_dtor x = lang_hooks.decls.omp_clause_dtor
(c, build_simple_mem_ref (y2)); (c, build_simple_mem_ref (y2));
if (x) if (x)
{ gimplify_and_add (x, dlist);
gimple_seq tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
} }
} }
else else
...@@ -4913,13 +4915,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -4913,13 +4915,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
{ {
y = lang_hooks.decls.omp_clause_dtor (c, ivar); y = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (y) if (y)
{ gimplify_and_add (y, &llist[1]);
gimple_seq tseq = NULL;
dtor = y;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
} }
break; break;
} }
...@@ -4949,13 +4945,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -4949,13 +4945,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
do_dtor: do_dtor:
x = lang_hooks.decls.omp_clause_dtor (c, new_var); x = lang_hooks.decls.omp_clause_dtor (c, new_var);
if (x) if (x)
{ gimplify_and_add (x, dlist);
gimple_seq tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
break; break;
case OMP_CLAUSE_LINEAR: case OMP_CLAUSE_LINEAR:
...@@ -5103,13 +5093,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5103,13 +5093,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_and_add (x, &llist[0]); gimplify_and_add (x, &llist[0]);
x = lang_hooks.decls.omp_clause_dtor (c, ivar); x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x) if (x)
{ gimplify_and_add (x, &llist[1]);
gimple_seq tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
break; break;
} }
if (omp_is_reference (var)) if (omp_is_reference (var))
...@@ -5282,12 +5266,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5282,12 +5266,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
x = lang_hooks.decls.omp_clause_dtor (c, nv); x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x) if (x)
{ gimplify_and_add (x, dlist);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
} }
tree ref = build_outer_var_ref (var, ctx); tree ref = build_outer_var_ref (var, ctx);
...@@ -5310,34 +5289,19 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5310,34 +5289,19 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
x = lang_hooks.decls.omp_clause_dtor (c, ivar); x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x) if (x)
{ gimplify_and_add (x, &llist[1]);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
tree ivar2 = unshare_expr (lvar); tree ivar2 = unshare_expr (lvar);
TREE_OPERAND (ivar2, 1) = sctx.idx; TREE_OPERAND (ivar2, 1) = sctx.idx;
x = lang_hooks.decls.omp_clause_dtor (c, ivar2); x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
if (x) if (x)
{ gimplify_and_add (x, &llist[1]);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
if (rvar2) if (rvar2)
{ {
x = lang_hooks.decls.omp_clause_dtor (c, rvar2); x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
if (x) if (x)
{ gimplify_and_add (x, &llist[1]);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
} }
break; break;
} }
...@@ -5362,12 +5326,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5362,12 +5326,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
build_fold_addr_expr (lvar)); build_fold_addr_expr (lvar));
x = lang_hooks.decls.omp_clause_dtor (c, ivar); x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x) if (x)
{ gimplify_and_add (x, &llist[1]);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
break; break;
} }
/* If this is a reference to constant size reduction var /* If this is a reference to constant size reduction var
...@@ -5409,15 +5368,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5409,15 +5368,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
if (x) if (x)
gimplify_and_add (x, ilist); gimplify_and_add (x, ilist);
if (rvarp) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
{ {
if (x) if (x || (!is_simd
&& OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)))
{ {
tree nv = create_tmp_var_raw (TREE_TYPE (new_var)); tree nv = create_tmp_var_raw (TREE_TYPE (new_var));
gimple_add_tmp_var (nv); gimple_add_tmp_var (nv);
ctx->cb.decl_map->put (new_vard, nv); ctx->cb.decl_map->put (new_vard, nv);
x = lang_hooks.decls.omp_clause_default_ctor x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx)); (c, nv, build_outer_var_ref (var, ctx));
if (x)
gimplify_and_add (x, ilist); gimplify_and_add (x, ilist);
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{ {
...@@ -5433,7 +5395,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5433,7 +5395,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_seq (ilist, tseq); gimple_seq_add_seq (ilist, tseq);
} }
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
if (!ctx->scan_inclusive) if (is_simd && ctx->scan_exclusive)
{ {
tree nv2 tree nv2
= create_tmp_var_raw (TREE_TYPE (new_var)); = create_tmp_var_raw (TREE_TYPE (new_var));
...@@ -5444,23 +5406,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5444,23 +5406,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_and_add (x, ilist); gimplify_and_add (x, ilist);
x = lang_hooks.decls.omp_clause_dtor (c, nv2); x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x) if (x)
{ gimplify_and_add (x, dlist);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
} }
x = lang_hooks.decls.omp_clause_dtor (c, nv); x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x) if (x)
{ gimplify_and_add (x, dlist);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
} }
else if (!ctx->scan_inclusive else if (is_simd
&& ctx->scan_exclusive
&& TREE_ADDRESSABLE (TREE_TYPE (new_var))) && TREE_ADDRESSABLE (TREE_TYPE (new_var)))
{ {
tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
...@@ -5468,12 +5421,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5468,12 +5421,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
ctx->cb.decl_map->put (new_vard, nv2); ctx->cb.decl_map->put (new_vard, nv2);
x = lang_hooks.decls.omp_clause_dtor (c, nv2); x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x) if (x)
{ gimplify_and_add (x, dlist);
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
} }
DECL_HAS_VALUE_EXPR_P (placeholder) = 0; DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
goto do_dtor; goto do_dtor;
...@@ -5611,7 +5559,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5611,7 +5559,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
{ {
if (omp_is_reference (var) && is_simd) if (omp_is_reference (var) && is_simd)
handle_simd_reference (clause_loc, new_vard, ilist); handle_simd_reference (clause_loc, new_vard, ilist);
if (rvarp) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
break; break;
gimplify_assign (new_var, x, ilist); gimplify_assign (new_var, x, ilist);
if (is_simd) if (is_simd)
...@@ -5815,7 +5764,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5815,7 +5764,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
lastprivate clauses we need to ensure the lastprivate copying lastprivate clauses we need to ensure the lastprivate copying
happens after firstprivate copying in all threads. And similarly happens after firstprivate copying in all threads. And similarly
for UDRs if initializer expression refers to omp_orig. */ for UDRs if initializer expression refers to omp_orig. */
if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref) if (copyin_by_ref || lastprivate_firstprivate
|| (reduction_omp_orig_ref
&& !ctx->scan_inclusive
&& !ctx->scan_exclusive))
{ {
/* Don't add any barrier for #pragma omp simd or /* Don't add any barrier for #pragma omp simd or
#pragma omp distribute. */ #pragma omp distribute. */
...@@ -6464,6 +6416,10 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, ...@@ -6464,6 +6416,10 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp,
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
return; return;
/* inscan reductions are handled elsewhere. */
if (ctx->scan_inclusive || ctx->scan_exclusive)
return;
/* First see if there is exactly one reduction clause. Use OMP_ATOMIC /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
update in that case, otherwise use a lock. */ update in that case, otherwise use a lock. */
for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c)) for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
...@@ -8650,7 +8606,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8650,7 +8606,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gimple_seq before = NULL; gimple_seq before = NULL;
omp_context *octx = ctx->outer; omp_context *octx = ctx->outer;
gcc_assert (octx); gcc_assert (octx);
if (!octx->scan_inclusive && !has_clauses) if (octx->scan_exclusive && !has_clauses)
{ {
gimple_stmt_iterator gsi2 = *gsi_p; gimple_stmt_iterator gsi2 = *gsi_p;
gsi_next (&gsi2); gsi_next (&gsi2);
...@@ -8672,10 +8628,13 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8672,10 +8628,13 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
} }
bool input_phase = has_clauses ^ octx->scan_inclusive; bool input_phase = has_clauses ^ octx->scan_inclusive;
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR bool is_simd = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
&& !gimple_omp_for_combined_into_p (octx->stmt)) && !gimple_omp_for_combined_into_p (octx->stmt));
{ bool is_for = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (octx->stmt) == GF_OMP_FOR_KIND_FOR
&& !gimple_omp_for_combined_p (octx->stmt));
if (is_simd)
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
OMP_CLAUSE__SIMDUID_)) OMP_CLAUSE__SIMDUID_))
{ {
...@@ -8689,6 +8648,9 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8689,6 +8648,9 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gimple_call_set_lhs (g, lane); gimple_call_set_lhs (g, lane);
gimple_seq_add_stmt (&before, g); gimple_seq_add_stmt (&before, g);
} }
if (is_simd || is_for)
{
for (tree c = gimple_omp_for_clauses (octx->stmt); for (tree c = gimple_omp_for_clauses (octx->stmt);
c; c = OMP_CLAUSE_CHAIN (c)) c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
...@@ -8711,7 +8673,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8711,7 +8673,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (DECL_HAS_VALUE_EXPR_P (new_vard)) if (DECL_HAS_VALUE_EXPR_P (new_vard))
{ {
val = DECL_VALUE_EXPR (new_vard); val = DECL_VALUE_EXPR (new_vard);
if (omp_is_reference (var)) if (new_vard != new_var)
{ {
gcc_assert (TREE_CODE (val) == ADDR_EXPR); gcc_assert (TREE_CODE (val) == ADDR_EXPR);
val = TREE_OPERAND (val, 0); val = TREE_OPERAND (val, 0);
...@@ -8727,7 +8689,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8727,7 +8689,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
lane0 = TREE_OPERAND (val, 1); lane0 = TREE_OPERAND (val, 1);
TREE_OPERAND (val, 1) = lane; TREE_OPERAND (val, 1) = lane;
var2 = lookup_decl (v, octx); var2 = lookup_decl (v, octx);
if (!octx->scan_inclusive) if (octx->scan_exclusive)
var4 = lookup_decl (var2, octx); var4 = lookup_decl (var2, octx);
if (input_phase if (input_phase
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
...@@ -8737,7 +8699,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8737,7 +8699,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
var2 = build4 (ARRAY_REF, TREE_TYPE (val), var2 = build4 (ARRAY_REF, TREE_TYPE (val),
var2, lane, NULL_TREE, NULL_TREE); var2, lane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (var2) = 1; TREE_THIS_NOTRAP (var2) = 1;
if (!octx->scan_inclusive) if (octx->scan_exclusive)
{ {
var4 = build4 (ARRAY_REF, TREE_TYPE (val), var4 = build4 (ARRAY_REF, TREE_TYPE (val),
var4, lane, NULL_TREE, var4, lane, NULL_TREE,
...@@ -8759,7 +8721,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8759,7 +8721,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
var3 = maybe_lookup_decl (new_vard, octx); var3 = maybe_lookup_decl (new_vard, octx);
if (var3 == new_vard || var3 == NULL_TREE) if (var3 == new_vard || var3 == NULL_TREE)
var3 = NULL_TREE; var3 = NULL_TREE;
else if (!octx->scan_inclusive && !input_phase) else if (is_simd && octx->scan_exclusive && !input_phase)
{ {
var4 = maybe_lookup_decl (var3, octx); var4 = maybe_lookup_decl (var3, octx);
if (var4 == var3 || var4 == NULL_TREE) if (var4 == var3 || var4 == NULL_TREE)
...@@ -8774,7 +8736,10 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8774,7 +8736,10 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
} }
} }
} }
if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE) if (is_simd
&& octx->scan_exclusive
&& !input_phase
&& var4 == NULL_TREE)
var4 = create_tmp_var (TREE_TYPE (val)); var4 = create_tmp_var (TREE_TYPE (val));
} }
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
...@@ -8794,12 +8759,14 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8794,12 +8759,14 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{ {
/* Otherwise, assign to it the identity element. */ /* Otherwise, assign to it the identity element. */
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
if (is_for)
tseq = copy_gimple_seq_and_replace_locals (tseq);
tree ref = build_outer_var_ref (var, octx); tree ref = build_outer_var_ref (var, octx);
tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE); ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
if (x) if (x)
{ {
if (omp_is_reference (var)) if (new_vard != new_var)
val = build_fold_addr_expr_loc (clause_loc, val); val = build_fold_addr_expr_loc (clause_loc, val);
SET_DECL_VALUE_EXPR (new_vard, val); SET_DECL_VALUE_EXPR (new_vard, val);
} }
...@@ -8811,13 +8778,14 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8811,13 +8778,14 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0; DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
gimple_seq_add_seq (&before, tseq); gimple_seq_add_seq (&before, tseq);
if (is_simd)
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
} }
} }
else else if (is_simd)
{ {
tree x; tree x;
if (!octx->scan_inclusive) if (octx->scan_exclusive)
{ {
tree v4 = unshare_expr (var4); tree v4 = unshare_expr (var4);
tree v2 = unshare_expr (var2); tree v2 = unshare_expr (var2);
...@@ -8828,7 +8796,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8828,7 +8796,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
x = (DECL_HAS_VALUE_EXPR_P (new_vard) x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE); ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
tree vexpr = val; tree vexpr = val;
if (x && omp_is_reference (var)) if (x && new_vard != new_var)
vexpr = build_fold_addr_expr_loc (clause_loc, val); vexpr = build_fold_addr_expr_loc (clause_loc, val);
if (x) if (x)
SET_DECL_VALUE_EXPR (new_vard, vexpr); SET_DECL_VALUE_EXPR (new_vard, vexpr);
...@@ -8864,7 +8832,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8864,7 +8832,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree x = omp_reduction_init (c, TREE_TYPE (new_var)); tree x = omp_reduction_init (c, TREE_TYPE (new_var));
gimplify_assign (val, x, &before); gimplify_assign (val, x, &before);
} }
else else if (is_simd)
{ {
/* scan phase. */ /* scan phase. */
enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
...@@ -8888,11 +8856,11 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8888,11 +8856,11 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
} }
} }
} }
if (!octx->scan_inclusive && !input_phase && lane0) if (octx->scan_exclusive && !input_phase && lane0)
{ {
tree vexpr = unshare_expr (var4); tree vexpr = unshare_expr (var4);
TREE_OPERAND (vexpr, 1) = lane0; TREE_OPERAND (vexpr, 1) = lane0;
if (omp_is_reference (var)) if (new_vard != new_var)
vexpr = build_fold_addr_expr_loc (clause_loc, vexpr); vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
SET_DECL_VALUE_EXPR (new_vard, vexpr); SET_DECL_VALUE_EXPR (new_vard, vexpr);
} }
...@@ -8901,9 +8869,17 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8901,9 +8869,17 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
else if (has_clauses) else if (has_clauses)
sorry_at (gimple_location (stmt), sorry_at (gimple_location (stmt),
"%<#pragma omp scan%> not supported yet"); "%<#pragma omp scan%> not supported yet");
if (!is_for)
{
gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT); gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
gsi_replace (gsi_p, gimple_build_nop (), true); gsi_replace (gsi_p, gimple_build_nop (), true);
}
else if (before)
{
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt));
gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT);
}
} }
...@@ -9124,6 +9100,712 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, ...@@ -9124,6 +9100,712 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
} }
} }
/* Callback for walk_gimple_seq.  Find #pragma omp scan statement.  */

/* File-local helper: made static so it does not leak into the global
   namespace (its only caller, lower_omp_for_scan, is in this file).  */

static tree
omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
	       struct walk_stmt_info *wi)
{
  gimple *stmt = gsi_stmt (*gsi_p);
  *handled_ops_p = true;
  switch (gimple_code (stmt))
    {
    WALK_SUBSTMTS;

    case GIMPLE_OMP_SCAN:
      /* Report the iterator pointing at the GIMPLE_OMP_SCAN back to the
	 caller through wi->info and return non-NULL to stop the walk.  */
      *(gimple_stmt_iterator *) (wi->info) = *gsi_p;
      return integer_zero_node;
    default:
      break;
    }
  return NULL;
}
/* Helper function for lower_omp_for, add transformations for a worksharing
loop with scan directives inside of it.
For worksharing loop not combined with simd, transform:
#pragma omp for reduction(inscan,+:r) private(i)
for (i = 0; i < n; i = i + 1)
{
{
update (r);
}
#pragma omp scan inclusive(r)
{
use (r);
}
}
into two worksharing loops + code to merge results:
num_threads = omp_get_num_threads ();
thread_num = omp_get_thread_num ();
if (thread_num == 0) goto <D.2099>; else goto <D.2100>;
<D.2099>:
var2 = r;
goto <D.2101>;
<D.2100>:
// For UDRs this is UDR init, or if ctors are needed, copy from
// var3 that has been constructed to contain the neutral element.
var2 = 0;
<D.2101>:
ivar = 0;
// The _scantemp_ clauses will arrange for rpriva to be initialized to
// a shared array with num_threads elements and rprivb to a local array
// number of elements equal to the number of (contiguous) iterations the
// current thread will perform. controlb and controlp variables are
// temporaries to handle deallocation of rprivb at the end of second
// GOMP_FOR.
#pragma omp for _scantemp_(rpriva) _scantemp_(rprivb) _scantemp_(controlb) \
_scantemp_(controlp) reduction(inscan,+:r) private(i) nowait
for (i = 0; i < n; i = i + 1)
{
{
// For UDRs this is UDR init or copy from var3.
r = 0;
// This is the input phase from user code.
update (r);
}
{
// For UDRs this is UDR merge.
var2 = var2 + r;
// Rather than handing it over to the user, save to local thread's
// array.
rprivb[ivar] = var2;
// For exclusive scan, the above two statements are swapped.
ivar = ivar + 1;
}
}
// And remember the final value from this thread's into the shared
// rpriva array.
rpriva[(sizetype) thread_num] = var2;
// If more than one thread, compute using Work-Efficient prefix sum
// the inclusive parallel scan of the rpriva array.
if (num_threads > 1) goto <D.2102>; else goto <D.2103>;
<D.2102>:
GOMP_barrier ();
down = 0;
k = 1;
num_threadsu = (unsigned int) num_threads;
thread_numup1 = (unsigned int) thread_num + 1;
<D.2108>:
twok = k << 1;
if (twok > num_threadsu) goto <D.2110>; else goto <D.2111>;
<D.2110>:
down = 4294967295;
k = k >> 1;
if (k == num_threadsu) goto <D.2112>; else goto <D.2111>;
<D.2112>:
k = k >> 1;
<D.2111>:
twok = k << 1;
cplx = .MUL_OVERFLOW (thread_nump1, twok);
mul = REALPART_EXPR <cplx>;
ovf = IMAGPART_EXPR <cplx>;
if (ovf == 0) goto <D.2116>; else goto <D.2117>;
<D.2116>:
andv = k & down;
andvm1 = andv + 4294967295;
l = mul + andvm1;
if (l < num_threadsu) goto <D.2120>; else goto <D.2117>;
<D.2120>:
// For UDRs this is UDR merge, performed using var2 variable as temporary,
// i.e. var2 = rpriva[l - k]; UDR merge (var2, rpriva[l]); rpriva[l] = var2;
rpriva[l] = rpriva[l - k] + rpriva[l];
<D.2117>:
if (down == 0) goto <D.2121>; else goto <D.2122>;
<D.2121>:
k = k << 1;
goto <D.2123>;
<D.2122>:
k = k >> 1;
<D.2123>:
GOMP_barrier ();
if (k != 0) goto <D.2108>; else goto <D.2103>;
<D.2103>:
if (thread_num == 0) goto <D.2124>; else goto <D.2125>;
<D.2124>:
// For UDRs this is UDR init or copy from var3.
var2 = 0;
goto <D.2126>;
<D.2125>:
var2 = rpriva[thread_num - 1];
<D.2126>:
ivar = 0;
#pragma omp for _scantemp_(controlb) _scantemp_(controlp) \
reduction(inscan,+:r) private(i)
for (i = 0; i < n; i = i + 1)
{
{
// For UDRs, this is UDR merge (rprivb[ivar], var2); r = rprivb[ivar];
r = rprivb[ivar] + var2;
}
{
// This is the scan phase from user code.
use (r);
// Plus a bump of the iterator.
ivar = ivar + 1;
}
} */
static void
lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
		    struct omp_for_data *fd, omp_context *ctx)
{
  /* Only called for worksharing loops containing an inscan reduction;
     see the big comment above for the overall transformation.  */
  gcc_assert (ctx->scan_inclusive || ctx->scan_exclusive);

  /* Locate the #pragma omp scan separating the input phase from the
     scan phase in the original loop body.  */
  gimple_seq body = gimple_omp_body (stmt);
  gimple_stmt_iterator input1_gsi = gsi_none ();
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  wi.info = (void *) &input1_gsi;
  walk_gimple_seq_mod (&body, omp_find_scan, NULL, &wi);
  gcc_assert (!gsi_end_p (input1_gsi));

  gimple *input_stmt1 = gsi_stmt (input1_gsi);
  gimple_stmt_iterator gsi = input1_gsi;
  gsi_next (&gsi);
  gimple_stmt_iterator scan1_gsi = gsi;
  gimple *scan_stmt1 = gsi_stmt (gsi);
  gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN);

  /* Temporarily detach the user bodies so that copying the loop below
     does not duplicate them, then copy the GOMP_FOR for the second
     (scan phase) worksharing loop.  */
  gimple_seq input_body = gimple_omp_body (input_stmt1);
  gimple_seq scan_body = gimple_omp_body (scan_stmt1);
  gimple_omp_set_body (input_stmt1, NULL);
  gimple_omp_set_body (scan_stmt1, NULL);
  gimple_omp_set_body (stmt, NULL);
  gomp_for *new_stmt = as_a <gomp_for *> (gimple_copy (stmt));
  gimple_seq new_body = copy_gimple_seq_and_replace_locals (body);
  gimple_omp_set_body (stmt, body);
  gimple_omp_set_body (input_stmt1, input_body);

  /* Find the corresponding input/scan statements in the copied body.  */
  gimple_stmt_iterator input2_gsi = gsi_none ();
  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  wi.info = (void *) &input2_gsi;
  walk_gimple_seq_mod (&new_body, omp_find_scan, NULL, &wi);
  gcc_assert (!gsi_end_p (input2_gsi));

  gimple *input_stmt2 = gsi_stmt (input2_gsi);
  gsi = input2_gsi;
  gsi_next (&gsi);
  gimple_stmt_iterator scan2_gsi = gsi;
  gimple *scan_stmt2 = gsi_stmt (gsi);
  gcc_assert (scan_stmt2 && gimple_code (scan_stmt2) == GIMPLE_OMP_SCAN);
  /* The user's scan phase body runs in the second loop only.  */
  gimple_omp_set_body (scan_stmt2, scan_body);

  /* Emit num_threads = omp_get_num_threads ();
	  thread_num = omp_get_thread_num ();  */
  tree num_threads = create_tmp_var (integer_type_node);
  tree thread_num = create_tmp_var (integer_type_node);
  tree nthreads_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree threadnum_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  gimple *g = gimple_build_call (nthreads_decl, 0);
  gimple_call_set_lhs (g, num_threads);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_call (threadnum_decl, 0);
  gimple_call_set_lhs (g, thread_num);
  gimple_seq_add_stmt (body_p, g);

  /* ivar indexes the per-thread rprivb array; k and l drive the
     Work-Efficient prefix sum over rpriva.  cp1/cp2 collect the new
     _scantemp_ clauses for the first and second GOMP_FOR.  */
  tree ivar = create_tmp_var (sizetype);
  tree new_clauses1 = NULL_TREE, new_clauses2 = NULL_TREE;
  tree *cp1 = &new_clauses1, *cp2 = &new_clauses2;
  tree k = create_tmp_var (unsigned_type_node);
  tree l = create_tmp_var (unsigned_type_node);

  /* Statement sequences built per inscan reduction clause:
     clist	- var2 construction (before everything);
     mdlist	- rpriva[thread_num] = var2 (between the loops);
     thr01_list	- thread 0 init before the first loop;
     thrn1_list	- other threads' init before the first loop;
     thr02_list	- thread 0 init before the second loop;
     thrn2_list	- other threads' init before the second loop;
     scan1_list	- replacement of the scan directive in the first loop;
     input2_list - replacement of the input phase in the second loop;
     last_list	- store of the final value to the original var;
     reduc_list	- the rpriva[l] merge step of the prefix sum.  */
  gimple_seq clist = NULL, mdlist = NULL;
  gimple_seq thr01_list = NULL, thrn1_list = NULL;
  gimple_seq thr02_list = NULL, thrn2_list = NULL;
  gimple_seq scan1_list = NULL, input2_list = NULL;
  gimple_seq last_list = NULL, reduc_list = NULL;
  for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
	&& OMP_CLAUSE_REDUCTION_INSCAN (c))
      {
	location_t clause_loc = OMP_CLAUSE_LOCATION (c);
	tree var = OMP_CLAUSE_DECL (c);
	tree new_var = lookup_decl (var, ctx);
	tree var3 = NULL_TREE;
	tree new_vard = new_var;
	if (omp_is_reference (var))
	  new_var = build_simple_mem_ref_loc (clause_loc, new_var);
	if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	  {
	    /* var3, if distinct, holds the UDR neutral element
	       constructed elsewhere.  */
	    var3 = maybe_lookup_decl (new_vard, ctx);
	    if (var3 == new_vard)
	      var3 = NULL_TREE;
	  }
	/* rpriva: shared array of per-thread results;
	   rprivb: per-thread array of per-iteration values
	   (marked _SCANTEMP__ALLOC so expansion allocates it).  */
	tree ptype = build_pointer_type (TREE_TYPE (new_var));
	tree rpriva = create_tmp_var (ptype);
	tree nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
	OMP_CLAUSE_DECL (nc) = rpriva;
	*cp1 = nc;
	cp1 = &OMP_CLAUSE_CHAIN (nc);
	tree rprivb = create_tmp_var (ptype);
	nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
	OMP_CLAUSE_DECL (nc) = rprivb;
	OMP_CLAUSE__SCANTEMP__ALLOC (nc) = 1;
	*cp1 = nc;
	cp1 = &OMP_CLAUSE_CHAIN (nc);
	/* var2 is the running per-thread accumulator.  */
	tree var2 = create_tmp_var_raw (TREE_TYPE (new_var));
	if (new_vard != new_var)
	  TREE_ADDRESSABLE (var2) = 1;
	gimple_add_tmp_var (var2);
	/* Build the various array element references used below:
	   rpriva[thread_num], rpriva[thread_num - 1], rpriva[l],
	   rpriva[l - k] and rprivb[ivar].  */
	tree x = fold_convert_loc (clause_loc, sizetype, thread_num);
	x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
			     TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
	x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
	tree rpriva_ref = build_simple_mem_ref_loc (clause_loc, x);
	x = fold_build2_loc (clause_loc, PLUS_EXPR, integer_type_node,
			     thread_num, integer_minus_one_node);
	x = fold_convert_loc (clause_loc, sizetype, x);
	x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
			     TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
	x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
	tree rprivam1_ref = build_simple_mem_ref_loc (clause_loc, x);
	x = fold_convert_loc (clause_loc, sizetype, l);
	x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
			     TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
	x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
	tree rprival_ref = build_simple_mem_ref_loc (clause_loc, x);
	x = fold_build2_loc (clause_loc, MINUS_EXPR, unsigned_type_node, l, k);
	x = fold_convert_loc (clause_loc, sizetype, x);
	x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
			     TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
	x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
	tree rprivalmk_ref = build_simple_mem_ref_loc (clause_loc, x);
	x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, ivar,
			     TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
	x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rprivb), rprivb, x);
	tree rprivb_ref = build_simple_mem_ref_loc (clause_loc, x);
	if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	  {
	    /* User-defined reduction: lower the UDR init/merge sequences
	       with the placeholder and omp_in/omp_out temporarily
	       redirected via DECL_VALUE_EXPR.  */
	    tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
	    tree val = var2;
	    if (new_vard != new_var)
	      val = build_fold_addr_expr_loc (clause_loc, val);
	    x = lang_hooks.decls.omp_clause_default_ctor
		  (c, var2, build_outer_var_ref (var, ctx));
	    if (x)
	      gimplify_and_add (x, &clist);
	    /* Thread 0 starts from the original variable's value.  */
	    x = build_outer_var_ref (var, ctx);
	    x = lang_hooks.decls.omp_clause_assign_op (c, var2, x);
	    gimplify_and_add (x, &thr01_list);
	    /* Remember any preexisting value expr so it can be
	       restored after each temporary redirection.  */
	    tree y = (DECL_HAS_VALUE_EXPR_P (new_vard)
		      ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
	    if (var3)
	      {
		/* Copy the preconstructed neutral element.  */
		x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
		gimplify_and_add (x, &thrn1_list);
		x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
		gimplify_and_add (x, &thr02_list);
	      }
	    else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
	      {
		/* Otherwise, assign to it the identity element.  */
		gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
		tseq = copy_gimple_seq_and_replace_locals (tseq);
		SET_DECL_VALUE_EXPR (new_vard, val);
		DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
		SET_DECL_VALUE_EXPR (placeholder, error_mark_node);
		DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
		lower_omp (&tseq, ctx);
		gimple_seq_add_seq (&thrn1_list, tseq);
		tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
		lower_omp (&tseq, ctx);
		gimple_seq_add_seq (&thr02_list, tseq);
		SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
		DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
		OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
		if (y)
		  SET_DECL_VALUE_EXPR (new_vard, y);
		else
		  {
		    DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
		    SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
		  }
	      }
	    /* Non-zero threads resume from the previous thread's total
	       in the second loop.  */
	    x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivam1_ref);
	    gimplify_and_add (x, &thrn2_list);
	    /* For exclusive scan, store to rprivb before the merge;
	       for inclusive scan, after it.  */
	    if (ctx->scan_exclusive)
	      {
		x = unshare_expr (rprivb_ref);
		x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
		gimplify_and_add (x, &scan1_list);
	      }
	    gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
	    tseq = copy_gimple_seq_and_replace_locals (tseq);
	    SET_DECL_VALUE_EXPR (placeholder, var2);
	    DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
	    lower_omp (&tseq, ctx);
	    gimple_seq_add_seq (&scan1_list, tseq);
	    if (ctx->scan_inclusive)
	      {
		x = unshare_expr (rprivb_ref);
		x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
		gimplify_and_add (x, &scan1_list);
	      }
	    /* rpriva[thread_num] = var2; after the first loop.  */
	    x = unshare_expr (rpriva_ref);
	    x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
	    gimplify_and_add (x, &mdlist);
	    /* Input phase of the second loop: merge rprivb[ivar] into
	       the thread's running value and hand it to the user var.  */
	    tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
	    tseq = copy_gimple_seq_and_replace_locals (tseq);
	    SET_DECL_VALUE_EXPR (new_vard, val);
	    DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
	    SET_DECL_VALUE_EXPR (placeholder, rprivb_ref);
	    lower_omp (&tseq, ctx);
	    if (y)
	      SET_DECL_VALUE_EXPR (new_vard, y);
	    else
	      {
		DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
		SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
	      }
	    gimple_seq_add_seq (&input2_list, tseq);
	    x = unshare_expr (new_var);
	    x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref);
	    gimplify_and_add (x, &input2_list);
	    /* The last thread's total becomes the original variable's
	       final value.  */
	    x = build_outer_var_ref (var, ctx);
	    x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref);
	    gimplify_and_add (x, &last_list);
	    /* Prefix-sum merge step:
	       var2 = rpriva[l - k]; merge (var2, rpriva[l]);
	       rpriva[l] = var2;  */
	    x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivalmk_ref);
	    gimplify_and_add (x, &reduc_list);
	    tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
	    tseq = copy_gimple_seq_and_replace_locals (tseq);
	    val = rprival_ref;
	    if (new_vard != new_var)
	      val = build_fold_addr_expr_loc (clause_loc, val);
	    SET_DECL_VALUE_EXPR (new_vard, val);
	    DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
	    SET_DECL_VALUE_EXPR (placeholder, var2);
	    lower_omp (&tseq, ctx);
	    OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
	    SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
	    DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
	    if (y)
	      SET_DECL_VALUE_EXPR (new_vard, y);
	    else
	      {
		DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
		SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
	      }
	    gimple_seq_add_seq (&reduc_list, tseq);
	    x = lang_hooks.decls.omp_clause_assign_op (c, rprival_ref, var2);
	    gimplify_and_add (x, &reduc_list);
	    /* Destruct var2 at the very end.  */
	    x = lang_hooks.decls.omp_clause_dtor (c, var2);
	    if (x)
	      gimplify_and_add (x, dlist);
	  }
	else
	  {
	    /* Plain (non-UDR) reduction: same structure, built with
	       simple assignments and the reduction's tree code.  */
	    x = build_outer_var_ref (var, ctx);
	    gimplify_assign (var2, x, &thr01_list);
	    x = omp_reduction_init (c, TREE_TYPE (new_var));
	    gimplify_assign (var2, unshare_expr (x), &thrn1_list);
	    gimplify_assign (var2, x, &thr02_list);
	    gimplify_assign (var2, rprivam1_ref, &thrn2_list);
	    enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
	    if (code == MINUS_EXPR)
	      /* Subtraction scans still accumulate with addition.  */
	      code = PLUS_EXPR;
	    if (ctx->scan_exclusive)
	      gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
	    x = build2 (code, TREE_TYPE (new_var), var2, new_var);
	    gimplify_assign (var2, x, &scan1_list);
	    if (ctx->scan_inclusive)
	      gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
	    gimplify_assign (unshare_expr (rpriva_ref), var2, &mdlist);
	    x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2);
	    gimplify_assign (new_var, x, &input2_list);
	    gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref,
			     &last_list);
	    x = build2 (code, TREE_TYPE (new_var), rprivalmk_ref,
			unshare_expr (rprival_ref));
	    gimplify_assign (rprival_ref, x, &reduc_list);
	  }
      }
  /* Bump ivar at the end of the scan phase of the first loop and of the
     scan phase body in the second loop.  */
  g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
  gimple_seq_add_stmt (&scan1_list, g);
  g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
  gimple_seq_add_stmt (gimple_omp_body_ptr (scan_stmt2), g);

  /* controlb/controlp coordinate rprivb deallocation at the end of the
     second GOMP_FOR; add them as _scantemp_ control clauses to both
     loops, then prepend all new clauses to the existing ones.  */
  tree controlb = create_tmp_var (boolean_type_node);
  tree controlp = create_tmp_var (ptr_type_node);
  tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
  OMP_CLAUSE_DECL (nc) = controlb;
  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
  *cp1 = nc;
  cp1 = &OMP_CLAUSE_CHAIN (nc);
  nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
  OMP_CLAUSE_DECL (nc) = controlp;
  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
  *cp1 = nc;
  cp1 = &OMP_CLAUSE_CHAIN (nc);
  nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
  OMP_CLAUSE_DECL (nc) = controlb;
  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
  *cp2 = nc;
  cp2 = &OMP_CLAUSE_CHAIN (nc);
  nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
  OMP_CLAUSE_DECL (nc) = controlp;
  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
  *cp2 = nc;
  cp2 = &OMP_CLAUSE_CHAIN (nc);
  *cp1 = gimple_omp_for_clauses (stmt);
  gimple_omp_for_set_clauses (stmt, new_clauses1);
  *cp2 = gimple_omp_for_clauses (new_stmt);
  gimple_omp_for_set_clauses (new_stmt, new_clauses2);

  /* Replace the input/scan directives in both loop bodies with the
     sequences built above, dropping the directive statements.  */
  gimple_omp_set_body (scan_stmt1, scan1_list);
  gimple_omp_set_body (input_stmt2, input2_list);
  gsi_insert_seq_after (&input1_gsi, gimple_omp_body (input_stmt1),
			GSI_SAME_STMT);
  gsi_remove (&input1_gsi, true);
  gsi_insert_seq_after (&scan1_gsi, gimple_omp_body (scan_stmt1),
			GSI_SAME_STMT);
  gsi_remove (&scan1_gsi, true);
  gsi_insert_seq_after (&input2_gsi, gimple_omp_body (input_stmt2),
			GSI_SAME_STMT);
  gsi_remove (&input2_gsi, true);
  gsi_insert_seq_after (&scan2_gsi, gimple_omp_body (scan_stmt2),
			GSI_SAME_STMT);
  gsi_remove (&scan2_gsi, true);

  gimple_seq_add_seq (body_p, clist);

  /* if (thread_num == 0) { thr01_list } else { thrn1_list }  */
  tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab3 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab1);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_seq (body_p, thr01_list);
  g = gimple_build_goto (lab3);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab2);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_seq (body_p, thrn1_list);
  g = gimple_build_label (lab3);
  gimple_seq_add_stmt (body_p, g);
  /* ivar = 0; then emit the first (input phase) worksharing loop,
     ended by a nowait-style GIMPLE_OMP_RETURN.  */
  g = gimple_build_assign (ivar, size_zero_node);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_stmt (body_p, stmt);
  gimple_seq_add_seq (body_p, body);
  gimple_seq_add_stmt (body_p, gimple_build_omp_continue (fd->loop.v,
							  fd->loop.v));
  g = gimple_build_omp_return (true);
  gimple_seq_add_stmt (body_p, g);
  /* rpriva[thread_num] = var2;  */
  gimple_seq_add_seq (body_p, mdlist);

  /* If more than one thread, run the Work-Efficient inclusive prefix
     sum over rpriva, mirroring the pseudo code in the comment above.  */
  lab1 = create_artificial_label (UNKNOWN_LOCATION);
  lab2 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (GT_EXPR, num_threads, integer_one_node, lab1, lab2);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab1);
  gimple_seq_add_stmt (body_p, g);
  g = omp_build_barrier (NULL);
  gimple_seq_add_stmt (body_p, g);
  /* down = 0; k = 1; plus unsigned copies of the thread counts.  */
  tree down = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (down, build_zero_cst (unsigned_type_node));
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (k, build_one_cst (unsigned_type_node));
  gimple_seq_add_stmt (body_p, g);
  tree num_threadsu = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (num_threadsu, NOP_EXPR, num_threads);
  gimple_seq_add_stmt (body_p, g);
  tree thread_numu = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (thread_numu, NOP_EXPR, thread_num);
  gimple_seq_add_stmt (body_p, g);
  tree thread_nump1 = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (thread_nump1, PLUS_EXPR, thread_numu,
			   build_int_cst (unsigned_type_node, 1));
  gimple_seq_add_stmt (body_p, g);
  /* Top of the up/down sweep loop.  */
  lab3 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_label (lab3);
  gimple_seq_add_stmt (body_p, g);
  tree twok = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
  gimple_seq_add_stmt (body_p, g);
  /* When 2*k exceeds the thread count, switch to the down sweep.  */
  tree lab4 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab6 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (GT_EXPR, twok, num_threadsu, lab4, lab5);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab4);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (down, build_all_ones_cst (unsigned_type_node));
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_cond (EQ_EXPR, k, num_threadsu, lab6, lab5);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab6);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab5);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
  gimple_seq_add_stmt (body_p, g);
  /* l = (thread_num + 1) * 2k + ((k & down) - 1), guarded against
     unsigned overflow via .MUL_OVERFLOW.  */
  tree cplx = create_tmp_var (build_complex_type (unsigned_type_node, false));
  g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok);
  gimple_call_set_lhs (g, cplx);
  gimple_seq_add_stmt (body_p, g);
  tree mul = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (mul, REALPART_EXPR,
			   build1 (REALPART_EXPR, unsigned_type_node, cplx));
  gimple_seq_add_stmt (body_p, g);
  tree ovf = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (ovf, IMAGPART_EXPR,
			   build1 (IMAGPART_EXPR, unsigned_type_node, cplx));
  gimple_seq_add_stmt (body_p, g);
  tree lab7 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab8 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (EQ_EXPR, ovf, build_zero_cst (unsigned_type_node),
			 lab7, lab8);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab7);
  gimple_seq_add_stmt (body_p, g);
  tree andv = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (andv, BIT_AND_EXPR, k, down);
  gimple_seq_add_stmt (body_p, g);
  tree andvm1 = create_tmp_var (unsigned_type_node);
  g = gimple_build_assign (andvm1, PLUS_EXPR, andv,
			   build_minus_one_cst (unsigned_type_node));
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (l, PLUS_EXPR, mul, andvm1);
  gimple_seq_add_stmt (body_p, g);
  /* Only threads whose l is in range perform the merge step.  */
  tree lab9 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (LT_EXPR, l, num_threadsu, lab9, lab8);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab9);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_seq (body_p, reduc_list);
  g = gimple_build_label (lab8);
  gimple_seq_add_stmt (body_p, g);
  /* Advance k: doubling on the up sweep, halving on the down sweep;
     barrier between iterations; loop while k != 0.  */
  tree lab10 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab11 = create_artificial_label (UNKNOWN_LOCATION);
  tree lab12 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (EQ_EXPR, down, build_zero_cst (unsigned_type_node),
			 lab10, lab11);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab10);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (k, LSHIFT_EXPR, k, integer_one_node);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_goto (lab12);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab11);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab12);
  gimple_seq_add_stmt (body_p, g);
  g = omp_build_barrier (NULL);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_cond (NE_EXPR, k, build_zero_cst (unsigned_type_node),
			 lab3, lab2);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab2);
  gimple_seq_add_stmt (body_p, g);

  /* if (thread_num == 0) { thr02_list } else { thrn2_list }  */
  lab1 = create_artificial_label (UNKNOWN_LOCATION);
  lab2 = create_artificial_label (UNKNOWN_LOCATION);
  lab3 = create_artificial_label (UNKNOWN_LOCATION);
  g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab1);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_seq (body_p, thr02_list);
  g = gimple_build_goto (lab3);
  gimple_seq_add_stmt (body_p, g);
  g = gimple_build_label (lab2);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_seq (body_p, thrn2_list);
  g = gimple_build_label (lab3);
  gimple_seq_add_stmt (body_p, g);
  /* ivar = 0; then emit the second (scan phase) worksharing loop.  */
  g = gimple_build_assign (ivar, size_zero_node);
  gimple_seq_add_stmt (body_p, g);
  gimple_seq_add_stmt (body_p, new_stmt);
  gimple_seq_add_seq (body_p, new_body);

  /* Destructor sequence: the last thread writes the final value back to
     the original variable (last_list) before the preexisting dlist.  */
  gimple_seq new_dlist = NULL;
  lab1 = create_artificial_label (UNKNOWN_LOCATION);
  lab2 = create_artificial_label (UNKNOWN_LOCATION);
  tree num_threadsm1 = create_tmp_var (integer_type_node);
  g = gimple_build_assign (num_threadsm1, PLUS_EXPR, num_threads,
			   integer_minus_one_node);
  gimple_seq_add_stmt (&new_dlist, g);
  g = gimple_build_cond (EQ_EXPR, thread_num, num_threadsm1, lab1, lab2);
  gimple_seq_add_stmt (&new_dlist, g);
  g = gimple_build_label (lab1);
  gimple_seq_add_stmt (&new_dlist, g);
  gimple_seq_add_seq (&new_dlist, last_list);
  g = gimple_build_label (lab2);
  gimple_seq_add_stmt (&new_dlist, g);
  gimple_seq_add_seq (&new_dlist, *dlist);
  *dlist = new_dlist;
}
/* Lower code for an OMP loop directive. */ /* Lower code for an OMP loop directive. */
...@@ -9317,9 +9999,18 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -9317,9 +9999,18 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
&& gimple_omp_for_grid_phony (stmt)); && gimple_omp_for_grid_phony (stmt));
if ((ctx->scan_inclusive || ctx->scan_exclusive)
&& gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
{
gcc_assert (!phony_loop);
lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx);
}
else
{
if (!phony_loop) if (!phony_loop)
gimple_seq_add_stmt (&body, stmt); gimple_seq_add_stmt (&body, stmt);
gimple_seq_add_seq (&body, gimple_omp_body (stmt)); gimple_seq_add_seq (&body, gimple_omp_body (stmt));
}
if (!phony_loop) if (!phony_loop)
gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
......
2019-07-03 Jakub Jelinek <jakub@redhat.com> 2019-07-03 Jakub Jelinek <jakub@redhat.com>
* c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message.
* c-c++-common/gomp/scan-5.c (foo): Likewise.
* c-c++-common/gomp/scan-5.c: New test. * c-c++-common/gomp/scan-5.c: New test.
* c-c++-common/gomp/lastprivate-conditional-5.c: New test. * c-c++-common/gomp/lastprivate-conditional-5.c: New test.
......
...@@ -8,7 +8,7 @@ f1 (int *c, int *d) ...@@ -8,7 +8,7 @@ f1 (int *c, int *d)
for (i = 0; i < 64; i++) for (i = 0; i < 64; i++)
{ {
d[i] = a; d[i] = a;
#pragma omp scan inclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ #pragma omp scan inclusive (a)
a += c[i]; a += c[i];
} }
} }
...@@ -6,7 +6,7 @@ foo (int *a, int *b) ...@@ -6,7 +6,7 @@ foo (int *a, int *b)
for (int i = 0; i < 64; i++) for (int i = 0; i < 64; i++)
{ {
r += a[i]; r += a[i];
#pragma omp scan inclusive (r) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ #pragma omp scan inclusive (r)
b[i] = r; b[i] = r;
} }
return r; return r;
......
...@@ -352,6 +352,9 @@ enum omp_clause_code { ...@@ -352,6 +352,9 @@ enum omp_clause_code {
/* Internal clause: temporary for lastprivate(conditional:). */ /* Internal clause: temporary for lastprivate(conditional:). */
OMP_CLAUSE__CONDTEMP_, OMP_CLAUSE__CONDTEMP_,
/* Internal clause: temporary for inscan reductions. */
OMP_CLAUSE__SCANTEMP_,
/* OpenACC/OpenMP clause: if (scalar-expression). */ /* OpenACC/OpenMP clause: if (scalar-expression). */
OMP_CLAUSE_IF, OMP_CLAUSE_IF,
......
...@@ -1349,6 +1349,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) ...@@ -1349,6 +1349,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE__CONDTEMP_: case OMP_CLAUSE__CONDTEMP_:
case OMP_CLAUSE__SCANTEMP_:
break; break;
/* The following clause belongs to the OpenACC cache directive, which /* The following clause belongs to the OpenACC cache directive, which
...@@ -2078,6 +2079,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) ...@@ -2078,6 +2079,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE__CONDTEMP_: case OMP_CLAUSE__CONDTEMP_:
case OMP_CLAUSE__SCANTEMP_:
break; break;
/* The following clause belongs to the OpenACC cache directive, which /* The following clause belongs to the OpenACC cache directive, which
......
...@@ -483,6 +483,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) ...@@ -483,6 +483,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
case OMP_CLAUSE__CONDTEMP_: case OMP_CLAUSE__CONDTEMP_:
name = "_condtemp_"; name = "_condtemp_";
goto print_remap; goto print_remap;
case OMP_CLAUSE__SCANTEMP_:
name = "_scantemp_";
goto print_remap;
case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_TO_DECLARE:
name = "to"; name = "to";
goto print_remap; goto print_remap;
......
...@@ -311,6 +311,7 @@ unsigned const char omp_clause_num_ops[] = ...@@ -311,6 +311,7 @@ unsigned const char omp_clause_num_ops[] =
1, /* OMP_CLAUSE__LOOPTEMP_ */ 1, /* OMP_CLAUSE__LOOPTEMP_ */
1, /* OMP_CLAUSE__REDUCTEMP_ */ 1, /* OMP_CLAUSE__REDUCTEMP_ */
1, /* OMP_CLAUSE__CONDTEMP_ */ 1, /* OMP_CLAUSE__CONDTEMP_ */
1, /* OMP_CLAUSE__SCANTEMP_ */
1, /* OMP_CLAUSE_IF */ 1, /* OMP_CLAUSE_IF */
1, /* OMP_CLAUSE_NUM_THREADS */ 1, /* OMP_CLAUSE_NUM_THREADS */
1, /* OMP_CLAUSE_SCHEDULE */ 1, /* OMP_CLAUSE_SCHEDULE */
...@@ -391,6 +392,7 @@ const char * const omp_clause_code_name[] = ...@@ -391,6 +392,7 @@ const char * const omp_clause_code_name[] =
"_looptemp_", "_looptemp_",
"_reductemp_", "_reductemp_",
"_condtemp_", "_condtemp_",
"_scantemp_",
"if", "if",
"num_threads", "num_threads",
"schedule", "schedule",
...@@ -12316,6 +12318,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, ...@@ -12316,6 +12318,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__REDUCTEMP_:
case OMP_CLAUSE__CONDTEMP_: case OMP_CLAUSE__CONDTEMP_:
case OMP_CLAUSE__SCANTEMP_:
case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__SIMDUID_:
WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
/* FALLTHRU */ /* FALLTHRU */
......
...@@ -1449,7 +1449,7 @@ class auto_suppress_location_wrappers ...@@ -1449,7 +1449,7 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_DECL(NODE) \ #define OMP_CLAUSE_DECL(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \
OMP_CLAUSE_PRIVATE, \ OMP_CLAUSE_PRIVATE, \
OMP_CLAUSE__CONDTEMP_), 0) OMP_CLAUSE__SCANTEMP_), 0)
#define OMP_CLAUSE_HAS_LOCATION(NODE) \ #define OMP_CLAUSE_HAS_LOCATION(NODE) \
(LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus) \ (LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus) \
!= UNKNOWN_LOCATION) != UNKNOWN_LOCATION)
...@@ -1761,6 +1761,17 @@ class auto_suppress_location_wrappers ...@@ -1761,6 +1761,17 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE__CONDTEMP__ITER(NODE) \ #define OMP_CLAUSE__CONDTEMP__ITER(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag) (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag)
/* _SCANTEMP_ holding temporary with pointer to thread's local array;
allocation. */
#define OMP_CLAUSE__SCANTEMP__ALLOC(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)->base.public_flag)
/* _SCANTEMP_ holding temporary with a control variable for deallocation;
one boolean_type_node for test whether alloca was used, another one
to pass to __builtin_stack_restore or free. */
#define OMP_CLAUSE__SCANTEMP__CONTROL(NODE) \
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_))
/* SSA_NAME accessors. */ /* SSA_NAME accessors. */
/* Whether SSA_NAME NODE is a virtual operand. This simply caches the /* Whether SSA_NAME NODE is a virtual operand. This simply caches the
......
2019-07-03 Jakub Jelinek <jakub@redhat.com>
* testsuite/libgomp.c++/scan-1.C: New test.
* testsuite/libgomp.c++/scan-2.C: New test.
* testsuite/libgomp.c++/scan-3.C: New test.
* testsuite/libgomp.c++/scan-4.C: New test.
* testsuite/libgomp.c++/scan-5.C: New test.
* testsuite/libgomp.c++/scan-6.C: New test.
* testsuite/libgomp.c++/scan-7.C: New test.
* testsuite/libgomp.c++/scan-8.C: New test.
* testsuite/libgomp.c/scan-1.c: New test.
* testsuite/libgomp.c/scan-2.c: New test.
* testsuite/libgomp.c/scan-3.c: New test.
* testsuite/libgomp.c/scan-4.c: New test.
* testsuite/libgomp.c/scan-5.c: New test.
* testsuite/libgomp.c/scan-6.c: New test.
* testsuite/libgomp.c/scan-7.c: New test.
* testsuite/libgomp.c/scan-8.c: New test.
2019-06-18 Thomas Schwinge <thomas@codesourcery.com> 2019-06-18 Thomas Schwinge <thomas@codesourcery.com>
* testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C: New file. * testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C: New file.
......
// { dg-require-effective-target size32plus }
extern "C" void abort ();
/* Minimal class type with user-declared (inline) special member functions,
   so that inscan reduction of an S object must go through real
   constructor/destructor/assignment calls rather than bitwise copies.  */
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;   /* the only data member; all scan arithmetic acts on it */
};
S::S () : s (0)
{
}
S::~S ()
{
}
S::S (const S &x)
{
s = x.s;
}
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
static inline void
ini (S &x)
{
x.s = 0;
}
S r, a[1024], b[1024];
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
__attribute__((noipa)) void
foo (S *a, S *b)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r.s += a[i].s;
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) S
bar (void)
{
S s;
#pragma omp parallel
#pragma omp for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
s.s += 2 * a[i].s;
#pragma omp scan inclusive(s)
b[i] = s;
}
return S (s);
}
__attribute__((noipa)) void
baz (S *a, S *b)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r.s += a[i].s;
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) S
qux (void)
{
S s;
#pragma omp parallel for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
s.s += 2 * a[i].s;
#pragma omp scan inclusive(s)
b[i] = s;
}
return S (s);
}
int
main ()
{
S s;
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s.s += i;
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
}
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += 2 * i;
if (b[i].s != s.s)
abort ();
}
r.s = 0;
baz (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += i;
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += 2 * i;
if (b[i].s != s.s)
abort ();
}
return 0;
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
int r, a[1024], b[1024], q;
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
#pragma omp for reduction (inscan, +:r) nowait
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) int
bar (void)
{
int &s = q;
q = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, +:s) nowait
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) int
qux (void)
{
int &s = q;
q = 0;
#pragma omp parallel for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
/* Running prefix used to recompute the expected scan results serially.  */
int s = 0;
/* Initialize inputs; the asm keeps the loop from being folded away.  */
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
/* foo uses an orphaned "omp for", so call it inside a parallel region.  */
#pragma omp parallel
foo (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
/* Verify inclusive scan: b[i] must equal a[0] + ... + a[i].  */
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = 25;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
/* bar scans 2 * a[i], so the expected prefix steps by 2 * i.  */
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
else
b[i] = -1;
}
r = 0;
baz (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = -25;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
}
/* Explicit return for consistency with the sibling scan testcases
   (behavior unchanged: main implicitly returns 0 in C++).  */
return 0;
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
int r, a[1024], b[1024], q;
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
#pragma omp for reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) int
bar (void)
{
int &s = q;
q = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
#pragma omp parallel for reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) int
qux (void)
{
int &s = q;
q = 0;
#pragma omp parallel for reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
int s = 0;
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = 25;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
else
b[i] = -1;
}
r = 0;
baz (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = -25;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
}
return 0;
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;
};
S::S () : s (0)
{
}
S::~S ()
{
}
S::S (const S &x)
{
s = x.s;
}
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
static inline void
ini (S &x)
{
x.s = 0;
}
S r, a[1024], b[1024];
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
__attribute__((noipa)) void
foo (S *a, S *b, S &r)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r.s += a[i].s;
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) S
bar ()
{
S s;
#pragma omp parallel
#pragma omp for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
s.s += 2 * a[i].s;
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
__attribute__((noipa)) void
baz (S *a, S *b, S &r)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r.s += a[i].s;
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) S
qux ()
{
S s;
#pragma omp parallel for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
s.s += 2 * a[i].s;
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
S s;
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s.s += i;
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
}
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += 2 * i;
if (b[i].s != s.s)
abort ();
}
r.s = 0;
baz (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += i;
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
s.s += 2 * i;
if (b[i].s != s.s)
abort ();
}
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
template <typename T>
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
T s;
};
template <typename T>
S<T>::S () : s (0)
{
}
template <typename T>
S<T>::~S ()
{
}
template <typename T>
S<T>::S (const S &x)
{
s = x.s;
}
template <typename T>
S<T> &
S<T>::operator= (const S &x)
{
s = x.s;
return *this;
}
template <typename T>
static inline void
ini (S<T> &x)
{
x.s = 0;
}
S<int> r, a[1024], b[1024];
#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
template <typename T>
__attribute__((noipa)) void
foo (S<T> *a, S<T> *b)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
template <typename T>
__attribute__((noipa)) S<T>
bar (void)
{
S<T> s;
#pragma omp parallel
#pragma omp for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<T> (s);
}
__attribute__((noipa)) void
baz (S<int> *a, S<int> *b)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S<int>
qux (void)
{
S<int> s;
#pragma omp parallel for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<int> (s);
}
int
main ()
{
S<int> s;
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (bar<int> ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
r.s = 0;
baz (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
int r, a[1024], b[1024], q;
template <typename T, typename U>
__attribute__((noipa)) void
foo (T a, T b, U r)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
template <typename T>
__attribute__((noipa)) T
bar ()
{
T &s = q;
q = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
template <typename T>
__attribute__((noipa)) void
baz (T *a, T *b, T &r)
{
#pragma omp parallel for reduction (inscan, +:r)
for (T i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
template <typename T>
__attribute__((noipa)) int
qux ()
{
T s = q;
q = 0;
#pragma omp parallel for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo<int *, int &> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar<int> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz<int> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux<int &> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
int r, a[1024], b[1024], q;
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
#pragma omp for reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int &s = q;
q = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, foo:s) nowait
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
#pragma omp parallel for reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int &s = q;
q = 0;
#pragma omp parallel for reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
}
// { dg-require-effective-target size32plus }
extern "C" void abort ();
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;
};
S::S () : s (0)
{
}
S::~S ()
{
}
S::S (const S &x)
{
s = x.s;
}
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
static inline void
ini (S &x)
{
x.s = 0;
}
S r, a[1024], b[1024];
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
__attribute__((noipa)) void
foo (S *a, S *b, S &r)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S
bar (void)
{
S s;
#pragma omp parallel
#pragma omp for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
__attribute__((noipa)) void
baz (S *a, S *b, S &r)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S
qux (void)
{
S s;
#pragma omp parallel for reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
int
main ()
{
S s;
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
r.s = 0;
baz (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
int r, a[1024], b[1024];
/* Inclusive scan into the global r: after iteration i, b[i] holds
   a[0] + ... + a[i].  Uses an orphaned worksharing loop (#pragma omp for,
   not parallel for), so it must be invoked from inside a parallel region.
   noipa keeps the function from being inlined/IPA-optimized so the OpenMP
   codegen path is actually exercised.  */
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
/* Inclusive: the updated r is visible to the statement after the scan.  */
#pragma omp scan inclusive(r)
b[i] = r;
}
}
/* Inclusive scan of 2 * a[i] into b using a function-local accumulator s.
   Unlike foo, this creates its own parallel region.  Returns the final
   total, i.e. 2 * (a[0] + ... + a[1023]).  */
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
/* Same inclusive scan as foo, but expressed with the combined
   "parallel for" construct instead of an orphaned worksharing loop:
   b[idx] receives the running sum a[0] + ... + a[idx] accumulated in
   the global r.  noipa prevents inlining so the directive is lowered
   for real.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int idx = 0; idx < 1024; idx++)
{
/* Input phase: fold this element into the reduction variable.  */
r += a[idx];
#pragma omp scan inclusive(r)
/* Scan phase: record the inclusive prefix for this iteration.  */
b[idx] = r;
}
}
/* Combined parallel for variant of bar: inclusive scan of 2 * a[i] into b
   via the local accumulator s; returns the grand total.  */
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp parallel for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
/* Serial running prefix used to recompute the expected scan results.  */
int s = 0;
/* Initialize inputs; the asm prevents the compiler from folding the loop.  */
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
/* foo contains an orphaned omp for, so wrap the call in a parallel region.  */
#pragma omp parallel
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
/* Check inclusive prefix sums, then clobber b so stale values can't pass
   the next phase's checks.  */
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = 25;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
/* bar scans 2 * a[i], so the expected prefix steps by 2 * i.  */
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
else
b[i] = -1;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = -25;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
}
return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
int r, a[1024], b[1024];
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp for reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp parallel for reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
r += a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp parallel for reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
s += 2 * a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
int s = 0;
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = 25;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
else
b[i] = -1;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s)
abort ();
else
b[i] = -25;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s)
abort ();
}
return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
float r = 1.0f, a[1024], b[1024];
/* Inclusive multiplicative scan: b[i] = a[0] * ... * a[i], accumulated in
   the global r (initialized to 1.0f at file scope).  Orphaned omp for, so
   callers must provide the parallel region.  */
__attribute__((noipa)) void
foo (float *a, float *b)
{
#pragma omp for reduction (inscan, *:r)
for (int i = 0; i < 1024; i++)
{
r *= a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
/* Inclusive max-scan: b[i] = max(a[0..i]), with the accumulator seeded to
   -inf (the identity for max).  Returns the overall maximum.  */
__attribute__((noipa)) float
bar (void)
{
float s = -__builtin_inff ();
#pragma omp parallel for reduction (inscan, max:s)
for (int i = 0; i < 1024; i++)
{
/* Written as a conditional expression rather than fmaxf so the
   reduction combiner matches the "max" operator exactly.  */
s = s > a[i] ? s : a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
/* Serial running product used to recheck the multiplicative scan.  */
float s = 1.0f;
/* Build an input whose running product stays exactly representable in
   float (powers of two and small factors), so == comparisons are valid.  */
for (int i = 0; i < 1024; ++i)
{
if (i < 80)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 200)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else if (i < 280)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 380)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else
switch (i % 6)
{
case 0: a[i] = 0.25f; break;
case 1: a[i] = 2.0f; break;
case 2: a[i] = -1.0f; break;
case 3: a[i] = -4.0f; break;
case 4: a[i] = 0.5f; break;
case 5: a[i] = 1.0f; break;
default: a[i] = 0.0f; break;
}
b[i] = -19.0f;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b);
/* Scaled comparison: the final product is 0.125f / 16384.0f, a tiny but
   exact power of two.  */
if (r * 16384.0f != 0.125f)
abort ();
/* Verify the product scan, then overwrite a with a strictly increasing
   (step 0.75f) sequence for the max-scan test.  */
float m = -175.25f;
for (int i = 0; i < 1024; ++i)
{
s *= a[i];
if (b[i] != s)
abort ();
else
{
a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
b[i] = -231.75f;
m += 0.75f;
}
}
if (bar () != 592.0f)
abort ();
/* Recompute the running maximum serially and compare.  */
s = -__builtin_inff ();
for (int i = 0; i < 1024; ++i)
{
if (s < a[i])
s = a[i];
if (b[i] != s)
abort ();
}
return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
int r, a[1024], b[1024];
unsigned short r2, b2[1024];
unsigned char r3, b3[1024];
__attribute__((noipa)) void
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp for reduction (inscan, +:r, r2, r3)
for (int i = 0; i < 1024; i++)
{
{ r += a[i]; r2 += a[i]; r3 += a[i]; }
#pragma omp scan inclusive(r, r2, r3)
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
}
}
__attribute__((noipa)) int
bar (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, +:s, s2, s3)
for (int i = 0; i < 1024; i++)
{
{
s += 2 * a[i];
s2 += 2 * a[i];
s3 += 2 * a[i];
}
#pragma omp scan inclusive(s, s2, s3)
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
}
*s2p = s2;
*s3p = s3;
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp parallel for reduction (inscan, +:r, r2, r3)
for (int i = 0; i < 1024; i++)
{
{
r += a[i];
r2 += a[i];
r3 += a[i];
}
#pragma omp scan inclusive(r, r2, r3)
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
}
}
__attribute__((noipa)) int
qux (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp parallel for reduction (inscan, +:s, s2, s3)
for (int i = 0; i < 1024; i++)
{
{ s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
#pragma omp scan inclusive(s, s2, s3)
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
}
*s2p = s2;
*s3p = s3;
return s;
}
int
main ()
{
int s = 0;
unsigned short s2;
unsigned char s3;
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
}
if (bar (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
}
}
r = 0;
r2 = 0;
r3 = 0;
baz (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += i;
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
}
s2 = 0;
s3 = 0;
if (qux (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
s += 2 * i;
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
}
return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
int r, a[1024], b[1024];
/* Exclusive scan into the global r: b[i] holds a[0] + ... + a[i-1]
   (b[0] gets the initial value of r).  The element is folded into r only
   AFTER the scan directive — the defining property of "exclusive".
   Orphaned omp for; must be called inside a parallel region.  */
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
/* Exclusive scan of 2 * a[i] into b with a local accumulator; creates its
   own parallel region and returns the final total.  */
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp parallel
#pragma omp for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
/* Combined "parallel for" variant of foo: exclusive scan into the
   global r.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp parallel for reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
/* Combined "parallel for" variant of bar: exclusive scan of 2 * a[i] into b
   via the local accumulator; returns the grand total.  */
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp parallel for reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
/* Serial running prefix; for an exclusive scan the check precedes the
   accumulation, mirroring the order inside the scanned loops.  */
int s = 0;
/* Initialize inputs; the asm keeps the loop from being folded away.  */
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
/* foo contains an orphaned omp for, so wrap it in a parallel region.  */
#pragma omp parallel
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
/* b[i] must equal the sum of elements BEFORE i; clobber b afterwards so
   stale values can't satisfy the next phase.  */
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
int r, a[1024], b[1024];
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
/* Exclusive scan using the user-defined reduction FOO declared above
   (omp_out += omp_in, identity 0) — behaviorally a + reduction.
   Orphaned worksharing loop; main supplies the parallel region.  */
__attribute__((noipa)) void
foo (int *a, int *b)
{
  #pragma omp for reduction (inscan, foo:r)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = r;		/* Exclusive prefix: R excludes a[i] here.  */
      #pragma omp scan exclusive(r)
      r += a[i];
    }
}
/* UDR-based exclusive scan over a local accumulator inside its own
   parallel region; each element contributes 2 * a[i].  Returns the
   total.  */
__attribute__((noipa)) int
bar (void)
{
  int s = 0;
  #pragma omp parallel
  #pragma omp for reduction (inscan, foo:s)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = s;		/* Prefix sum excluding element i.  */
      #pragma omp scan exclusive(s)
      s += 2 * a[i];
    }
  return s;
}
/* UDR-based exclusive scan as a combined parallel-for construct.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
  #pragma omp parallel for reduction (inscan, foo:r)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = r;		/* Exclusive prefix: R excludes a[i] here.  */
      #pragma omp scan exclusive(r)
      r += a[i];
    }
}
/* UDR-based combined parallel-for exclusive scan over a local
   accumulator, contributing 2 * a[i] per element; returns the total.  */
__attribute__((noipa)) int
qux (void)
{
  int s = 0;
  #pragma omp parallel for reduction (inscan, foo:s)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = s;		/* Exclusive prefix of 2*a[0..i-1].  */
      #pragma omp scan exclusive(s)
      s += 2 * a[i];
    }
  return s;
}
/* Serial driver: verifies foo/bar/baz/qux (user-defined-reduction
   scans) by recomputing each exclusive prefix sum sequentially.  */
int
main ()
{
  int chk = 0;
  for (int i = 0; i < 1024; ++i)
    {
      a[i] = i;
      b[i] = -1;
      asm ("" : "+g" (i));	/* Defeat constant propagation of I.  */
    }
  #pragma omp parallel
  foo (a, b);
  if (r != 1024 * 1023 / 2)
    abort ();
  for (int i = 0; i < 1024; ++i)
    {
      if (b[i] != chk)
	abort ();
      b[i] = 25;		/* Overwrite so later phases see fresh data.  */
      chk += i;
    }
  if (bar () != 1024 * 1023)
    abort ();
  chk = 0;
  for (int i = 0; i < 1024; ++i)
    {
      if (b[i] != chk)
	abort ();
      b[i] = -1;
      chk += 2 * i;
    }
  r = 0;
  baz (a, b);
  if (r != 1024 * 1023 / 2)
    abort ();
  chk = 0;
  for (int i = 0; i < 1024; ++i)
    {
      if (b[i] != chk)
	abort ();
      b[i] = -25;
      chk += i;
    }
  if (qux () != 1024 * 1023)
    abort ();
  chk = 0;
  for (int i = 0; i < 1024; ++i)
    {
      if (b[i] != chk)
	abort ();
      chk += 2 * i;
    }
  return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
float r = 1.0f, a[1024], b[1024];
/* Multiplicative exclusive scan over floats: b[i] gets the product of
   a[0..i-1], the global R accumulates the full product.  Orphaned
   worksharing loop; main supplies the parallel region.  */
__attribute__((noipa)) void
foo (float *a, float *b)
{
  #pragma omp for reduction (inscan, *:r)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = r;		/* Exclusive prefix product.  */
      #pragma omp scan exclusive(r)
      r *= a[i];
    }
}
/* Exclusive max-scan: b[i] gets the running maximum of a[0..i-1],
   starting from the max identity -inf.  Combined parallel-for
   construct; returns the overall maximum.  */
__attribute__((noipa)) float
bar (void)
{
  float s = -__builtin_inff ();
  #pragma omp parallel for reduction (inscan, max:s)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = s;		/* Max of elements strictly before i.  */
      #pragma omp scan exclusive(s)
      s = s > a[i] ? s : a[i];
    }
  return s;
}
/* Serial driver for the float scans: checks foo's exclusive prefix
   products, then rewrites A and checks bar's exclusive max-scan.  */
int
main ()
{
  float ref = 1.0f;
  for (int i = 0; i < 1024; ++i)
    {
      /* Ranges [0,80) and [200,280) use one pattern, [80,200) and
	 [280,380) another, the rest a period-6 table; products stay
	 exactly representable in float.  */
      if (i < 80 || (i >= 200 && i < 280))
	a[i] = (i & 1) ? 0.25f : 0.5f;
      else if (i < 380)
	a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
      else
	switch (i % 6)
	  {
	  case 0: a[i] = 0.25f; break;
	  case 1: a[i] = 2.0f; break;
	  case 2: a[i] = -1.0f; break;
	  case 3: a[i] = -4.0f; break;
	  case 4: a[i] = 0.5f; break;
	  case 5: a[i] = 1.0f; break;
	  default: a[i] = 0.0f; break;
	  }
      b[i] = -19.0f;
      asm ("" : "+g" (i));	/* Hide I from the optimizer.  */
    }
  #pragma omp parallel
  foo (a, b);
  if (r * 16384.0f != 0.125f)
    abort ();
  float m = -175.25f;
  for (int i = 0; i < 1024; ++i)
    {
      if (b[i] != ref)
	abort ();
      b[i] = -231.75f;		/* Poison before the max-scan phase.  */
      ref *= a[i];
      /* Refill A with a mostly-increasing pattern for bar's max scan.  */
      a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
      m += 0.75f;
    }
  if (bar () != 592.0f)
    abort ();
  ref = -__builtin_inff ();
  for (int i = 0; i < 1024; ++i)
    {
      if (b[i] != ref)
	abort ();
      if (ref < a[i])
	ref = a[i];
    }
  return 0;
}
/* { dg-require-effective-target size32plus } */
extern void abort (void);
int r, a[1024], b[1024];
unsigned short r2, b2[1024];
unsigned char r3, b3[1024];
/* Exclusive scan over three reduction variables of different widths
   (int, unsigned short, unsigned char) in one clause.  Orphaned
   worksharing loop; main supplies the parallel region.  */
__attribute__((noipa)) void
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
  #pragma omp for reduction (inscan, +:r, r2, r3)
  for (int i = 0; i < 1024; i++)
    {
      /* Read phase: none of r/r2/r3 include a[i] yet.  */
      {
	b[i] = r;
	b2[i] = r2;
	b3[i] = r3;
      }
      #pragma omp scan exclusive(r, r2, r3)
      /* Update phase: the narrow accumulators wrap modulo their width.  */
      { r += a[i]; r2 += a[i]; r3 += a[i]; }
    }
}
/* Multi-variable exclusive scan over locals, with its own parallel
   region; each element contributes 2 * a[i].  Returns the int total
   and writes the narrow totals through S2P/S3P.  */
__attribute__((noipa)) int
bar (unsigned short *s2p, unsigned char *s3p)
{
  int s = 0;
  unsigned short s2 = 0;
  unsigned char s3 = 0;
  #pragma omp parallel
  #pragma omp for reduction (inscan, +:s, s2, s3)
  for (int i = 0; i < 1024; i++)
    {
      /* Read phase: record the exclusive prefixes.  */
      { b[i] = s; b2[i] = s2; b3[i] = s3; }
      #pragma omp scan exclusive(s, s2, s3)
      /* Update phase.  */
      {
	s += 2 * a[i];
	s2 += 2 * a[i];
	s3 += 2 * a[i];
      }
    }
  *s2p = s2;
  *s3p = s3;
  return s;
}
/* Same multi-variable scan as foo, but as a combined parallel-for
   construct.  */
__attribute__((noipa)) void
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
  #pragma omp parallel for reduction (inscan, +:r, r2, r3)
  for (int i = 0; i < 1024; i++)
    {
      /* Read phase: exclusive prefixes of all three accumulators.  */
      {
	b[i] = r;
	b2[i] = r2;
	b3[i] = r3;
      }
      #pragma omp scan exclusive(r, r2, r3)
      /* Update phase.  */
      {
	r += a[i];
	r2 += a[i];
	r3 += a[i];
      }
    }
}
/* Combined parallel-for multi-variable exclusive scan over locals,
   contributing 2 * a[i] per element.  Returns the int total; narrow
   totals go out through S2P/S3P.  */
__attribute__((noipa)) int
qux (unsigned short *s2p, unsigned char *s3p)
{
  int s = 0;
  unsigned short s2 = 0;
  unsigned char s3 = 0;
  #pragma omp parallel for reduction (inscan, +:s, s2, s3)
  for (int i = 0; i < 1024; i++)
    {
      /* Read phase.  */
      { b[i] = s; b2[i] = s2; b3[i] = s3; }
      #pragma omp scan exclusive(s, s2, s3)
      /* Update phase.  */
      { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
    }
  *s2p = s2;
  *s3p = s3;
  return s;
}
int
main ()
{
int s = 0;
unsigned short s2;
unsigned char s3;
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
asm ("" : "+g" (i));
}
#pragma omp parallel
foo (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
if (bar (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
}
s += 2 * i;
}
r = 0;
r2 = 0;
r3 = 0;
baz (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
s2 = 0;
s3 = 0;
if (qux (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
s += 2 * i;
}
return 0;
}