Commit e4834818, authored and committed by Nathan Sidwell

omp-low.c (struct omp_context): Remove gwv_below, gwv_this fields.

	* omp-low.c (struct omp_context): Remove gwv_below, gwv_this
	fields.
	(is_oacc_parallel, is_oacc_kernels): New.
	(enclosing_target_ctx): May return NULL.
	(ctx_in_oacc_kernels_region): New.
	(check_oacc_kernel_gwv): New.
	(oacc_loop_or_target_p): Delete.
	(scan_omp_for): Don't calculate gwv mask.  Check parallel clause
	operands.  Strip reductions from kernels.
	(scan_omp_target): Don't calculate gwv mask.
	(lower_oacc_head_mark, lower_oacc_loop_marker,
	lower_oacc_head_tail): New.
	(struct oacc_collapse): New.
	(expand_oacc_collapse_init, expand_oacc_collapse_vars): New.
	(expand_omp_for_static_nochunk, expand_omp_for_static_chunk):
	Remove OpenACC handling.
	(expand_oacc_for): New.
	(expand_omp_for): Call expand_oacc_for.
	(lower_omp_for): Call lower_oacc_head_tail.

From-SVN: r229472
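
For context (not part of the commit): the lowering below targets OpenACC loops of roughly the following shape. A minimal source sketch, with names invented for illustration:

    /* Illustrative only.  A partitioned, collapsed OpenACC loop nest of
       the kind scan_omp_for/lower_omp_for now lower through IFN_UNIQUE
       head/tail markers and IFN_GOACC_LOOP calls.  */
    void
    scale (int n, int m, float *restrict x)
    {
    #pragma acc parallel loop gang vector collapse (2) copy (x[0:n * m])
      for (int i = 0; i < n; i++)
        for (int j = 0; j < m; j++)
          x[i * m + j] *= 2.0f;
    }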
@@ -200,14 +200,6 @@ struct omp_context
   /* True if this construct can be cancelled.  */
   bool cancellable;
-
-  /* For OpenACC loops, a mask of gang, worker and vector used at
-     levels below this one.  */
-  int gwv_below;
-  /* For OpenACC loops, a mask of gang, worker and vector used at
-     this level and above.  For parallel and kernels clauses, a mask
-     indicating which of num_gangs/num_workers/num_vectors was used.  */
-  int gwv_this;
 };
/* A structure holding the elements of:
@@ -299,6 +291,28 @@ static gphi *find_phi_with_arg_on_edge (tree, edge);
*handled_ops_p = false; \
break;
/* Return true if CTX corresponds to an oacc parallel region. */
static bool
is_oacc_parallel (omp_context *ctx)
{
enum gimple_code outer_type = gimple_code (ctx->stmt);
return ((outer_type == GIMPLE_OMP_TARGET)
&& (gimple_omp_target_kind (ctx->stmt)
== GF_OMP_TARGET_KIND_OACC_PARALLEL));
}
/* Return true if CTX corresponds to an oacc kernels region. */
static bool
is_oacc_kernels (omp_context *ctx)
{
enum gimple_code outer_type = gimple_code (ctx->stmt);
return ((outer_type == GIMPLE_OMP_TARGET)
&& (gimple_omp_target_kind (ctx->stmt)
== GF_OMP_TARGET_KIND_OACC_KERNELS));
}
/* Helper function to get the name of the array containing the partial
reductions for OpenACC reductions. */
static const char *
@@ -2933,28 +2947,95 @@ finish_taskreg_scan (omp_context *ctx)
     }
 }
 
 /* Find the enclosing offload context.  */
 
 static omp_context *
 enclosing_target_ctx (omp_context *ctx)
 {
-  while (ctx != NULL
-         && gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET)
-    ctx = ctx->outer;
-  gcc_assert (ctx != NULL);
+  for (; ctx; ctx = ctx->outer)
+    if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET)
+      break;
+
   return ctx;
 }
 
+/* Return true if ctx is part of an oacc kernels region.  */
+
 static bool
-oacc_loop_or_target_p (gimple *stmt)
+ctx_in_oacc_kernels_region (omp_context *ctx)
 {
-  enum gimple_code outer_type = gimple_code (stmt);
-  return ((outer_type == GIMPLE_OMP_TARGET
-           && ((gimple_omp_target_kind (stmt)
-                == GF_OMP_TARGET_KIND_OACC_PARALLEL)
-               || (gimple_omp_target_kind (stmt)
-                   == GF_OMP_TARGET_KIND_OACC_KERNELS)))
-          || (outer_type == GIMPLE_OMP_FOR
-              && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_OACC_LOOP));
+  for (; ctx != NULL; ctx = ctx->outer)
+    {
+      gimple *stmt = ctx->stmt;
+
+      if (gimple_code (stmt) == GIMPLE_OMP_TARGET
+          && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
+        return true;
+    }
+
+  return false;
+}
+
+/* Check the parallelism clauses inside a kernels region.
+   Until kernels handling moves to use the same loop indirection
+   scheme as parallel, we need to do this checking early.  */
+
+static unsigned
+check_oacc_kernel_gwv (gomp_for *stmt, omp_context *ctx)
+{
+  bool checking = true;
+  unsigned outer_mask = 0;
+  unsigned this_mask = 0;
+  bool has_seq = false, has_auto = false;
+
+  if (ctx->outer)
+    outer_mask = check_oacc_kernel_gwv (NULL, ctx->outer);
+  if (!stmt)
+    {
+      checking = false;
+      if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR)
+        return outer_mask;
+      stmt = as_a <gomp_for *> (ctx->stmt);
+    }
+
+  for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
+    {
+      switch (OMP_CLAUSE_CODE (c))
+        {
+        case OMP_CLAUSE_GANG:
+          this_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
+          break;
+        case OMP_CLAUSE_WORKER:
+          this_mask |= GOMP_DIM_MASK (GOMP_DIM_WORKER);
+          break;
+        case OMP_CLAUSE_VECTOR:
+          this_mask |= GOMP_DIM_MASK (GOMP_DIM_VECTOR);
+          break;
+        case OMP_CLAUSE_SEQ:
+          has_seq = true;
+          break;
+        case OMP_CLAUSE_AUTO:
+          has_auto = true;
+          break;
+        default:
+          break;
+        }
+    }
+
+  if (checking)
+    {
+      if (has_seq && (this_mask || has_auto))
+        error_at (gimple_location (stmt), "%<seq%> overrides other"
+                  " OpenACC loop specifiers");
+      else if (has_auto && this_mask)
+        error_at (gimple_location (stmt), "%<auto%> conflicts with other"
+                  " OpenACC loop specifiers");
+
+      if (this_mask & outer_mask)
+        error_at (gimple_location (stmt), "inner loop uses same"
+                  " OpenACC parallelism as containing loop");
+    }
+
+  return outer_mask | this_mask;
 }
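
For illustration (not part of the patch): inside a kernels region this check rejects loop nests that reuse a parallelism level, as well as seq/auto conflicts. A source sketch that would trigger the new diagnostics, where work() is a hypothetical function:

    /* Illustrative only.  Both loops claim gang parallelism, so the inner
       one draws "inner loop uses same OpenACC parallelism as containing
       loop"; pairing "seq" with "gang" on one loop would instead draw
       "seq overrides other OpenACC loop specifiers".  */
    extern void work (int, int);

    void
    bad_nest (void)
    {
    #pragma acc kernels
      {
    #pragma acc loop gang
        for (int i = 0; i < 100; i++)
    #pragma acc loop gang       /* error: reuses gang parallelism */
          for (int j = 0; j < 100; j++)
            work (i, j);
      }
    }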
/* Scan a GIMPLE_OMP_FOR. */
@@ -2962,52 +3043,62 @@ oacc_loop_or_target_p (gimple *stmt)
 static void
 scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
 {
-  enum gimple_code outer_type = GIMPLE_ERROR_MARK;
   omp_context *ctx;
   size_t i;
   tree clauses = gimple_omp_for_clauses (stmt);
 
-  if (outer_ctx)
-    outer_type = gimple_code (outer_ctx->stmt);
-
   ctx = new_omp_context (stmt, outer_ctx);
 
   if (is_gimple_omp_oacc (stmt))
     {
-      if (outer_ctx && outer_type == GIMPLE_OMP_FOR)
-        ctx->gwv_this = outer_ctx->gwv_this;
-      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-        {
-          int val;
-          if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG)
-            val = MASK_GANG;
-          else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WORKER)
-            val = MASK_WORKER;
-          else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR)
-            val = MASK_VECTOR;
-          else
-            continue;
-          ctx->gwv_this |= val;
-          if (!outer_ctx)
-            {
-              /* Skip; not nested inside a region.  */
-              continue;
-            }
-          if (!oacc_loop_or_target_p (outer_ctx->stmt))
-            {
-              /* Skip; not nested inside an OpenACC region.  */
-              continue;
-            }
-          if (outer_type == GIMPLE_OMP_FOR)
-            outer_ctx->gwv_below |= val;
-          if (OMP_CLAUSE_OPERAND (c, 0) != NULL_TREE)
-            {
-              omp_context *enclosing = enclosing_target_ctx (outer_ctx);
-              if (gimple_omp_target_kind (enclosing->stmt)
-                  == GF_OMP_TARGET_KIND_OACC_PARALLEL)
-                error_at (gimple_location (stmt),
-                          "no arguments allowed to gang, worker and vector clauses inside parallel");
-            }
-        }
+      omp_context *tgt = enclosing_target_ctx (outer_ctx);
+
+      if (!tgt || is_oacc_parallel (tgt))
+        for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+          {
+            char const *check = NULL;
+
+            switch (OMP_CLAUSE_CODE (c))
+              {
+              case OMP_CLAUSE_GANG:
+                check = "gang";
+                break;
+
+              case OMP_CLAUSE_WORKER:
+                check = "worker";
+                break;
+
+              case OMP_CLAUSE_VECTOR:
+                check = "vector";
+                break;
+
+              default:
+                break;
+              }
+
+            if (check && OMP_CLAUSE_OPERAND (c, 0))
+              error_at (gimple_location (stmt),
+                        "argument not permitted on %qs clause in"
+                        " OpenACC %<parallel%>", check);
+          }
+
+      if (tgt && is_oacc_kernels (tgt))
+        {
+          /* Strip out reductions, as they are not handled yet.  */
+          tree *prev_ptr = &clauses;
+
+          while (tree probe = *prev_ptr)
+            {
+              tree *next_ptr = &OMP_CLAUSE_CHAIN (probe);
+
+              if (OMP_CLAUSE_CODE (probe) == OMP_CLAUSE_REDUCTION)
+                *prev_ptr = *next_ptr;
+              else
+                prev_ptr = next_ptr;
+            }
+
+          gimple_omp_for_set_clauses (stmt, clauses);
+          check_oacc_kernel_gwv (stmt, ctx);
+        }
     }
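
For illustration (not part of the patch): with this check, a gang, worker or vector clause on a loop inside an OpenACC parallel region may no longer carry an argument. The static: argument of gang appears to live in a separate clause operand (see OMP_CLAUSE_GANG_STATIC_EXPR in lower_oacc_head_mark below) and so is unaffected. A source sketch, where work() is a hypothetical function:

    /* Illustrative only.  */
    extern void work (int);

    void
    bad_arg (void)
    {
    #pragma acc parallel
    #pragma acc loop gang (8)   /* error: argument not permitted on
                                   "gang" clause in OpenACC "parallel" */
      for (int i = 0; i < 100; i++)
        work (i);
    }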
@@ -3022,19 +3113,6 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
       scan_omp_op (gimple_omp_for_incr_ptr (stmt, i), ctx);
     }
   scan_omp (gimple_omp_body_ptr (stmt), ctx);
-
-  if (is_gimple_omp_oacc (stmt))
-    {
-      if (ctx->gwv_this & ctx->gwv_below)
-        error_at (gimple_location (stmt),
-                  "gang, worker and vector may occur only once in a loop nest");
-      else if (ctx->gwv_below != 0
-               && ctx->gwv_this > ctx->gwv_below)
-        error_at (gimple_location (stmt),
-                  "gang, worker and vector must occur in this order in a loop nest");
-      if (outer_ctx && outer_type == GIMPLE_OMP_FOR)
-        outer_ctx->gwv_below |= ctx->gwv_below;
-    }
 }
/* Scan an OpenMP sections directive. */
@@ -3105,19 +3183,6 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
       gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
     }
 
-  if (is_gimple_omp_oacc (stmt))
-    {
-      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-        {
-          if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS)
-            ctx->gwv_this |= MASK_GANG;
-          else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS)
-            ctx->gwv_this |= MASK_WORKER;
-          else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
-            ctx->gwv_this |= MASK_VECTOR;
-        }
-    }
-
   scan_sharing_clauses (clauses, ctx);
   scan_omp (gimple_omp_body_ptr (stmt), ctx);
@@ -5850,6 +5915,176 @@ lower_send_shared_vars (gimple_seq *ilist, gimple_seq *olist, omp_context *ctx)
}
}
/* Emit an OpenACC head marker call, encapsulating the partitioning and
other information that must be processed by the target compiler.
Return the maximum number of dimensions the associated loop might
be partitioned over. */
static unsigned
lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
gimple_seq *seq, omp_context *ctx)
{
unsigned levels = 0;
unsigned tag = 0;
tree gang_static = NULL_TREE;
auto_vec<tree, 5> args;
args.quick_push (build_int_cst
(integer_type_node, IFN_UNIQUE_OACC_HEAD_MARK));
args.quick_push (ddvar);
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_GANG:
tag |= OLF_DIM_GANG;
gang_static = OMP_CLAUSE_GANG_STATIC_EXPR (c);
/* static:* is represented by -1, and we can ignore it, as
scheduling is always static. */
if (gang_static && integer_minus_onep (gang_static))
gang_static = NULL_TREE;
levels++;
break;
case OMP_CLAUSE_WORKER:
tag |= OLF_DIM_WORKER;
levels++;
break;
case OMP_CLAUSE_VECTOR:
tag |= OLF_DIM_VECTOR;
levels++;
break;
case OMP_CLAUSE_SEQ:
tag |= OLF_SEQ;
break;
case OMP_CLAUSE_AUTO:
tag |= OLF_AUTO;
break;
case OMP_CLAUSE_INDEPENDENT:
tag |= OLF_INDEPENDENT;
break;
default:
continue;
}
}
if (gang_static)
{
if (DECL_P (gang_static))
gang_static = build_outer_var_ref (gang_static, ctx);
tag |= OLF_GANG_STATIC;
}
/* In a parallel region, loops are implicitly INDEPENDENT. */
omp_context *tgt = enclosing_target_ctx (ctx);
if (!tgt || is_oacc_parallel (tgt))
tag |= OLF_INDEPENDENT;
/* A loop lacking SEQ, GANG, WORKER and/or VECTOR is implicitly AUTO. */
if (!(tag & (((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE)
| OLF_SEQ)))
tag |= OLF_AUTO;
/* Ensure at least one level. */
if (!levels)
levels++;
args.quick_push (build_int_cst (integer_type_node, levels));
args.quick_push (build_int_cst (integer_type_node, tag));
if (gang_static)
args.quick_push (gang_static);
gcall *call = gimple_build_call_internal_vec (IFN_UNIQUE, args);
gimple_set_location (call, loc);
gimple_set_lhs (call, ddvar);
gimple_seq_add_stmt (seq, call);
return levels;
}
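
As a sketch of the emitted marker (shapes from the code above; names and literal values illustrative): for a loop carrying gang and vector clauses, levels is 2 and the head mark added to SEQ looks roughly like

    .data_dep = IFN_UNIQUE (OACC_HEAD_MARK, .data_dep, 2, tag);

where tag packs OLF_DIM_GANG and OLF_DIM_VECTOR (plus OLF_INDEPENDENT inside a parallel region), and a gang(static:N) expression is appended as a trailing argument with OLF_GANG_STATIC set.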
/* Emit an OpenACC loop head or tail marker to SEQ. LEVEL is the
partitioning level of the enclosed region. */
static void
lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
tree tofollow, gimple_seq *seq)
{
int marker_kind = (head ? IFN_UNIQUE_OACC_HEAD_MARK
: IFN_UNIQUE_OACC_TAIL_MARK);
tree marker = build_int_cst (integer_type_node, marker_kind);
int nargs = 2 + (tofollow != NULL_TREE);
gcall *call = gimple_build_call_internal (IFN_UNIQUE, nargs,
marker, ddvar, tofollow);
gimple_set_location (call, loc);
gimple_set_lhs (call, ddvar);
gimple_seq_add_stmt (seq, call);
}
/* Generate the before and after OpenACC loop sequences. CLAUSES are
the loop clauses, from which we extract reductions. Initialize
HEAD and TAIL. */
static void
lower_oacc_head_tail (location_t loc, tree clauses,
gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
bool inner = false;
tree ddvar = create_tmp_var (integer_type_node, ".data_dep");
gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
if (!count)
lower_oacc_loop_marker (loc, ddvar, false, integer_zero_node, tail);
tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
for (unsigned done = 1; count; count--, done++)
{
gimple_seq fork_seq = NULL;
gimple_seq join_seq = NULL;
tree place = build_int_cst (integer_type_node, -1);
gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 3,
fork_kind, ddvar, place);
gimple_set_location (fork, loc);
gimple_set_lhs (fork, ddvar);
gcall *join = gimple_build_call_internal (IFN_UNIQUE, 3,
join_kind, ddvar, place);
gimple_set_location (join, loc);
gimple_set_lhs (join, ddvar);
/* Mark the beginning of this level sequence. */
if (inner)
lower_oacc_loop_marker (loc, ddvar, true,
build_int_cst (integer_type_node, count),
&fork_seq);
lower_oacc_loop_marker (loc, ddvar, false,
build_int_cst (integer_type_node, done),
&join_seq);
gimple_seq_add_stmt (&fork_seq, fork);
gimple_seq_add_stmt (&join_seq, join);
/* Append this level to head. */
gimple_seq_add_seq (head, fork_seq);
/* Prepend it to tail. */
gimple_seq_add_seq (&join_seq, *tail);
*tail = join_seq;
inner = true;
}
/* Mark the end of the sequence. */
lower_oacc_loop_marker (loc, ddvar, true, NULL_TREE, head);
lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail);
}
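
Tracing the loop above for a two-level (say gang vector) partitioning gives this overall shape (sketch only; the .data_dep threading is elided):

    head:  IFN_UNIQUE (OACC_HEAD_MARK, ..., 2, tag)
           IFN_UNIQUE (OACC_FORK, ..., -1)
           IFN_UNIQUE (OACC_HEAD_MARK, ..., 1)   /* inner level marker */
           IFN_UNIQUE (OACC_FORK, ..., -1)
           IFN_UNIQUE (OACC_HEAD_MARK, ...)      /* end of head */
    ...loop body...
    tail:  IFN_UNIQUE (OACC_TAIL_MARK, ..., 2)
           IFN_UNIQUE (OACC_JOIN, ..., -1)
           IFN_UNIQUE (OACC_TAIL_MARK, ..., 1)
           IFN_UNIQUE (OACC_JOIN, ..., -1)
           IFN_UNIQUE (OACC_TAIL_MARK, ...)      /* end of tail */

Each fork appended to HEAD pairs with a join prepended to TAIL, so the tail unwinds in reverse nesting order.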
/* A convenience function to build an empty GIMPLE_COND with just the
condition. */
@@ -6760,6 +6995,149 @@ expand_omp_taskreg (struct omp_region *region)
update_ssa (TODO_update_ssa_only_virtuals);
}
/* Information about members of an OpenACC collapsed loop nest. */
struct oacc_collapse
{
tree base; /* Base value. */
tree iters; /* Number of steps. */
tree step; /* Step size. */
};
/* Helper for expand_oacc_for. Determine collapsed loop information.
Fill in COUNTS array. Emit any initialization code before GSI.
Return the calculated outer loop bound of BOUND_TYPE. */
static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
gimple_stmt_iterator *gsi,
oacc_collapse *counts, tree bound_type)
{
tree total = build_int_cst (bound_type, 1);
int ix;
gcc_assert (integer_onep (fd->loop.step));
gcc_assert (integer_zerop (fd->loop.n1));
for (ix = 0; ix != fd->collapse; ix++)
{
const omp_for_data_loop *loop = &fd->loops[ix];
tree iter_type = TREE_TYPE (loop->v);
tree diff_type = iter_type;
tree plus_type = iter_type;
gcc_assert (loop->cond_code == fd->loop.cond_code);
if (POINTER_TYPE_P (iter_type))
plus_type = sizetype;
if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
diff_type = signed_type_for (diff_type);
tree b = loop->n1;
tree e = loop->n2;
tree s = loop->step;
bool up = loop->cond_code == LT_EXPR;
tree dir = build_int_cst (diff_type, up ? +1 : -1);
bool negating;
tree expr;
b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
true, GSI_SAME_STMT);
e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
true, GSI_SAME_STMT);
/* Convert the step, avoiding possible unsigned->signed overflow. */
negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
if (negating)
s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
s = fold_convert (diff_type, s);
if (negating)
s = fold_build1 (NEGATE_EXPR, diff_type, s);
s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
true, GSI_SAME_STMT);
/* Determine the range, avoiding possible unsigned->signed overflow. */
negating = !up && TYPE_UNSIGNED (iter_type);
expr = fold_build2 (MINUS_EXPR, plus_type,
fold_convert (plus_type, negating ? b : e),
fold_convert (plus_type, negating ? e : b));
expr = fold_convert (diff_type, expr);
if (negating)
expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
tree range = force_gimple_operand_gsi
(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
/* Determine number of iterations. */
expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
true, GSI_SAME_STMT);
counts[ix].base = b;
counts[ix].iters = iters;
counts[ix].step = s;
total = fold_build2 (MULT_EXPR, bound_type, total,
fold_convert (bound_type, iters));
}
return total;
}
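
A worked instance (my numbers, not from the patch) of the iteration count (range - dir + s) / s computed above, as a self-contained C check:

    /* Illustrative only: mirrors expand_oacc_collapse_init's count for
       one up-counting loop, for (i = 0; i < 10; i += 3).  */
    #include <stdio.h>

    int
    main (void)
    {
      long b = 0, e = 10, s = 3;
      long dir = 1;                       /* cond_code is '<', so counting up */
      long range = e - b;                 /* 10 */
      long iters = (range - dir + s) / s; /* (10 - 1 + 3) / 3 == 4 */
      printf ("%ld iterations\n", iters); /* i = 0, 3, 6, 9 */
      return 0;
    }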
/* Emit initializers for collapsed loop members. IVAR is the outer
loop iteration variable, from which collapsed loop iteration values
are calculated. COUNTS array has been initialized by
expand_oacc_collapse_init. */
static void
expand_oacc_collapse_vars (const struct omp_for_data *fd,
gimple_stmt_iterator *gsi,
const oacc_collapse *counts, tree ivar)
{
tree ivar_type = TREE_TYPE (ivar);
/* The most rapidly changing iteration variable is the innermost
one. */
for (int ix = fd->collapse; ix--;)
{
const omp_for_data_loop *loop = &fd->loops[ix];
const oacc_collapse *collapse = &counts[ix];
tree iter_type = TREE_TYPE (loop->v);
tree diff_type = TREE_TYPE (collapse->step);
tree plus_type = iter_type;
enum tree_code plus_code = PLUS_EXPR;
tree expr;
if (POINTER_TYPE_P (iter_type))
{
plus_code = POINTER_PLUS_EXPR;
plus_type = sizetype;
}
expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
fold_convert (ivar_type, collapse->iters));
expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
collapse->step);
expr = fold_build2 (plus_code, iter_type, collapse->base,
fold_convert (plus_type, expr));
expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
true, GSI_SAME_STMT);
gassign *ass = gimple_build_assign (loop->v, expr);
gsi_insert_before (gsi, ass, GSI_SAME_STMT);
if (ix)
{
expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
fold_convert (ivar_type, collapse->iters));
ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
true, GSI_SAME_STMT);
}
}
}
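
A small self-contained illustration (mine, not from the patch) of the mod/div recovery of collapsed indices from the flattened variable, innermost first:

    /* Illustrative only: recover (i, j) of a collapse(2) nest with
       iteration counts n and m from flattened IVAR, as
       expand_oacc_collapse_vars does with TRUNC_MOD/TRUNC_DIV.  */
    #include <stdio.h>

    int
    main (void)
    {
      long n = 4, m = 5;
      for (long ivar = 0; ivar < n * m; ivar++)
        {
          long j = ivar % m;    /* innermost varies most rapidly */
          long i = ivar / m;    /* divide to step out to the outer loop */
          printf ("ivar=%2ld -> i=%ld j=%ld\n", ivar, i, j);
        }
      return 0;
    }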
/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
of the combined collapse > 1 loop constructs, generate code like:
@@ -8406,10 +8784,6 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   tree *counts = NULL;
   tree n1, n2, step;
 
-  gcc_checking_assert ((gimple_omp_for_kind (fd->for_stmt)
-                        != GF_OMP_FOR_KIND_OACC_LOOP)
-                       || !inner_stmt);
-
   itype = type = TREE_TYPE (fd->loop.v);
   if (POINTER_TYPE_P (type))
     itype = signed_type_for (type);
@@ -8502,10 +8876,6 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
       break;
-    case GF_OMP_FOR_KIND_OACC_LOOP:
-      nthreads = builtin_decl_explicit (BUILT_IN_GOACC_GET_NUM_THREADS);
-      threadid = builtin_decl_explicit (BUILT_IN_GOACC_GET_THREAD_NUM);
-      break;
     default:
       gcc_unreachable ();
     }
@@ -8732,9 +9102,6 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
     {
       t = gimple_omp_return_lhs (gsi_stmt (gsi));
-      if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
-        gcc_checking_assert (t == NULL_TREE);
-      else
-        gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
+      gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
     }
   gsi_remove (&gsi, true);
@@ -8873,10 +9240,6 @@ expand_omp_for_static_chunk (struct omp_region *region,
   tree *counts = NULL;
   tree n1, n2, step;
 
-  gcc_checking_assert ((gimple_omp_for_kind (fd->for_stmt)
-                        != GF_OMP_FOR_KIND_OACC_LOOP)
-                       || !inner_stmt);
-
   itype = type = TREE_TYPE (fd->loop.v);
   if (POINTER_TYPE_P (type))
     itype = signed_type_for (type);
@@ -8973,10 +9336,6 @@ expand_omp_for_static_chunk (struct omp_region *region,
       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
       break;
-    case GF_OMP_FOR_KIND_OACC_LOOP:
-      nthreads = builtin_decl_explicit (BUILT_IN_GOACC_GET_NUM_THREADS);
-      threadid = builtin_decl_explicit (BUILT_IN_GOACC_GET_THREAD_NUM);
-      break;
     default:
       gcc_unreachable ();
     }
@@ -9236,9 +9595,6 @@ expand_omp_for_static_chunk (struct omp_region *region,
   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
     {
       t = gimple_omp_return_lhs (gsi_stmt (gsi));
-      if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
-        gcc_checking_assert (t == NULL_TREE);
-      else
-        gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
+      gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
     }
   gsi_remove (&gsi, true);
@@ -10289,6 +10645,410 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
}
}
/* A subroutine of expand_omp_for. Generate code for an OpenACC
partitioned loop. The lowering here is abstracted, in that the
loop parameters are passed through internal functions, which are
further lowered by oacc_device_lower, once we get to the target
compiler. The loop is of the form:
for (V = B; V LTGT E; V += S) {BODY}
where LTGT is < or >. We may have a specified chunking size, CHUNKING
(constant 0 for no chunking) and we will have a GWV partitioning
mask, specifying dimensions over which the loop is to be
partitioned (see note below). We generate code that looks like:
<entry_bb> [incoming FALL->body, BRANCH->exit]
typedef signedintify (typeof (V)) T; // underlying signed integral type
T range = E - B;
T chunk_no = 0;
T DIR = LTGT == '<' ? +1 : -1;
T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
<head_bb> [created by splitting end of entry_bb]
T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
if (!(offset LTGT bound)) goto bottom_bb;
<body_bb> [incoming]
V = B + offset;
{BODY}
<cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
offset += step;
if (offset LTGT bound) goto body_bb; [*]
<bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
chunk_no++;
if (chunk < chunk_max) goto head_bb;
<exit_bb> [incoming]
V = B + ((range -/+ 1) / S +/- 1) * S [*]
[*] Needed if V live at end of loop
Note: CHUNKING & GWV mask are specified explicitly here. This is a
transition, and will be specified by a more general mechanism shortly.
*/
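
A concrete reading of the exit_bb fixup (illustrative numbers, not from the patch): for for (V = 0; V < 10; V += 2), range is 10, DIR is +1, and the final value of V, needed only when V is live after the loop, comes out one step past the last executed iterate:

    /* Illustrative only: mirrors the final-V expression built in exit_bb.  */
    #include <stdio.h>

    int
    main (void)
    {
      long b = 0, s = 2, range = 10, dir = 1;  /* for (V = 0; V < 10; V += 2) */
      long v_final = b + ((range - dir + s) / s) * s;
      printf ("%ld\n", v_final);               /* (11 / 2) * 2 == 10 */
      return 0;
    }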
static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
tree v = fd->loop.v;
enum tree_code cond_code = fd->loop.cond_code;
enum tree_code plus_code = PLUS_EXPR;
tree chunk_size = integer_minus_one_node;
tree gwv = integer_zero_node;
tree iter_type = TREE_TYPE (v);
tree diff_type = iter_type;
tree plus_type = iter_type;
struct oacc_collapse *counts = NULL;
gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
== GF_OMP_FOR_KIND_OACC_LOOP);
gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
if (POINTER_TYPE_P (iter_type))
{
plus_code = POINTER_PLUS_EXPR;
plus_type = sizetype;
}
if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
diff_type = signed_type_for (diff_type);
basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
basic_block bottom_bb = NULL;
/* entry_bb has two successors; the branch edge is to the exit
block, fallthrough edge to body. */
gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
&& BRANCH_EDGE (entry_bb)->dest == exit_bb);
/* If cont_bb non-NULL, it has 2 successors. The branch successor is
body_bb, or to a block whose only successor is the body_bb. Its
fallthrough successor is the final block (same as the branch
successor of the entry_bb). */
if (cont_bb)
{
basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
basic_block bed = BRANCH_EDGE (cont_bb)->dest;
gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
}
else
gcc_assert (!gimple_in_ssa_p (cfun));
/* The exit block only has entry_bb and cont_bb as predecessors. */
gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
tree chunk_no;
tree chunk_max = NULL_TREE;
tree bound, offset;
tree step = create_tmp_var (diff_type, ".step");
bool up = cond_code == LT_EXPR;
tree dir = build_int_cst (diff_type, up ? +1 : -1);
bool chunking = !gimple_in_ssa_p (cfun);
bool negating;
/* SSA instances. */
tree offset_incr = NULL_TREE;
tree offset_init = NULL_TREE;
gimple_stmt_iterator gsi;
gassign *ass;
gcall *call;
gimple *stmt;
tree expr;
location_t loc;
edge split, be, fte;
/* Split the end of entry_bb to create head_bb. */
split = split_block (entry_bb, last_stmt (entry_bb));
basic_block head_bb = split->dest;
entry_bb = split->src;
/* Chunk setup goes at end of entry_bb, replacing the omp_for. */
gsi = gsi_last_bb (entry_bb);
gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
loc = gimple_location (for_stmt);
if (gimple_in_ssa_p (cfun))
{
offset_init = gimple_omp_for_index (for_stmt, 0);
gcc_assert (integer_zerop (fd->loop.n1));
/* The SSA parallelizer does gang parallelism. */
gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
}
if (fd->collapse > 1)
{
counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
tree total = expand_oacc_collapse_init (fd, &gsi, counts,
TREE_TYPE (fd->loop.n2));
if (SSA_VAR_P (fd->loop.n2))
{
total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
true, GSI_SAME_STMT);
ass = gimple_build_assign (fd->loop.n2, total);
gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
}
}
tree b = fd->loop.n1;
tree e = fd->loop.n2;
tree s = fd->loop.step;
b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
/* Convert the step, avoiding possible unsigned->signed overflow. */
negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
if (negating)
s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
s = fold_convert (diff_type, s);
if (negating)
s = fold_build1 (NEGATE_EXPR, diff_type, s);
s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
if (!chunking)
chunk_size = integer_zero_node;
expr = fold_convert (diff_type, chunk_size);
chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
NULL_TREE, true, GSI_SAME_STMT);
/* Determine the range, avoiding possible unsigned->signed overflow. */
negating = !up && TYPE_UNSIGNED (iter_type);
expr = fold_build2 (MINUS_EXPR, plus_type,
fold_convert (plus_type, negating ? b : e),
fold_convert (plus_type, negating ? e : b));
expr = fold_convert (diff_type, expr);
if (negating)
expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
tree range = force_gimple_operand_gsi (&gsi, expr, true,
NULL_TREE, true, GSI_SAME_STMT);
chunk_no = build_int_cst (diff_type, 0);
if (chunking)
{
gcc_assert (!gimple_in_ssa_p (cfun));
expr = chunk_no;
chunk_max = create_tmp_var (diff_type, ".chunk_max");
chunk_no = create_tmp_var (diff_type, ".chunk_no");
ass = gimple_build_assign (chunk_no, expr);
gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
build_int_cst (integer_type_node,
IFN_GOACC_LOOP_CHUNKS),
dir, range, s, chunk_size, gwv);
gimple_call_set_lhs (call, chunk_max);
gimple_set_location (call, loc);
gsi_insert_before (&gsi, call, GSI_SAME_STMT);
}
else
chunk_size = chunk_no;
call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
build_int_cst (integer_type_node,
IFN_GOACC_LOOP_STEP),
dir, range, s, chunk_size, gwv);
gimple_call_set_lhs (call, step);
gimple_set_location (call, loc);
gsi_insert_before (&gsi, call, GSI_SAME_STMT);
/* Remove the GIMPLE_OMP_FOR. */
gsi_remove (&gsi, true);
/* Fixup edges from head_bb */
be = BRANCH_EDGE (head_bb);
fte = FALLTHRU_EDGE (head_bb);
be->flags |= EDGE_FALSE_VALUE;
fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
basic_block body_bb = fte->dest;
if (gimple_in_ssa_p (cfun))
{
gsi = gsi_last_bb (cont_bb);
gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
offset = gimple_omp_continue_control_use (cont_stmt);
offset_incr = gimple_omp_continue_control_def (cont_stmt);
}
else
{
offset = create_tmp_var (diff_type, ".offset");
offset_init = offset_incr = offset;
}
bound = create_tmp_var (TREE_TYPE (offset), ".bound");
/* Loop offset & bound go into head_bb. */
gsi = gsi_start_bb (head_bb);
call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
build_int_cst (integer_type_node,
IFN_GOACC_LOOP_OFFSET),
dir, range, s,
chunk_size, gwv, chunk_no);
gimple_call_set_lhs (call, offset_init);
gimple_set_location (call, loc);
gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
build_int_cst (integer_type_node,
IFN_GOACC_LOOP_BOUND),
dir, range, s,
chunk_size, gwv, offset_init);
gimple_call_set_lhs (call, bound);
gimple_set_location (call, loc);
gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
expr = build2 (cond_code, boolean_type_node, offset_init, bound);
gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
GSI_CONTINUE_LINKING);
/* V assignment goes into body_bb. */
if (!gimple_in_ssa_p (cfun))
{
gsi = gsi_start_bb (body_bb);
expr = build2 (plus_code, iter_type, b,
fold_convert (plus_type, offset));
expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
true, GSI_SAME_STMT);
ass = gimple_build_assign (v, expr);
gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
if (fd->collapse > 1)
expand_oacc_collapse_vars (fd, &gsi, counts, v);
}
/* Loop increment goes into cont_bb. If this is not a loop, we
will have spawned threads as if it was, and each one will
execute one iteration. The specification is not explicit about
whether such constructs are ill-formed or not, and they can
occur, especially when noreturn routines are involved. */
if (cont_bb)
{
gsi = gsi_last_bb (cont_bb);
gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
loc = gimple_location (cont_stmt);
/* Increment offset. */
if (gimple_in_ssa_p (cfun))
expr = build2 (plus_code, iter_type, offset,
fold_convert (plus_type, step));
else
expr = build2 (PLUS_EXPR, diff_type, offset, step);
expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
true, GSI_SAME_STMT);
ass = gimple_build_assign (offset_incr, expr);
gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
/* Remove the GIMPLE_OMP_CONTINUE. */
gsi_remove (&gsi, true);
/* Fixup edges from cont_bb */
be = BRANCH_EDGE (cont_bb);
fte = FALLTHRU_EDGE (cont_bb);
be->flags |= EDGE_TRUE_VALUE;
fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
if (chunking)
{
/* Split the beginning of exit_bb to make bottom_bb. We
need to insert a nop at the start, because splitting is
after a stmt, not before. */
gsi = gsi_start_bb (exit_bb);
stmt = gimple_build_nop ();
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
split = split_block (exit_bb, stmt);
bottom_bb = split->src;
exit_bb = split->dest;
gsi = gsi_last_bb (bottom_bb);
/* Chunk increment and test goes into bottom_bb. */
expr = build2 (PLUS_EXPR, diff_type, chunk_no,
build_int_cst (diff_type, 1));
ass = gimple_build_assign (chunk_no, expr);
gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
/* Chunk test at end of bottom_bb. */
expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
GSI_CONTINUE_LINKING);
/* Fixup edges from bottom_bb. */
split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
}
}
gsi = gsi_last_bb (exit_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
loc = gimple_location (gsi_stmt (gsi));
if (!gimple_in_ssa_p (cfun))
{
/* Insert the final value of V, in case it is live. This is the
value for the only thread that survives past the join. */
expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
true, GSI_SAME_STMT);
ass = gimple_build_assign (v, expr);
gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
}
/* Remove the OMP_RETURN. */
gsi_remove (&gsi, true);
if (cont_bb)
{
/* We now have one or two nested loops. Update the loop
structures. */
struct loop *parent = entry_bb->loop_father;
struct loop *body = body_bb->loop_father;
if (chunking)
{
struct loop *chunk_loop = alloc_loop ();
chunk_loop->header = head_bb;
chunk_loop->latch = bottom_bb;
add_loop (chunk_loop, parent);
parent = chunk_loop;
}
else if (parent != body)
{
gcc_assert (body->header == body_bb);
gcc_assert (body->latch == cont_bb
|| single_pred (body->latch) == cont_bb);
parent = NULL;
}
if (parent)
{
struct loop *body_loop = alloc_loop ();
body_loop->header = body_bb;
body_loop->latch = cont_bb;
add_loop (body_loop, parent);
}
}
}
/* Expand the OMP loop defined by REGION. */
static void
@@ -10324,6 +11084,11 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
     expand_omp_simd (region, &fd);
   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
     expand_cilk_for (region, &fd);
+  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
+    {
+      gcc_assert (!inner_stmt);
+      expand_oacc_for (region, &fd);
+    }
   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
     {
       if (gimple_omp_for_combined_into_p (fd.for_stmt))
@@ -13521,6 +14286,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
   gbind *new_stmt;
   gimple_seq omp_for_body, body, dlist;
+  gimple_seq oacc_head = NULL, oacc_tail = NULL;
   size_t i;
 
   push_gimplify_context ();
@@ -13629,6 +14395,16 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   /* Once lowered, extract the bounds and clauses.  */
   extract_omp_for_data (stmt, &fd, NULL);
 
+  if (is_gimple_omp_oacc (ctx->stmt)
+      && !ctx_in_oacc_kernels_region (ctx))
+    lower_oacc_head_tail (gimple_location (stmt),
+                          gimple_omp_for_clauses (stmt),
+                          &oacc_head, &oacc_tail, ctx);
+
+  /* Add OpenACC partitioning markers just before the loop.  */
+  if (oacc_head)
+    gimple_seq_add_seq (&body, oacc_head);
+
   lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
 
   if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
@@ -13662,6 +14438,11 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   /* Region exit marker goes at the end of the loop body.  */
   gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
   maybe_add_implicit_barrier_cancel (ctx, &body);
+
+  /* Add OpenACC joining and reduction markers just after the loop.  */
+  if (oacc_tail)
+    gimple_seq_add_seq (&body, oacc_tail);
+
   pop_gimplify_context (new_stmt);
   gimple_bind_append_vars (new_stmt, ctx->block_vars);
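
Taken together (a sketch, not literal dump output), the lowered body of a non-kernels OpenACC loop now reads:

    oacc_head markers (HEAD_MARK, FORK, ...)   /* partitioning, before the loop */
    the loop proper, with lastprivate handling
    GIMPLE_OMP_RETURN                          /* region exit marker */
    oacc_tail markers (..., JOIN, TAIL_MARK)   /* joining/reductions, after */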