Commit 1612b1fe by Jakub Jelinek Committed by Jakub Jelinek

omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument...

	* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
	create another "omp scan inscan exclusive" array if
	!ctx->scan_inclusive.
	(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
	(lower_omp_scan): Likewise.
	* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
	2-bit bitfield for simd_lane_access_p member.
	* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
	aux == (void *)-4 as simd lane access.
	* tree-vect-stmts.c (check_scan_store): Handle exclusive scan.  Update
	comment with permutations to show the canonical permutation order.
	(vectorizable_scan_store): Handle exclusive scan.
	(vectorizable_store): Call vectorizable_scan_store even for
	STMT_VINFO_SIMD_LANE_ACCESS_P > 3.

	* gcc.dg/vect/vect-simd-12.c: New test.
	* gcc.dg/vect/vect-simd-13.c: New test.
	* gcc.dg/vect/vect-simd-14.c: New test.
	* gcc.dg/vect/vect-simd-15.c: New test.
	* gcc.target/i386/sse2-vect-simd-12.c: New test.
	* gcc.target/i386/sse2-vect-simd-13.c: New test.
	* gcc.target/i386/sse2-vect-simd-14.c: New test.
	* gcc.target/i386/sse2-vect-simd-15.c: New test.
	* gcc.target/i386/avx2-vect-simd-12.c: New test.
	* gcc.target/i386/avx2-vect-simd-13.c: New test.
	* gcc.target/i386/avx2-vect-simd-14.c: New test.
	* gcc.target/i386/avx2-vect-simd-15.c: New test.
	* gcc.target/i386/avx512f-vect-simd-12.c: New test.
	* gcc.target/i386/avx512f-vect-simd-13.c: New test.
	* gcc.target/i386/avx512f-vect-simd-14.c: New test.
	* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
	* g++.dg/vect/simd-6.cc: New test.
	* g++.dg/vect/simd-7.cc: New test.
	* g++.dg/vect/simd-8.cc: New test.
	* g++.dg/vect/simd-9.cc: New test.
	* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.

From-SVN: r272544
parent e73fb06d
2019-06-21 Jakub Jelinek <jakub@redhat.com> 2019-06-21 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
create another "omp scan inscan exclusive" array if
!ctx->scan_inclusive.
(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
(lower_omp_scan): Likewise.
* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
2-bit bitfield for simd_lane_access_p member.
* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
aux == (void *)-4 as simd lane access.
* tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update
comment with permutations to show the canonical permutation order.
(vectorizable_scan_store): Handle exclusive scan.
(vectorizable_store): Call vectorizable_scan_store even for
STMT_VINFO_SIMD_LANE_ACCESS_P > 3.
* tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle * tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
"omp simd array" arrays with one byte elements. "omp simd array" arrays with one byte elements.
......
...@@ -3692,7 +3692,8 @@ struct omplow_simd_context { ...@@ -3692,7 +3692,8 @@ struct omplow_simd_context {
static bool static bool
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
omplow_simd_context *sctx, tree &ivar, omplow_simd_context *sctx, tree &ivar,
tree &lvar, tree *rvar = NULL) tree &lvar, tree *rvar = NULL,
tree *rvar2 = NULL)
{ {
if (known_eq (sctx->max_vf, 0U)) if (known_eq (sctx->max_vf, 0U))
{ {
...@@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, ...@@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
*rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
sctx->lastlane, NULL_TREE, NULL_TREE); sctx->lastlane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar) = 1; TREE_THIS_NOTRAP (*rvar) = 1;
if (!ctx->scan_inclusive)
{
/* And for exclusive scan yet another one, which will
hold the value during the scan phase. */
tree savar = create_tmp_var_raw (atype);
if (TREE_ADDRESSABLE (new_var))
TREE_ADDRESSABLE (savar) = 1;
DECL_ATTRIBUTES (savar)
= tree_cons (get_identifier ("omp simd array"), NULL,
tree_cons (get_identifier ("omp simd inscan "
"exclusive"), NULL,
DECL_ATTRIBUTES (savar)));
gimple_add_tmp_var (savar);
ctx->cb.decl_map->put (iavar, savar);
*rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
sctx->idx, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar2) = 1;
}
} }
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx, ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
NULL_TREE, NULL_TREE); NULL_TREE, NULL_TREE);
...@@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
new_vard = TREE_OPERAND (new_var, 0); new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard)); gcc_assert (DECL_P (new_vard));
} }
tree rvar = NULL_TREE, *rvarp = NULL; tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
if (is_simd if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c)) && OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar; rvarp = &rvar;
if (is_simd if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx, && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
ivar, lvar, rvarp)) ivar, lvar, rvarp,
&rvar2))
{ {
if (new_vard == new_var) if (new_vard == new_var)
{ {
...@@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
(c, ivar2, build_outer_var_ref (var, ctx)); (c, ivar2, build_outer_var_ref (var, ctx));
gimplify_and_add (x, &llist[0]); gimplify_and_add (x, &llist[0]);
if (rvar2)
{
x = lang_hooks.decls.omp_clause_default_ctor
(c, unshare_expr (rvar2),
build_outer_var_ref (var, ctx));
gimplify_and_add (x, &llist[0]);
}
/* For types that need construction, add another /* For types that need construction, add another
private var which will be default constructed private var which will be default constructed
and optionally initialized with and optionally initialized with
...@@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
iteration. */ iteration. */
tree nv = create_tmp_var_raw (TREE_TYPE (ivar)); tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
gimple_add_tmp_var (nv); gimple_add_tmp_var (nv);
ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0), ctx->cb.decl_map->put (TREE_OPERAND (rvar2
? rvar2
: ivar, 0),
nv); nv);
x = lang_hooks.decls.omp_clause_default_ctor x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx)); (c, nv, build_outer_var_ref (var, ctx));
...@@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_stmt (&dtor, &tseq); gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq); gimple_seq_add_seq (&llist[1], tseq);
} }
if (rvar2)
{
x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
}
break; break;
} }
if (x) if (x)
...@@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_seq (ilist, tseq); gimple_seq_add_seq (ilist, tseq);
} }
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
if (!ctx->scan_inclusive)
{
tree nv2
= create_tmp_var_raw (TREE_TYPE (new_var));
gimple_add_tmp_var (nv2);
ctx->cb.decl_map->put (nv, nv2);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv2, build_outer_var_ref (var, ctx));
gimplify_and_add (x, ilist);
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
}
x = lang_hooks.decls.omp_clause_dtor (c, nv); x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x) if (x)
{ {
...@@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_seq (dlist, tseq); gimple_seq_add_seq (dlist, tseq);
} }
} }
else if (!ctx->scan_inclusive
&& TREE_ADDRESSABLE (TREE_TYPE (new_var)))
{
tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
gimple_add_tmp_var (nv2);
ctx->cb.decl_map->put (new_vard, nv2);
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
}
DECL_HAS_VALUE_EXPR_P (placeholder) = 0; DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
goto do_dtor; goto do_dtor;
} }
...@@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
new_vard = TREE_OPERAND (new_var, 0); new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard)); gcc_assert (DECL_P (new_vard));
} }
tree rvar = NULL_TREE, *rvarp = NULL; tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
if (is_simd if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c)) && OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar; rvarp = &rvar;
if (is_simd if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx, && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
ivar, lvar, rvarp)) ivar, lvar, rvarp,
&rvar2))
{ {
if (new_vard != new_var) if (new_vard != new_var)
{ {
...@@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gimple_seq before = NULL; gimple_seq before = NULL;
omp_context *octx = ctx->outer; omp_context *octx = ctx->outer;
gcc_assert (octx); gcc_assert (octx);
if (!octx->scan_inclusive && !has_clauses)
{
gimple_stmt_iterator gsi2 = *gsi_p;
gsi_next (&gsi2);
gimple *stmt2 = gsi_stmt (gsi2);
/* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses
with following GIMPLE_OMP_SCAN with clauses, so that input_phase,
the one with exclusive clause(s), comes first. */
if (stmt2
&& gimple_code (stmt2) == GIMPLE_OMP_SCAN
&& gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL)
{
gsi_remove (gsi_p, false);
gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT);
ctx = maybe_lookup_ctx (stmt2);
gcc_assert (ctx);
lower_omp_scan (gsi_p, ctx);
return;
}
}
bool input_phase = has_clauses ^ octx->scan_inclusive; bool input_phase = has_clauses ^ octx->scan_inclusive;
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
&& !gimple_omp_for_combined_into_p (octx->stmt) && !gimple_omp_for_combined_into_p (octx->stmt))
&& octx->scan_inclusive)
{ {
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
OMP_CLAUSE__SIMDUID_)) OMP_CLAUSE__SIMDUID_))
{ {
tree uid = OMP_CLAUSE__SIMDUID__DECL (c); tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
lane = create_tmp_var (unsigned_type_node); lane = create_tmp_var (unsigned_type_node);
tree t = build_int_cst (integer_type_node, 1 + !input_phase); tree t = build_int_cst (integer_type_node,
input_phase ? 1
: octx->scan_inclusive ? 2 : 3);
gimple *g gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
gimple_call_set_lhs (g, lane); gimple_call_set_lhs (g, lane);
...@@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree val = new_var; tree val = new_var;
tree var2 = NULL_TREE; tree var2 = NULL_TREE;
tree var3 = NULL_TREE; tree var3 = NULL_TREE;
tree var4 = NULL_TREE;
tree lane0 = NULL_TREE;
tree new_vard = new_var; tree new_vard = new_var;
if (omp_is_reference (var)) if (omp_is_reference (var))
{ {
...@@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
DECL_ATTRIBUTES (v))) DECL_ATTRIBUTES (v)))
{ {
val = unshare_expr (val); val = unshare_expr (val);
lane0 = TREE_OPERAND (val, 1);
TREE_OPERAND (val, 1) = lane; TREE_OPERAND (val, 1) = lane;
var2 = lookup_decl (v, octx); var2 = lookup_decl (v, octx);
if (!octx->scan_inclusive)
var4 = lookup_decl (var2, octx);
if (input_phase if (input_phase
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
var3 = maybe_lookup_decl (var2, octx); var3 = maybe_lookup_decl (var4 ? var4 : var2, octx);
if (!input_phase) if (!input_phase)
{ {
var2 = build4 (ARRAY_REF, TREE_TYPE (val), var2 = build4 (ARRAY_REF, TREE_TYPE (val),
var2, lane, NULL_TREE, NULL_TREE); var2, lane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (var2) = 1; TREE_THIS_NOTRAP (var2) = 1;
if (!octx->scan_inclusive)
{
var4 = build4 (ARRAY_REF, TREE_TYPE (val),
var4, lane, NULL_TREE,
NULL_TREE);
TREE_THIS_NOTRAP (var4) = 1;
}
} }
else else
var2 = val; var2 = val;
...@@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
else else
{ {
var2 = build_outer_var_ref (var, octx); var2 = build_outer_var_ref (var, octx);
if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{ {
var3 = maybe_lookup_decl (new_vard, octx); var3 = maybe_lookup_decl (new_vard, octx);
if (var3 == new_vard) if (var3 == new_vard || var3 == NULL_TREE)
var3 = NULL_TREE;
else if (!octx->scan_inclusive && !input_phase)
{
var4 = maybe_lookup_decl (var3, octx);
if (var4 == var3 || var4 == NULL_TREE)
{
if (TREE_ADDRESSABLE (TREE_TYPE (new_var)))
{
var4 = var3;
var3 = NULL_TREE; var3 = NULL_TREE;
} }
else
var4 = NULL_TREE;
}
}
}
if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
var4 = create_tmp_var (TREE_TYPE (val));
} }
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{ {
...@@ -8689,8 +8816,16 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8689,8 +8816,16 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
} }
else else
{ {
tree x;
if (!octx->scan_inclusive)
{
tree v4 = unshare_expr (var4);
tree v2 = unshare_expr (var2);
x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2);
gimplify_and_add (x, &before);
}
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE); ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
tree vexpr = val; tree vexpr = val;
if (x && omp_is_reference (var)) if (x && omp_is_reference (var))
...@@ -8706,9 +8841,19 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8706,9 +8841,19 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
SET_DECL_VALUE_EXPR (new_vard, x); SET_DECL_VALUE_EXPR (new_vard, x);
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0; DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
x = lang_hooks.decls.omp_clause_assign_op (c, val, var2); if (octx->scan_inclusive)
{
x = lang_hooks.decls.omp_clause_assign_op (c, val,
var2);
gimplify_and_add (x, &before); gimplify_and_add (x, &before);
} }
else if (lane0 == NULL_TREE)
{
x = lang_hooks.decls.omp_clause_assign_op (c, val,
var4);
gimplify_and_add (x, &before);
}
}
} }
else else
{ {
...@@ -8728,9 +8873,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) ...@@ -8728,9 +8873,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree x = build2 (code, TREE_TYPE (var2), tree x = build2 (code, TREE_TYPE (var2),
unshare_expr (var2), unshare_expr (val)); unshare_expr (var2), unshare_expr (val));
if (octx->scan_inclusive)
{
gimplify_assign (unshare_expr (var2), x, &before); gimplify_assign (unshare_expr (var2), x, &before);
gimplify_assign (val, var2, &before); gimplify_assign (val, var2, &before);
} }
else
{
gimplify_assign (unshare_expr (var4),
unshare_expr (var2), &before);
gimplify_assign (var2, x, &before);
if (lane0 == NULL_TREE)
gimplify_assign (val, var4, &before);
}
}
}
if (!octx->scan_inclusive && !input_phase && lane0)
{
tree vexpr = unshare_expr (var4);
TREE_OPERAND (vexpr, 1) = lane0;
if (omp_is_reference (var))
vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
SET_DECL_VALUE_EXPR (new_vard, vexpr);
} }
} }
} }
......
2019-06-21 Jakub Jelinek <jakub@redhat.com> 2019-06-21 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-12.c: New test.
* gcc.dg/vect/vect-simd-13.c: New test.
* gcc.dg/vect/vect-simd-14.c: New test.
* gcc.dg/vect/vect-simd-15.c: New test.
* gcc.target/i386/sse2-vect-simd-12.c: New test.
* gcc.target/i386/sse2-vect-simd-13.c: New test.
* gcc.target/i386/sse2-vect-simd-14.c: New test.
* gcc.target/i386/sse2-vect-simd-15.c: New test.
* gcc.target/i386/avx2-vect-simd-12.c: New test.
* gcc.target/i386/avx2-vect-simd-13.c: New test.
* gcc.target/i386/avx2-vect-simd-14.c: New test.
* gcc.target/i386/avx2-vect-simd-15.c: New test.
* gcc.target/i386/avx512f-vect-simd-12.c: New test.
* gcc.target/i386/avx512f-vect-simd-13.c: New test.
* gcc.target/i386/avx512f-vect-simd-14.c: New test.
* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
* g++.dg/vect/simd-6.cc: New test.
* g++.dg/vect/simd-7.cc: New test.
* g++.dg/vect/simd-8.cc: New test.
* g++.dg/vect/simd-9.cc: New test.
* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.
PR c++/90950 PR c++/90950
* g++.dg/gomp/lastprivate-1.C: New test. * g++.dg/gomp/lastprivate-1.C: New test.
......
...@@ -8,7 +8,7 @@ f1 (int *c, int *d) ...@@ -8,7 +8,7 @@ f1 (int *c, int *d)
for (i = 0; i < 64; i++) for (i = 0; i < 64; i++)
{ {
d[i] = a; d[i] = a;
#pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ #pragma omp scan exclusive (a)
a += c[i]; a += c[i];
} }
} }
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
template <typename T>
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
T s;
};
template <typename T>
S<T>::S () : s (0)
{
}
template <typename T>
S<T>::~S ()
{
}
template <typename T>
S<T>::S (const S &x)
{
s = x.s;
}
template <typename T>
S<T> &
S<T>::operator= (const S &x)
{
s = x.s;
return *this;
}
template <typename T>
static inline void
ini (S<T> &x)
{
x.s = 0;
}
S<int> r, a[1024], b[1024];
#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
template <typename T>
__attribute__((noipa)) void
foo (S<T> *a, S<T> *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
template <typename T>
__attribute__((noipa)) S<T>
bar (void)
{
S<T> s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<T> (s);
}
__attribute__((noipa)) void
baz (S<int> *a, S<int> *b)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S<int>
qux (void)
{
S<int> s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<int> (s);
}
int
main ()
{
S<int> s;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
foo (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (bar<int> ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
r.s = 0;
baz (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
return 0;
}
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#include "../../gcc.dg/vect/tree-vect.h"
int r, a[1024], b[1024], q;
template <typename T, typename U>
__attribute__((noipa)) void
foo (T a, T b, U r)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
template <typename T>
__attribute__((noipa)) T
bar (void)
{
T &s = q;
q = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
template <typename T>
__attribute__((noipa)) void
baz (T *a, T *b, T &r)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (T i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
template <typename T>
__attribute__((noipa)) int
qux (void)
{
T s = q;
q = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo<int *, int &> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar<int> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz<int> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux<int &> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
int r, a[1024], b[1024], q;
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int &s = q;
q = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int &s = q;
q = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;
};
S::S () : s (0)
{
}
S::~S ()
{
}
S::S (const S &x)
{
s = x.s;
}
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
static inline void
ini (S &x)
{
x.s = 0;
}
S r, a[1024], b[1024];
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
__attribute__((noipa)) void
foo (S *a, S *b, S &r)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S
bar (void)
{
S s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
__attribute__((noipa)) void
baz (S *a, S *b, S &r)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S
qux (void)
{
S s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
int
main ()
{
S s;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
foo (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
r.s = 0;
baz (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
int r, a[1024], b[1024];
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
int r, a[1024], b[1024];
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
float r = 1.0f, a[1024], b[1024];
__attribute__((noipa)) void
foo (float *a, float *b)
{
#pragma omp simd reduction (inscan, *:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r *= a[i];
}
}
__attribute__((noipa)) float
bar (void)
{
float s = -__builtin_inff ();
#pragma omp simd reduction (inscan, max:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s = s > a[i] ? s : a[i];
}
return s;
}
int
main ()
{
float s = 1.0f;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
if (i < 80)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 200)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else if (i < 280)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 380)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else
switch (i % 6)
{
case 0: a[i] = 0.25f; break;
case 1: a[i] = 2.0f; break;
case 2: a[i] = -1.0f; break;
case 3: a[i] = -4.0f; break;
case 4: a[i] = 0.5f; break;
case 5: a[i] = 1.0f; break;
default: a[i] = 0.0f; break;
}
b[i] = -19.0f;
asm ("" : "+g" (i));
}
foo (a, b);
if (r * 16384.0f != 0.125f)
abort ();
float m = -175.25f;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -231.75f;
s *= a[i];
a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
m += 0.75f;
}
if (bar () != 592.0f)
abort ();
s = -__builtin_inff ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
if (s < a[i])
s = a[i];
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
int r, a[1024], b[1024];
unsigned short r2, b2[1024];
unsigned char r3, b3[1024];
__attribute__((noipa)) void
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp simd reduction (inscan, +:r, r2, r3)
for (int i = 0; i < 1024; i++)
{
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
#pragma omp scan exclusive(r, r2, r3)
{ r += a[i]; r2 += a[i]; r3 += a[i]; }
}
}
__attribute__((noipa)) int
bar (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp simd reduction (inscan, +:s, s2, s3)
for (int i = 0; i < 1024; i++)
{
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
#pragma omp scan exclusive(s, s2, s3)
{
s += 2 * a[i];
s2 += 2 * a[i];
s3 += 2 * a[i];
}
}
*s2p = s2;
*s3p = s3;
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
#pragma omp scan exclusive(r, r2, r3)
{
r += a[i];
r2 += a[i];
r3 += a[i];
}
}
}
__attribute__((noipa)) int
qux (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
for (int i = 0; i < 1024; i++)
{
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
#pragma omp scan exclusive(s, s2, s3)
{ s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
}
*s2p = s2;
*s3p = s3;
return s;
}
int
main ()
{
int s = 0;
unsigned short s2;
unsigned char s3;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
if (bar (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
}
s += 2 * i;
}
r = 0;
r2 = 0;
r3 = 0;
baz (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
s2 = 0;
s3 = 0;
if (qux (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512bw } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512bw-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
static void
avx512bw_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
static void
sse2_test (void)
{
do_main ();
}
...@@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) ...@@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
/* See if this was detected as SIMD lane access. */ /* See if this was detected as SIMD lane access. */
if (dr->aux == (void *)-1 if (dr->aux == (void *)-1
|| dr->aux == (void *)-2 || dr->aux == (void *)-2
|| dr->aux == (void *)-3) || dr->aux == (void *)-3
|| dr->aux == (void *)-4)
{ {
if (nested_in_vect_loop_p (loop, stmt_info)) if (nested_in_vect_loop_p (loop, stmt_info))
return opt_result::failure_at (stmt_info->stmt, return opt_result::failure_at (stmt_info->stmt,
......
...@@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
kinds are there in order to allow optimizing the initializer store kinds are there in order to allow optimizing the initializer store
and combiner sequence, e.g. if it is originally some C++ish user and combiner sequence, e.g. if it is originally some C++ish user
defined reduction, but allow the vectorizer to pattern recognize it defined reduction, but allow the vectorizer to pattern recognize it
and turn into the appropriate vectorized scan. */ and turn into the appropriate vectorized scan.
For exclusive scan, this is slightly different:
#pragma omp simd reduction(inscan,+:r)
for (...)
{
use (r);
#pragma omp scan exclusive (r)
r += something ();
}
shall have body with:
// Initialization for input phase, store the reduction initializer:
_20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
_21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
D.2042[_21] = 0;
// Actual input phase:
...
r.0_5 = D.2042[_20];
_6 = _4 + r.0_5;
D.2042[_20] = _6;
// Initialization for scan phase:
_25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
_26 = D.2043[_25];
D.2044[_25] = _26;
_27 = D.2042[_25];
_28 = _26 + _27;
D.2043[_25] = _28;
// Actual scan phase:
...
r.1_8 = D.2044[_20];
... */
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
{ {
...@@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
if (TREE_CODE (rhs) != SSA_NAME) if (TREE_CODE (rhs) != SSA_NAME)
goto fail; goto fail;
gimple *other_store_stmt = NULL;
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
bool inscan_var_store
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
if (!inscan_var_store)
{
use_operand_p use_p; use_operand_p use_p;
imm_use_iterator iter; imm_use_iterator iter;
gimple *other_store_stmt = NULL;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
{ {
gimple *use_stmt = USE_STMT (use_p); gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt)) if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue; continue;
if (gimple_bb (use_stmt) != gimple_bb (stmt) if (gimple_bb (use_stmt) != gimple_bb (stmt)
|| !gimple_store_p (use_stmt) || !is_gimple_assign (use_stmt)
|| other_store_stmt) || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
|| other_store_stmt
|| TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
goto fail; goto fail;
other_store_stmt = use_stmt; other_store_stmt = use_stmt;
} }
if (other_store_stmt == NULL) if (other_store_stmt == NULL)
goto fail; goto fail;
stmt_vec_info other_store_stmt_info rhs = gimple_assign_lhs (other_store_stmt);
= loop_vinfo->lookup_stmt (other_store_stmt); if (!single_imm_use (rhs, &use_p, &other_store_stmt))
if (other_store_stmt_info == NULL goto fail;
|| STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3) }
}
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
{
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue;
if (other_store_stmt)
goto fail;
other_store_stmt = use_stmt;
}
}
else
goto fail; goto fail;
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
...@@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
tree rhs1 = gimple_assign_rhs1 (def_stmt); tree rhs1 = gimple_assign_rhs1 (def_stmt);
tree rhs2 = gimple_assign_rhs2 (def_stmt); tree rhs2 = gimple_assign_rhs2 (def_stmt);
if (TREE_CODE (rhs1) != SSA_NAME if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
|| TREE_CODE (rhs2) != SSA_NAME)
goto fail; goto fail;
gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
...@@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
if (load1_stmt_info == NULL if (load1_stmt_info == NULL
|| load2_stmt_info == NULL || load2_stmt_info == NULL
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3) != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
goto fail;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
{
dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
goto fail; goto fail;
tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
tree lrhs;
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
lrhs = rhs1;
else
lrhs = rhs2;
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
continue;
if (other_store_stmt)
goto fail;
other_store_stmt = use_stmt;
}
}
if (scan_operand_equal_p (gimple_assign_lhs (stmt), if (other_store_stmt == NULL)
goto fail;
if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
|| !gimple_store_p (other_store_stmt))
goto fail;
stmt_vec_info other_store_stmt_info
= loop_vinfo->lookup_stmt (other_store_stmt);
if (other_store_stmt_info == NULL
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
goto fail;
gimple *stmt1 = stmt;
gimple *stmt2 = other_store_stmt;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
std::swap (stmt1, stmt2);
if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
gimple_assign_rhs1 (load2_stmt))) gimple_assign_rhs1 (load2_stmt)))
{ {
std::swap (rhs1, rhs2); std::swap (rhs1, rhs2);
std::swap (load1_stmt, load2_stmt); std::swap (load1_stmt, load2_stmt);
std::swap (load1_stmt_info, load2_stmt_info); std::swap (load1_stmt_info, load2_stmt_info);
} }
if (!scan_operand_equal_p (gimple_assign_lhs (stmt), if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
gimple_assign_rhs1 (load1_stmt)) gimple_assign_rhs1 (load1_stmt)))
|| !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt), goto fail;
tree var3 = NULL_TREE;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
&& !scan_operand_equal_p (gimple_assign_lhs (stmt2),
gimple_assign_rhs1 (load2_stmt))) gimple_assign_rhs1 (load2_stmt)))
goto fail; goto fail;
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
goto fail;
var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
|| lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
|| lookup_attribute ("omp simd inscan exclusive",
DECL_ATTRIBUTES (var3)))
goto fail;
}
dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
...@@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
std::swap (var1, var2); std::swap (var1, var2);
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
if (!lookup_attribute ("omp simd inscan exclusive",
DECL_ATTRIBUTES (var1)))
goto fail;
var1 = var3;
}
if (loop_vinfo->scan_map == NULL) if (loop_vinfo->scan_map == NULL)
goto fail; goto fail;
tree *init = loop_vinfo->scan_map->get (var1); tree *init = loop_vinfo->scan_map->get (var1);
...@@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
goto fail; goto fail;
/* The IL is as expected, now check if we can actually vectorize it. /* The IL is as expected, now check if we can actually vectorize it.
Inclusive scan:
_26 = D.2043[_25]; _26 = D.2043[_25];
_27 = D.2042[_25]; _27 = D.2042[_25];
_28 = _26 + _27; _28 = _26 + _27;
...@@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
from the D.2042[_21] = 0; store): from the D.2042[_21] = 0; store):
_30 = MEM <vector(8) int> [(int *)&D.2043]; _30 = MEM <vector(8) int> [(int *)&D.2043];
_31 = MEM <vector(8) int> [(int *)&D.2042]; _31 = MEM <vector(8) int> [(int *)&D.2042];
_32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>; _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_33 = _31 + _32; _33 = _31 + _32;
// _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
_34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>; _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
_35 = _33 + _34; _35 = _33 + _34;
// _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[1]+.._31[4], ... _31[4]+.._31[7] }; // _31[1]+.._31[4], ... _31[4]+.._31[7] };
_36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>; _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
_37 = _35 + _36; _37 = _35 + _36;
// _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[0]+.._31[4], ... _31[0]+.._31[7] }; // _31[0]+.._31[4], ... _31[0]+.._31[7] };
_38 = _30 + _37; _38 = _30 + _37;
_39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
MEM <vector(8) int> [(int *)&D.2043] = _39; MEM <vector(8) int> [(int *)&D.2043] = _39;
MEM <vector(8) int> [(int *)&D.2042] = _38; */ MEM <vector(8) int> [(int *)&D.2042] = _38;
Exclusive scan:
_26 = D.2043[_25];
D.2044[_25] = _26;
_27 = D.2042[_25];
_28 = _26 + _27;
D.2043[_25] = _28;
should be vectorized as (where _40 is the vectorized rhs
from the D.2042[_21] = 0; store):
_30 = MEM <vector(8) int> [(int *)&D.2043];
_31 = MEM <vector(8) int> [(int *)&D.2042];
_32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_34 = _32 + _33;
// _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
// _31[3]+_31[4], ... _31[5]+.._31[6] };
_35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
_36 = _34 + _35;
// _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[1]+.._31[4], ... _31[3]+.._31[6] };
_37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
_38 = _36 + _37;
// _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[0]+.._31[4], ... _31[0]+.._31[6] };
_39 = _30 + _38;
_50 = _31 + _39;
_51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
MEM <vector(8) int> [(int *)&D.2044] = _39;
MEM <vector(8) int> [(int *)&D.2042] = _51; */
enum machine_mode vec_mode = TYPE_MODE (vectype); enum machine_mode vec_mode = TYPE_MODE (vectype);
optab optab = optab_for_tree_code (code, vectype, optab_default); optab optab = optab_for_tree_code (code, vectype, optab_default);
if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing) if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
...@@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree rhs = gimple_assign_rhs1 (stmt); tree rhs = gimple_assign_rhs1 (stmt);
gcc_assert (TREE_CODE (rhs) == SSA_NAME); gcc_assert (TREE_CODE (rhs) == SSA_NAME);
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
bool inscan_var_store
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
{
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue;
rhs = gimple_assign_lhs (use_stmt);
break;
}
}
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
enum tree_code code = gimple_assign_rhs_code (def_stmt); enum tree_code code = gimple_assign_rhs_code (def_stmt);
if (code == POINTER_PLUS_EXPR) if (code == POINTER_PLUS_EXPR)
...@@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
{ {
std::swap (rhs1, rhs2); std::swap (rhs1, rhs2);
std::swap (var1, var2); std::swap (var1, var2);
std::swap (load1_dr_info, load2_dr_info);
} }
tree *init = loop_vinfo->scan_map->get (var1); tree *init = loop_vinfo->scan_map->get (var1);
gcc_assert (init); gcc_assert (init);
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
bool inscan_var_store
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
unsigned HOST_WIDE_INT nunits; unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable (); gcc_unreachable ();
...@@ -6789,16 +6957,20 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6789,16 +6957,20 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree vec_oprnd1 = NULL_TREE; tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE; tree vec_oprnd2 = NULL_TREE;
tree vec_oprnd3 = NULL_TREE; tree vec_oprnd3 = NULL_TREE;
tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr)); tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
tree dataref_offset = build_int_cst (ref_type, 0); tree dataref_offset = build_int_cst (ref_type, 0);
tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS); tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
tree ldataref_ptr = NULL_TREE;
tree orig = NULL_TREE; tree orig = NULL_TREE;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
for (int j = 0; j < ncopies; j++) for (int j = 0; j < ncopies; j++)
{ {
stmt_vec_info new_stmt_info; stmt_vec_info new_stmt_info;
if (j == 0) if (j == 0)
{ {
vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info); vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
if (ldataref_ptr == NULL)
vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info); vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
orig = vec_oprnd3; orig = vec_oprnd3;
...@@ -6806,12 +6978,29 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6806,12 +6978,29 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
else else
{ {
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
if (ldataref_ptr == NULL)
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3); vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
if (!inscan_var_store) if (!inscan_var_store)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
} }
if (ldataref_ptr)
{
vec_oprnd2 = make_ssa_name (vectype);
tree data_ref = fold_build2 (MEM_REF, vectype,
unshare_expr (ldataref_ptr),
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
if (prev_stmt_info == NULL)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
}
tree v = vec_oprnd2; tree v = vec_oprnd2;
for (int i = 0; i < units_log2; ++i) for (int i = 0; i < units_log2; ++i)
{ {
...@@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
new_temp = new_temp2; new_temp = new_temp2;
} }
/* For exclusive scan, perform the perms[i] permutation once
more. */
if (i == 0
&& STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
&& v == vec_oprnd2)
{
v = new_temp;
--i;
continue;
}
tree new_temp2 = make_ssa_name (vectype); tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, code, v, new_temp); g = gimple_build_assign (new_temp2, code, v, new_temp);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
...@@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info; prev_stmt_info = new_stmt_info;
tree last_perm_arg = new_temp;
/* For exclusive scan, new_temp computed above is the exclusive scan
prefix sum. Turn it into inclusive prefix sum for the broadcast
of the last element into orig. */
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
last_perm_arg = make_ssa_name (vectype);
g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
}
orig = make_ssa_name (vectype); orig = make_ssa_name (vectype);
g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp, g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
perms[units_log2]); last_perm_arg, perms[units_log2]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info; prev_stmt_info = new_stmt_info;
if (!inscan_var_store) if (!inscan_var_store)
{ {
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, tree data_ref = fold_build2 (MEM_REF, vectype,
unshare_expr (dataref_ptr),
dataref_offset); dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
g = gimple_build_assign (data_ref, new_temp); g = gimple_build_assign (data_ref, new_temp);
...@@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (j != 0) if (j != 0)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, tree data_ref = fold_build2 (MEM_REF, vectype,
unshare_expr (dataref_ptr),
dataref_offset); dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
gimple *g = gimple_build_assign (data_ref, orig); gimple *g = gimple_build_assign (data_ref, orig);
...@@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
} }
return true; return true;
} }
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies); return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
......
...@@ -917,7 +917,7 @@ struct _stmt_vec_info { ...@@ -917,7 +917,7 @@ struct _stmt_vec_info {
bool strided_p; bool strided_p;
/* For both loads and stores. */ /* For both loads and stores. */
unsigned simd_lane_access_p : 2; unsigned simd_lane_access_p : 3;
/* Classifies how the load or store is going to be implemented /* Classifies how the load or store is going to be implemented
for loop vectorization. */ for loop vectorization. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment