Commit 4e65deef by Jakub Jelinek Committed by Jakub Jelinek

omp-low.c (lower_rec_input_clauses): If OMP_CLAUSE_IF has non-constant expression...

	* omp-low.c (lower_rec_input_clauses): If OMP_CLAUSE_IF
	has non-constant expression, force sctx.lane and use two
	argument IFN_GOMP_SIMD_LANE instead of single argument.
	* tree-ssa-dce.c (eliminate_unnecessary_stmts): Don't DCE
	two argument IFN_GOMP_SIMD_LANE without lhs.
	* tree-vectorizer.h (struct _loop_vec_info): Add simd_if_cond
	member.
	(LOOP_VINFO_SIMD_IF_COND, LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND):
	Define.
	(LOOP_REQUIRES_VERSIONING): Or in
	LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND.
	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
	simd_if_cond.
	(vect_analyze_loop_2): Punt if LOOP_VINFO_SIMD_IF_COND is constant 0.
	* tree-vect-loop-manip.c (vect_loop_versioning): Add runtime check
	from simd if clause if needed.

	* gcc.dg/vect/vect-simd-1.c: New test.
	* gcc.dg/vect/vect-simd-2.c: New test.
	* gcc.dg/vect/vect-simd-3.c: New test.
	* gcc.dg/vect/vect-simd-4.c: New test.

From-SVN: r271298
parent d069df01
2019-05-16 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (lower_rec_input_clauses): If OMP_CLAUSE_IF
has non-constant expression, force sctx.lane and use two
argument IFN_GOMP_SIMD_LANE instead of single argument.
* tree-ssa-dce.c (eliminate_unnecessary_stmts): Don't DCE
two argument IFN_GOMP_SIMD_LANE without lhs.
* tree-vectorizer.h (struct _loop_vec_info): Add simd_if_cond
member.
(LOOP_VINFO_SIMD_IF_COND, LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND):
Define.
(LOOP_REQUIRES_VERSIONING): Or in
LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
simd_if_cond.
(vect_analyze_loop_2): Punt if LOOP_VINFO_SIMD_IF_COND is constant 0.
* tree-vect-loop-manip.c (vect_loop_versioning): Add runtime check
from simd if clause if needed.
2019-05-16 Richard Biener <rguenther@suse.de> 2019-05-16 Richard Biener <rguenther@suse.de>
* tree-affine.c (expr_to_aff_combination): New function split * tree-affine.c (expr_to_aff_combination): New function split
......
...@@ -3783,6 +3783,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -3783,6 +3783,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
tree simt_lane = NULL_TREE, simtrec = NULL_TREE; tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE; tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
gimple_seq llist[3] = { }; gimple_seq llist[3] = { };
tree nonconst_simd_if = NULL_TREE;
copyin_seq = NULL; copyin_seq = NULL;
sctx.is_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_); sctx.is_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_);
...@@ -3814,6 +3815,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -3814,6 +3815,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
case OMP_CLAUSE_IF: case OMP_CLAUSE_IF:
if (integer_zerop (OMP_CLAUSE_IF_EXPR (c))) if (integer_zerop (OMP_CLAUSE_IF_EXPR (c)))
sctx.max_vf = 1; sctx.max_vf = 1;
else if (TREE_CODE (OMP_CLAUSE_IF_EXPR (c)) != INTEGER_CST)
nonconst_simd_if = OMP_CLAUSE_IF_EXPR (c);
break; break;
case OMP_CLAUSE_SIMDLEN: case OMP_CLAUSE_SIMDLEN:
if (integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (c))) if (integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (c)))
...@@ -5190,6 +5193,17 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5190,6 +5193,17 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
if (known_eq (sctx.max_vf, 1U)) if (known_eq (sctx.max_vf, 1U))
sctx.is_simt = false; sctx.is_simt = false;
if (nonconst_simd_if)
{
if (sctx.lane == NULL_TREE)
{
sctx.idx = create_tmp_var (unsigned_type_node);
sctx.lane = create_tmp_var (unsigned_type_node);
}
/* FIXME: For now. */
sctx.is_simt = false;
}
if (sctx.lane || sctx.is_simt) if (sctx.lane || sctx.is_simt)
{ {
uid = create_tmp_var (ptr_type_node, "simduid"); uid = create_tmp_var (ptr_type_node, "simduid");
...@@ -5219,8 +5233,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ...@@ -5219,8 +5233,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
} }
if (sctx.lane) if (sctx.lane)
{ {
gimple *g gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE,
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid); 1 + (nonconst_simd_if != NULL),
uid, nonconst_simd_if);
gimple_call_set_lhs (g, sctx.lane); gimple_call_set_lhs (g, sctx.lane);
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt)); gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT); gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
......
2019-05-16 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-1.c: New test.
* gcc.dg/vect/vect-simd-2.c: New test.
* gcc.dg/vect/vect-simd-3.c: New test.
* gcc.dg/vect/vect-simd-4.c: New test.
2019-05-16 Martin Liska <mliska@suse.cz> 2019-05-16 Martin Liska <mliska@suse.cz>
PR lto/90500 PR lto/90500
......
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#define N 1024
int a[N];
int x;
__attribute__((noipa)) int
bar (void)
{
return x;
}
__attribute__((noipa)) void
foo (void)
{
#pragma omp simd if (bar ())
for (int i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
__attribute__((noipa)) void
baz (void)
{
int c = 0;
#pragma omp simd if (c)
for (int i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
__attribute__((noipa)) void
qux (void)
{
int c = 1;
#pragma omp simd if (c)
for (int i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
int
main ()
{
check_vect ();
foo ();
for (int i = 0; i < N; ++i)
if (a[i] != 1)
abort ();
x = 1;
foo ();
for (int i = 0; i < N; ++i)
if (a[i] != 2)
abort ();
baz ();
for (int i = 0; i < N; ++i)
if (a[i] != 3)
abort ();
qux ();
for (int i = 0; i < N; ++i)
if (a[i] != 4)
abort ();
return 0;
}
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fopenmp-simd" } */
#define N 1024
int a[N];
int bar (void);
void
foo (void)
{
#pragma omp simd if (bar ())
for (int i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "created versioning for simd if condition check" 1 "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fopenmp-simd" } */
#define N 1024
int a[N];
void
foo (void)
{
int c = 0;
#pragma omp simd if (c)
for (int i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fopenmp-simd" } */
#define N 1024
int a[N];
void
foo (void)
{
int c = 1;
#pragma omp simd if (c)
for (int i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-not "created versioning for simd if condition check" "vect" } } */
...@@ -1328,12 +1328,16 @@ eliminate_unnecessary_stmts (void) ...@@ -1328,12 +1328,16 @@ eliminate_unnecessary_stmts (void)
update_stmt (stmt); update_stmt (stmt);
release_ssa_name (name); release_ssa_name (name);
/* GOMP_SIMD_LANE or ASAN_POISON without lhs is not /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON
needed. */ without lhs is not needed. */
if (gimple_call_internal_p (stmt)) if (gimple_call_internal_p (stmt))
switch (gimple_call_internal_fn (stmt)) switch (gimple_call_internal_fn (stmt))
{ {
case IFN_GOMP_SIMD_LANE: case IFN_GOMP_SIMD_LANE:
if (gimple_call_num_args (stmt) >= 2
&& !integer_nonzerop (gimple_call_arg (stmt, 1)))
break;
/* FALLTHRU */
case IFN_ASAN_POISON: case IFN_ASAN_POISON:
remove_dead_stmt (&gsi, bb, to_remove_edges); remove_dead_stmt (&gsi, bb, to_remove_edges);
break; break;
......
...@@ -3009,6 +3009,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo, ...@@ -3009,6 +3009,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
tree version_simd_if_cond
= LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
if (check_profitability) if (check_profitability)
cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
...@@ -3044,6 +3046,31 @@ vect_loop_versioning (loop_vec_info loop_vinfo, ...@@ -3044,6 +3046,31 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
} }
if (version_simd_if_cond)
{
gcc_assert (dom_info_available_p (CDI_DOMINATORS));
if (flag_checking)
if (basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
gcc_assert (bb != loop->header
&& dominated_by_p (CDI_DOMINATORS, loop->header, bb)
&& (scalar_loop == NULL
|| (bb != scalar_loop->header
&& dominated_by_p (CDI_DOMINATORS,
scalar_loop->header, bb))));
tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
tree c = fold_build2 (NE_EXPR, boolean_type_node,
version_simd_if_cond, zero);
if (cond_expr)
cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
c, cond_expr);
else
cond_expr = c;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"created versioning for simd if condition check.\n");
}
cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
&gimplify_stmt_list, &gimplify_stmt_list,
is_gimple_condexpr, NULL_TREE); is_gimple_condexpr, NULL_TREE);
......
...@@ -819,6 +819,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) ...@@ -819,6 +819,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
max_vectorization_factor (0), max_vectorization_factor (0),
mask_skip_niters (NULL_TREE), mask_skip_niters (NULL_TREE),
mask_compare_type (NULL_TREE), mask_compare_type (NULL_TREE),
simd_if_cond (NULL_TREE),
unaligned_dr (NULL), unaligned_dr (NULL),
peeling_for_alignment (0), peeling_for_alignment (0),
ptr_mask (0), ptr_mask (0),
...@@ -862,6 +863,26 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) ...@@ -862,6 +863,26 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
gimple *stmt = gsi_stmt (si); gimple *stmt = gsi_stmt (si);
gimple_set_uid (stmt, 0); gimple_set_uid (stmt, 0);
add_stmt (stmt); add_stmt (stmt);
/* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the
second argument is the #pragma omp simd if (x) condition, when 0,
loop shouldn't be vectorized, when non-zero constant, it should
be vectorized normally, otherwise versioned with vectorized loop
done if the condition is non-zero at runtime. */
if (loop_in->simduid
&& is_gimple_call (stmt)
&& gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
&& gimple_call_num_args (stmt) >= 2
&& TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
&& (loop_in->simduid
== SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
{
tree arg = gimple_call_arg (stmt, 1);
if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
simd_if_cond = arg;
else
gcc_assert (integer_nonzerop (arg));
}
} }
} }
} }
...@@ -1769,6 +1790,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) ...@@ -1769,6 +1790,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
/* The first group of checks is independent of the vector size. */ /* The first group of checks is independent of the vector size. */
fatal = true; fatal = true;
if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)
&& integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)))
return opt_result::failure_at (vect_location,
"not vectorized: simd if(0)\n");
/* Find all data references in the loop (which correspond to vdefs/vuses) /* Find all data references in the loop (which correspond to vdefs/vuses)
and analyze their evolution in the loop. */ and analyze their evolution in the loop. */
......
...@@ -428,6 +428,13 @@ typedef struct _loop_vec_info : public vec_info { ...@@ -428,6 +428,13 @@ typedef struct _loop_vec_info : public vec_info {
loops. */ loops. */
tree mask_compare_type; tree mask_compare_type;
/* For #pragma omp simd if (x) loops the x expression. If constant 0,
the loop should not be vectorized, if constant non-zero, simd_if_cond
shouldn't be set and loop vectorized normally, if SSA_NAME, the loop
should be versioned on that condition, using scalar loop if the condition
is false and vectorized loop otherwise. */
tree simd_if_cond;
/* Unknown DRs according to which loop was peeled. */ /* Unknown DRs according to which loop was peeled. */
struct dr_vec_info *unaligned_dr; struct dr_vec_info *unaligned_dr;
...@@ -591,6 +598,7 @@ typedef struct _loop_vec_info : public vec_info { ...@@ -591,6 +598,7 @@ typedef struct _loop_vec_info : public vec_info {
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
((L)->may_misalign_stmts.length () > 0) ((L)->may_misalign_stmts.length () > 0)
...@@ -600,10 +608,13 @@ typedef struct _loop_vec_info : public vec_info { ...@@ -600,10 +608,13 @@ typedef struct _loop_vec_info : public vec_info {
|| (L)->lower_bounds.length () > 0) || (L)->lower_bounds.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \
(LOOP_VINFO_NITERS_ASSUMPTIONS (L)) (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \
(LOOP_VINFO_SIMD_IF_COND (L))
#define LOOP_REQUIRES_VERSIONING(L) \ #define LOOP_REQUIRES_VERSIONING(L) \
(LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)) || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
#define LOOP_VINFO_NITERS_KNOWN_P(L) \ #define LOOP_VINFO_NITERS_KNOWN_P(L) \
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment