Commit 1b4dbccc by Richard Biener Committed by Richard Biener

tree-vect-loop.c (vect_analyze_loop_operations): Analyze loop-closed PHIs that…

tree-vect-loop.c (vect_analyze_loop_operations): Analyze loop-closed PHIs that are vect_internal_def.

2019-09-26  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_analyze_loop_operations): Analyze
	loop-closed PHIs that are vect_internal_def.
	(vect_create_epilog_for_reduction): Exit early for nested cycles.
	Simplify.
	(vectorizable_lc_phi): New.
	* tree-vect-stmts.c (vect_analyze_stmt): Call vectorizable_lc_phi.
	(vect_transform_stmt): Likewise.
	* tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type.
	(vectorizable_lc_phi): Declare.

From-SVN: r276157
parent 26cdf7bd
2019-09-26 Richard Biener <rguenther@suse.de> 2019-09-26 Richard Biener <rguenther@suse.de>
* tree-vect-loop.c (vect_analyze_loop_operations): Analyze
loop-closed PHIs that are vect_internal_def.
(vect_create_epilog_for_reduction): Exit early for nested cycles.
Simplify.
(vectorizable_lc_phi): New.
* tree-vect-stmts.c (vect_analyze_stmt): Call vectorizable_lc_phi.
(vect_transform_stmt): Likewise.
* tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type.
(vectorizable_lc_phi): Declare.
2019-09-26 Richard Biener <rguenther@suse.de>
* tree-vect-loop.c (vect_analyze_loop_operations): Also call * tree-vect-loop.c (vect_analyze_loop_operations): Also call
vectorizable_reduction for vect_double_reduction_def. vectorizable_reduction for vect_double_reduction_def.
(vect_transform_loop): Likewise. (vect_transform_loop): Likewise.
......
...@@ -1519,12 +1519,16 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) ...@@ -1519,12 +1519,16 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
phi_op = PHI_ARG_DEF (phi, 0); phi_op = PHI_ARG_DEF (phi, 0);
stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
if (!op_def_info) if (!op_def_info)
return opt_result::failure_at (phi, "unsupported phi"); return opt_result::failure_at (phi, "unsupported phi\n");
if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
&& (STMT_VINFO_RELEVANT (op_def_info) && (STMT_VINFO_RELEVANT (op_def_info)
!= vect_used_in_outer_by_reduction)) != vect_used_in_outer_by_reduction))
return opt_result::failure_at (phi, "unsupported phi"); return opt_result::failure_at (phi, "unsupported phi\n");
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
&& !vectorizable_lc_phi (stmt_info, NULL, NULL))
return opt_result::failure_at (phi, "unsupported phi\n");
} }
continue; continue;
...@@ -4396,6 +4400,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, ...@@ -4396,6 +4400,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
} }
} }
/* For vectorizing nested cycles the above is all we need to do. */
if (nested_in_vect_loop && !double_reduc)
return;
/* For cond reductions we want to create a new vector (INDEX_COND_EXPR) /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
which is updated with the current index of the loop for every match of which is updated with the current index of the loop for every match of
the original loop's cond_expr (VEC_STMT). This results in a vector the original loop's cond_expr (VEC_STMT). This results in a vector
...@@ -4588,16 +4596,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, ...@@ -4588,16 +4596,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
bitsize = TYPE_SIZE (scalar_type); bitsize = TYPE_SIZE (scalar_type);
/* In case this is a reduction in an inner-loop while vectorizing an outer
loop - we don't need to extract a single scalar result at the end of the
inner-loop (unless it is double reduction, i.e., the use of reduction is
outside the outer-loop). The final vector of partial results will be used
in the vectorized outer-loop, or reduced to a scalar result at the end of
the outer-loop. */
if (nested_in_vect_loop && !double_reduc)
;
else
{
/* SLP reduction without reduction chain, e.g., /* SLP reduction without reduction chain, e.g.,
# a1 = phi <a2, a0> # a1 = phi <a2, a0>
# b1 = phi <b2, b0> # b1 = phi <b2, b0>
...@@ -5313,7 +5311,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, ...@@ -5313,7 +5311,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
new_phis[0] = epilog_stmt; new_phis[0] = epilog_stmt;
} }
}
if (double_reduc) if (double_reduc)
loop = loop->inner; loop = loop->inner;
...@@ -5473,7 +5470,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, ...@@ -5473,7 +5470,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
if (double_reduc) if (double_reduc)
loop = outer_loop; loop = outer_loop;
else else
continue; gcc_unreachable ();
} }
phis.create (3); phis.create (3);
...@@ -7167,6 +7164,76 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -7167,6 +7164,76 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
return true; return true;
} }
/* Analyze or transform the loop-closed PHI STMT_INFO (so far only for
   nested cycles).

   Return false when STMT_INFO has no loop_vec_info, when its statement
   is not a PHI with exactly one argument, or when its def type is not
   vect_internal_def.

   With VEC_STMT == NULL only analysis is done: STMT_VINFO_TYPE is set to
   lc_phi_info_type and true is returned.  Otherwise vector PHIs are
   created in the block of the scalar PHI, each taking its argument on
   that block's single predecessor edge.  For SLP_NODE one PHI is created
   per vectorized operand and pushed to SLP_TREE_VEC_STMTS; otherwise one
   PHI is created per copy, the first being recorded in *VEC_STMT and
   STMT_VINFO_VEC_STMT and the following ones chained through
   STMT_VINFO_RELATED_STMT.  */

bool
vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
		     slp_tree slp_node)
{
  loop_vec_info lvinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  /* Only single-argument PHIs inside a loop being vectorized qualify.  */
  if (!lvinfo)
    return false;
  if (!is_a <gphi *> (stmt_info->stmt))
    return false;
  if (gimple_phi_num_args (stmt_info->stmt) != 1)
    return false;

  /* To handle the nested_cycle_def for double-reductions we have to
     refactor epilogue generation more; until then
     vect_double_reduction_def is deliberately not accepted here.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Analysis phase: just record how to transform this stmt later.  */
  if (vec_stmt == NULL)
    {
      STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
      return true;
    }

  /* Transformation phase.  */
  basic_block phi_bb = gimple_bb (stmt_info->stmt);
  edge pred_edge = single_pred_edge (phi_bb);
  tree vector_type = STMT_VINFO_VECTYPE (stmt_info);
  tree scalar_result = gimple_phi_result (stmt_info->stmt);
  tree vector_result = vect_create_destination_var (scalar_result,
						     vector_type);

  /* Fetch the vectorized definitions of the PHI's only argument.  */
  vec<tree> vec_defs = vNULL;
  vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE,
		     stmt_info, &vec_defs, NULL, slp_node);

  if (!slp_node)
    {
      unsigned n_copies = vect_get_num_copies (lvinfo, vector_type);
      stmt_vec_info last_phi_info = NULL;
      for (unsigned copy = 0; copy < n_copies; ++copy)
	{
	  /* Every copy after the first needs the defs of the next copy
	     of the defining stmt.  */
	  if (copy > 0)
	    vect_get_vec_defs_for_stmt_copy (lvinfo, &vec_defs, NULL);

	  /* Create the vectorized LC PHI node.  */
	  gphi *lc_phi = create_phi_node (vector_result, phi_bb);
	  add_phi_arg (lc_phi, vec_defs[0], pred_edge, UNKNOWN_LOCATION);
	  stmt_vec_info lc_phi_info = lvinfo->add_stmt (lc_phi);

	  if (copy > 0)
	    STMT_VINFO_RELATED_STMT (last_phi_info) = lc_phi_info;
	  else
	    {
	      *vec_stmt = lc_phi_info;
	      STMT_VINFO_VEC_STMT (stmt_info) = lc_phi_info;
	    }
	  last_phi_info = lc_phi_info;
	}
    }
  else
    {
      unsigned n_vec_stmts = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      gcc_assert (vec_defs.length () == n_vec_stmts);
      for (unsigned idx = 0; idx < n_vec_stmts; ++idx)
	{
	  /* Create the vectorized LC PHI node.  */
	  gphi *lc_phi = create_phi_node (vector_result, phi_bb);
	  add_phi_arg (lc_phi, vec_defs[idx], pred_edge, UNKNOWN_LOCATION);
	  stmt_vec_info lc_phi_info = lvinfo->add_stmt (lc_phi);
	  SLP_TREE_VEC_STMTS (slp_node).quick_push (lc_phi_info);
	}
    }

  vec_defs.release ();
  return true;
}
/* Function vect_min_worthwhile_factor. /* Function vect_min_worthwhile_factor.
For a loop where we could vectorize the operation indicated by CODE, For a loop where we could vectorize the operation indicated by CODE,
...@@ -8399,7 +8466,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -8399,7 +8466,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
&& ! PURE_SLP_STMT (stmt_info)) && ! PURE_SLP_STMT (stmt_info))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
......
...@@ -10671,7 +10671,8 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize, ...@@ -10671,7 +10671,8 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
|| vectorizable_condition (stmt_info, NULL, NULL, false, -1, node, || vectorizable_condition (stmt_info, NULL, NULL, false, -1, node,
cost_vec) cost_vec)
|| vectorizable_comparison (stmt_info, NULL, NULL, node, || vectorizable_comparison (stmt_info, NULL, NULL, node,
cost_vec)); cost_vec)
|| vectorizable_lc_phi (stmt_info, NULL, node));
else else
{ {
if (bb_vinfo) if (bb_vinfo)
...@@ -10820,6 +10821,11 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -10820,6 +10821,11 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
gcc_assert (done); gcc_assert (done);
break; break;
case lc_phi_info_type:
done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
gcc_assert (done);
break;
default: default:
if (!STMT_VINFO_LIVE_P (stmt_info)) if (!STMT_VINFO_LIVE_P (stmt_info))
{ {
......
...@@ -694,6 +694,7 @@ enum stmt_vec_info_type { ...@@ -694,6 +694,7 @@ enum stmt_vec_info_type {
type_promotion_vec_info_type, type_promotion_vec_info_type,
type_demotion_vec_info_type, type_demotion_vec_info_type,
type_conversion_vec_info_type, type_conversion_vec_info_type,
lc_phi_info_type,
loop_exit_ctrl_vec_info_type loop_exit_ctrl_vec_info_type
}; };
...@@ -1653,6 +1654,7 @@ extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *, ...@@ -1653,6 +1654,7 @@ extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
stmt_vec_info *, slp_tree, stmt_vec_info *, slp_tree,
stmt_vector_for_cost *); stmt_vector_for_cost *);
extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree);
extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *, stmt_vector_for_cost *,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment