Commit 9f4d9a36 by Richard Biener, committed by Richard Biener

tree-vectorizer.h (vect_transform_reduction): Declare.

2019-10-02  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (vect_transform_reduction): Declare.
	* tree-vect-stmts.c (vect_transform_stmt): Use it.
	* tree-vect-loop.c (vectorizable_reduction): Split out reduction
	stmt transform to ...
	(vect_transform_reduction): ... this.

From-SVN: r276452
parent ef4add8e
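For orientation, a minimal, self-contained sketch (not GCC code) of the control-flow split this commit makes: the analysis stays in vectorizable_reduction, which now only checks and costs the reduction and tags the statement with reduc_vec_info_type, while code generation moves to the new vect_transform_reduction, which vect_transform_stmt dispatches to. Every type and helper below (stmt_info, analyze_reduction, transform_reduction, transform_stmt) is a hypothetical stand-in, not the real vectorizer API.

/* Hypothetical stand-ins mirroring the analyze/transform split in the
   diff below; this is illustrative only, not GCC source.  */
#include <stdbool.h>
#include <stdio.h>

enum stmt_kind { other_vec_info_type, reduc_vec_info_type };

struct stmt_info
{
  enum stmt_kind type;   /* recorded during analysis */
  const char *name;
};

/* Analysis only: decide whether the reduction can be vectorized and
   record that decision; no code is generated here any more.  */
static bool
analyze_reduction (struct stmt_info *stmt)
{
  /* ... feasibility checks and costing would go here ...  */
  stmt->type = reduc_vec_info_type;
  return true;
}

/* Transform only: emit the vector statements for a previously
   analyzed reduction.  */
static bool
transform_reduction (struct stmt_info *stmt)
{
  printf ("transforming reduction: %s\n", stmt->name);
  return true;
}

/* Dispatcher, analogous to vect_transform_stmt: it no longer calls the
   analysis routine a second time but a dedicated transform routine.  */
static bool
transform_stmt (struct stmt_info *stmt)
{
  switch (stmt->type)
    {
    case reduc_vec_info_type:
      return transform_reduction (stmt);
    default:
      return false;
    }
}

int
main (void)
{
  struct stmt_info s = { other_vec_info_type, "sum += a[i]" };
  if (analyze_reduction (&s))
    transform_stmt (&s);
  return 0;
}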
......
@@ -5784,7 +5784,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   int i;
   int ncopies;
   bool single_defuse_cycle = false;
-  int j;
   tree ops[3];
   enum vect_def_type dts[3];
   bool nested_cycle = false, found_nested_cycle_def = false;
@@ -6576,43 +6575,224 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
   bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
-  if (!vec_stmt) /* transformation not required. */
-    {
-      vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
-      if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
-        {
-          if (reduction_type != FOLD_LEFT_REDUCTION
-              && !mask_by_cond_expr
-              && (cond_fn == IFN_LAST
-                  || !direct_internal_fn_supported_p (cond_fn, vectype_in,
-                                                      OPTIMIZE_FOR_SPEED)))
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "can't use a fully-masked loop because no"
-                                 " conditional operation is available.\n");
-              LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-            }
-          else if (reduc_index == -1)
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "can't use a fully-masked loop for chained"
-                                 " reductions.\n");
-              LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-            }
-          else
-            vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
-                                   vectype_in);
-        }
-      if (dump_enabled_p ()
-          && reduction_type == FOLD_LEFT_REDUCTION)
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "using an in-order (fold-left) reduction.\n");
-      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
-      return true;
-    }
+  /* transformation not required. */
+  gcc_assert (!vec_stmt);
+  vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
+  if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+    {
+      if (reduction_type != FOLD_LEFT_REDUCTION
+          && !mask_by_cond_expr
+          && (cond_fn == IFN_LAST
+              || !direct_internal_fn_supported_p (cond_fn, vectype_in,
+                                                  OPTIMIZE_FOR_SPEED)))
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "can't use a fully-masked loop because no"
+                             " conditional operation is available.\n");
+          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+        }
+      else if (reduc_index == -1)
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "can't use a fully-masked loop for chained"
+                             " reductions.\n");
+          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+        }
+      else
+        vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+                               vectype_in);
+    }
+  if (dump_enabled_p ()
+      && reduction_type == FOLD_LEFT_REDUCTION)
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "using an in-order (fold-left) reduction.\n");
+  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+  return true;
+}
/* Transform the definition stmt STMT_INFO of a reduction PHI backedge
value. */
bool
vect_transform_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
stmt_vec_info *vec_stmt, slp_tree slp_node)
{
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
tree vectype_in = NULL_TREE;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code;
int op_type;
bool is_simple_use;
int i;
int ncopies;
bool single_defuse_cycle = false;
int j;
tree ops[3];
bool nested_cycle = false;
int vec_num;
if (nested_in_vect_loop_p (loop, stmt_info))
{
loop = loop->inner;
nested_cycle = true;
    }
gassign *stmt = as_a <gassign *> (stmt_info->stmt);
/* Flatten RHS. */
switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
{
case GIMPLE_BINARY_RHS:
code = gimple_assign_rhs_code (stmt);
op_type = TREE_CODE_LENGTH (code);
gcc_assert (op_type == binary_op);
ops[0] = gimple_assign_rhs1 (stmt);
ops[1] = gimple_assign_rhs2 (stmt);
break;
case GIMPLE_TERNARY_RHS:
code = gimple_assign_rhs_code (stmt);
op_type = TREE_CODE_LENGTH (code);
gcc_assert (op_type == ternary_op);
ops[0] = gimple_assign_rhs1 (stmt);
ops[1] = gimple_assign_rhs2 (stmt);
ops[2] = gimple_assign_rhs3 (stmt);
break;
case GIMPLE_UNARY_RHS:
return false;
default:
gcc_unreachable ();
}
/* All uses but the last are expected to be defined in the loop.
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
stmt_vec_info reduc_def_info
= STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
gcc_assert (reduc_def_info);
gphi *reduc_def_phi = as_a <gphi *> (reduc_def_info->stmt);
tree reduc_def = PHI_RESULT (reduc_def_phi);
int reduc_index = -1;
for (i = 0; i < op_type; i++)
{
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
stmt_vec_info def_stmt_info;
enum vect_def_type dt;
tree tem;
is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dt, &tem,
&def_stmt_info);
gcc_assert (is_simple_use);
if (dt == vect_reduction_def
&& ops[i] == reduc_def)
{
reduc_index = i;
continue;
}
else if (tem)
{
/* To properly compute ncopies we are interested in the widest
input type in case we're looking at a widening accumulation. */
if (!vectype_in
|| (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
< GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
vectype_in = tem;
}
if (dt == vect_nested_cycle
&& ops[i] == reduc_def)
{
reduc_index = i;
}
}
if (!vectype_in)
vectype_in = vectype_out;
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == INTEGER_INDUC_COND_REDUCTION
&& dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"condition expression based on "
"integer induction.\n");
if (slp_node)
ncopies = 1;
else
ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
vect_reduction_type reduction_type
= STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
/* In case of widening multiplication by a constant, we update the type
of the constant to be the type of the other operand. We check that the
constant fits the type in the pattern recognition pass. */
if (code == DOT_PROD_EXPR
&& !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
{
gcc_unreachable ();
/* No testcase for this. PR49478. */
if (TREE_CODE (ops[0]) == INTEGER_CST)
ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
else if (TREE_CODE (ops[1]) == INTEGER_CST)
ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
}
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. For more details see documentation
in vectorizable_operation. */
/* If the reduction is used in an outer loop we need to generate
VF intermediate results, like so (e.g. for ncopies=2):
r0 = phi (init, r0)
r1 = phi (init, r1)
r0 = x0 + r0;
r1 = x1 + r1;
(i.e. we generate VF results in 2 registers).
In this case we have a separate def-use cycle for each copy, and therefore
for each copy we get the vector def for the reduction variable from the
respective phi node created for this copy.
Otherwise (the reduction is unused in the loop nest), we can combine
together intermediate results, like so (e.g. for ncopies=2):
r = phi (init, r)
r = x0 + r;
r = x1 + r;
(i.e. we generate VF/2 results in a single register).
In this case for each copy we get the vector def for the reduction variable
from the vectorized reduction operation generated in the previous iteration.
This only works when we see both the reduction PHI and its only consumer
in vectorizable_reduction and there are no intermediate stmts
participating. */
stmt_vec_info use_stmt_info;
tree reduc_phi_result = gimple_phi_result (reduc_def_phi);
if (ncopies > 1
&& (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
&& (use_stmt_info = loop_vinfo->lookup_single_use (reduc_phi_result))
&& (!STMT_VINFO_IN_PATTERN_P (use_stmt_info)
|| !STMT_VINFO_PATTERN_DEF_SEQ (use_stmt_info))
&& vect_stmt_to_vectorize (use_stmt_info) == stmt_info)
single_defuse_cycle = true;
if (slp_node)
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else
vec_num = 1;
internal_fn cond_fn = get_conditional_internal_fn (code);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
   /* Transform. */
   stmt_vec_info new_stmt_info = NULL;
@@ -6633,9 +6813,12 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
   if (reduction_type == FOLD_LEFT_REDUCTION)
-    return vectorize_fold_left_reduction
-      (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
-       reduc_fn, ops, vectype_in, reduc_index, masks);
+    {
+      internal_fn reduc_fn = STMT_VINFO_REDUC_FN (stmt_info);
+      return vectorize_fold_left_reduction
+        (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
+         reduc_fn, ops, vectype_in, reduc_index, masks);
+    }
   if (reduction_type == EXTRACT_LAST_REDUCTION)
     {
@@ -6645,6 +6828,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     }
   /* Create the destination vector */
+  tree scalar_dest = gimple_assign_lhs (stmt);
   tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
   prev_stmt_info = NULL;
......
@@ -10819,8 +10819,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       break;
     case reduc_vec_info_type:
-      done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
-                                     slp_node_instance, NULL);
+      done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
       gcc_assert (done);
       break;
......
@@ -1659,6 +1659,8 @@ extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
 extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
                                     stmt_vec_info *, slp_tree,
                                     stmt_vector_for_cost *);
+extern bool vect_transform_reduction (stmt_vec_info, gimple_stmt_iterator *,
+                                      stmt_vec_info *, slp_tree);
 extern bool vect_transform_cycle_phi (stmt_vec_info, stmt_vec_info *,
                                       slp_tree, slp_instance);
 extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree);
......