Commit 15e693cc by Richard Biener Committed by Richard Biener

re PR tree-optimization/59058 (wrong code at -O3 on x86_64-linux-gnu (affecting gcc 4.6 to trunk))

2013-11-21  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/59058
	* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
	* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
	* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
	(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
	(NITERS_KNOWN_P): Fold into ...
	(LOOP_VINFO_NITERS_KNOWN_P): ... this.
	(LOOP_VINFO_PEELING_FOR_NITER): Add.
	* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
	Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
	(vect_do_peeling_for_alignment): Re-use precomputed niter
	instead of re-emitting it.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
	Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
	* tree-vect-loop.c (vect_get_loop_niters): Use
	number_of_latch_executions.
	(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
	(vect_analyze_loop_form): Simplify.
	(vect_analyze_loop_operations): Move epilogue peeling code ...
	(vect_analyze_loop_2): ... here and adjust it to compute
	LOOP_VINFO_PEELING_FOR_NITER.
	(vect_estimate_min_profitable_iters): Use
	LOOP_VINFO_PEELING_FOR_ALIGNMENT.
	(vect_build_loop_niters): Emit on the preheader.
	(vect_generate_tmps_on_preheader): Likewise.
	(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
	of recomputing it.  Adjust.

From-SVN: r205217
parent b05e0233
2013-11-21 Richard Biener <rguenther@suse.de>
PR tree-optimization/59058
* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
(NITERS_KNOWN_P): Fold into ...
(LOOP_VINFO_NITERS_KNOWN_P): ... this.
(LOOP_VINFO_PEELING_FOR_NITER): Add.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Re-use precomputed niter
instead of re-emitting it.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
* tree-vect-loop.c (vect_get_loop_niters): Use
number_of_latch_executions.
(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
(vect_analyze_loop_form): Simplify.
(vect_analyze_loop_operations): Move epilogue peeling code ...
(vect_analyze_loop_2): ... here and adjust it to compute
LOOP_VINFO_PEELING_FOR_NITER.
(vect_estimate_min_profitable_iters): Use
LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_build_loop_niters): Emit on the preheader.
(vect_generate_tmps_on_preheader): Likewise.
(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
of recomputing it. Adjust.
2013-11-21 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (LOC, UNKNOWN_LOC, EXPR_LOC, LOC_FILE,
LOC_LINE): Remove wrappers and fix all users.
(struct _loop_vec_info): Remove loop_line_number member.
...@@ -2911,34 +2911,6 @@ number_of_latch_executions (struct loop *loop) ...@@ -2911,34 +2911,6 @@ number_of_latch_executions (struct loop *loop)
return res; return res;
} }
/* Returns the number of executions of the exit condition of LOOP,
i.e., the number by one higher than number_of_latch_executions.
Note that unlike number_of_latch_executions, this number does
not necessarily fit in the unsigned variant of the type of
the control variable -- if the number of iterations is a constant,
we return chrec_dont_know if adding one to number_of_latch_executions
overflows; however, in case the number of iterations is symbolic
expression, the caller is responsible for dealing with this
the possible overflow. */
tree
number_of_exit_cond_executions (struct loop *loop)
{
tree ret = number_of_latch_executions (loop);
tree type = chrec_type (ret);
if (chrec_contains_undetermined (ret))
return ret;
ret = chrec_fold_plus (type, ret, build_int_cst (type, 1));
if (TREE_CODE (ret) == INTEGER_CST
&& TREE_OVERFLOW (ret))
return chrec_dont_know;
return ret;
}
/* Counters for the stats. */ /* Counters for the stats. */
......
...@@ -22,7 +22,6 @@ along with GCC; see the file COPYING3. If not see ...@@ -22,7 +22,6 @@ along with GCC; see the file COPYING3. If not see
#define GCC_TREE_SCALAR_EVOLUTION_H #define GCC_TREE_SCALAR_EVOLUTION_H
extern tree number_of_latch_executions (struct loop *); extern tree number_of_latch_executions (struct loop *);
extern tree number_of_exit_cond_executions (struct loop *);
extern gimple get_loop_exit_condition (const struct loop *); extern gimple get_loop_exit_condition (const struct loop *);
extern void scev_initialize (void); extern void scev_initialize (void);
......
...@@ -1735,9 +1735,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1735,9 +1735,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
if (npeel) if (npeel)
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel; LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
else else
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
= DR_MISALIGNMENT (dr0);
SET_DR_MISALIGNMENT (dr0, 0); SET_DR_MISALIGNMENT (dr0, 0);
if (dump_enabled_p ()) if (dump_enabled_p ())
{ {
......
...@@ -1736,16 +1736,16 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int ...@@ -1736,16 +1736,16 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int
pe = loop_preheader_edge (loop); pe = loop_preheader_edge (loop);
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{ {
int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, dump_printf_loc (MSG_NOTE, vect_location,
"known peeling = %d.\n", npeel); "known peeling = %d.\n", npeel);
iters = build_int_cst (niters_type, npeel); iters = build_int_cst (niters_type, npeel);
*bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
} }
else else
{ {
...@@ -1876,7 +1876,6 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name, ...@@ -1876,7 +1876,6 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
{ {
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop; tree niters_of_prolog_loop;
tree n_iters;
tree wide_prolog_niters; tree wide_prolog_niters;
struct loop *new_loop; struct loop *new_loop;
int max_iter; int max_iter;
...@@ -1918,9 +1917,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name, ...@@ -1918,9 +1917,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
"loop to %d\n", max_iter); "loop to %d\n", max_iter);
/* Update number of times loop executes. */ /* Update number of times loop executes. */
n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop))) if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
wide_prolog_niters = niters_of_prolog_loop; wide_prolog_niters = niters_of_prolog_loop;
......
...@@ -771,11 +771,12 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) ...@@ -771,11 +771,12 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
} }
/* Function vect_get_loop_niters. /* Function vect_get_loop_niters.
Determine how many iterations the loop is executed. Determine how many iterations the loop is executed and place it
If an expression that represents the number of iterations in NUMBER_OF_ITERATIONS.
can be constructed, place it in NUMBER_OF_ITERATIONS.
Return the loop exit condition. */ Return the loop exit condition. */
static gimple static gimple
...@@ -786,21 +787,17 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) ...@@ -786,21 +787,17 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, dump_printf_loc (MSG_NOTE, vect_location,
"=== get_loop_niters ===\n"); "=== get_loop_niters ===\n");
niters = number_of_exit_cond_executions (loop);
if (niters != NULL_TREE niters = number_of_latch_executions (loop);
&& niters != chrec_dont_know) /* We want the number of loop header executions which is the number
{ of latch executions plus one.
??? For UINT_MAX latch executions this number overflows to zero
for loops like do { n++; } while (n != 0); */
if (niters && !chrec_contains_undetermined (niters))
niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
build_int_cst (TREE_TYPE (niters), 1));
*number_of_iterations = niters; *number_of_iterations = niters;
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
dump_printf (MSG_NOTE, "\n");
}
}
return get_loop_exit_condition (loop); return get_loop_exit_condition (loop);
} }
...@@ -907,7 +904,7 @@ new_loop_vec_info (struct loop *loop) ...@@ -907,7 +904,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0;
LOOP_VINFO_LOOP_NEST (res).create (3); LOOP_VINFO_LOOP_NEST (res).create (3);
LOOP_VINFO_DATAREFS (res).create (10); LOOP_VINFO_DATAREFS (res).create (10);
...@@ -924,6 +921,7 @@ new_loop_vec_info (struct loop *loop) ...@@ -924,6 +921,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop); LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
LOOP_VINFO_PEELING_FOR_GAPS (res) = false; LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
LOOP_VINFO_PEELING_FOR_NITER (res) = false;
LOOP_VINFO_OPERANDS_SWAPPED (res) = false; LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
return res; return res;
...@@ -1243,7 +1241,8 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -1243,7 +1241,8 @@ vect_analyze_loop_form (struct loop *loop)
return NULL; return NULL;
} }
if (!number_of_iterations) if (!number_of_iterations
|| chrec_contains_undetermined (number_of_iterations))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
...@@ -1254,17 +1253,21 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -1254,17 +1253,21 @@ vect_analyze_loop_form (struct loop *loop)
return NULL; return NULL;
} }
if (chrec_contains_undetermined (number_of_iterations)) if (integer_zerop (number_of_iterations))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Infinite number of iterations.\n"); "not vectorized: number of iterations = 0.\n");
if (inner_loop_vinfo) if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true); destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL; return NULL;
} }
if (!NITERS_KNOWN_P (number_of_iterations)) loop_vinfo = new_loop_vec_info (loop);
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
{ {
...@@ -1274,19 +1277,6 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -1274,19 +1277,6 @@ vect_analyze_loop_form (struct loop *loop)
dump_printf (MSG_NOTE, "\n"); dump_printf (MSG_NOTE, "\n");
} }
} }
else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: number of iterations = 0.\n");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
loop_vinfo = new_loop_vec_info (loop);
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type; STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
...@@ -1588,23 +1578,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) ...@@ -1588,23 +1578,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
return false; return false;
} }
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
< exact_log2 (vectorization_factor)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
if (!vect_can_advance_ivs_p (loop_vinfo)
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: can't create required "
"epilog loop\n");
return false;
}
}
return true; return true;
} }
...@@ -1760,6 +1733,40 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) ...@@ -1760,6 +1733,40 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
return false; return false;
} }
/* Decide whether we need to create an epilogue loop to handle
remaining scalar iterations. */
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
< exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
}
else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
|| (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
< (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
/* If an epilogue loop is required make sure we can create one. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
if (!vect_can_advance_ivs_p (loop_vinfo)
|| !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
single_exit (LOOP_VINFO_LOOP
(loop_vinfo))))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: can't create required "
"epilog loop\n");
return false;
}
}
return true; return true;
} }
...@@ -2689,7 +2696,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ...@@ -2689,7 +2696,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
int scalar_single_iter_cost = 0; int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0; int scalar_outside_cost = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */ /* Cost model disabled. */
...@@ -2880,7 +2887,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ...@@ -2880,7 +2887,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
else else
{ {
/* Cost model check occurs at prologue generation. */ /* Cost model check occurs at prologue generation. */
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
+ vect_get_stmt_cost (cond_branch_not_taken); + vect_get_stmt_cost (cond_branch_not_taken);
/* Cost model check occurs at epilogue generation. */ /* Cost model check occurs at epilogue generation. */
...@@ -5574,22 +5581,27 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple stmt) ...@@ -5574,22 +5581,27 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
/* This function builds ni_name = number of iterations. Statements /* This function builds ni_name = number of iterations. Statements
are queued onto SEQ. */ are emitted on the loop preheader edge. */
static tree static tree
vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq) vect_build_loop_niters (loop_vec_info loop_vinfo)
{ {
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
if (TREE_CODE (ni) == INTEGER_CST)
return ni;
else
{
tree ni_name, var; tree ni_name, var;
gimple_seq stmts = NULL; gimple_seq stmts = NULL;
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
var = create_tmp_var (TREE_TYPE (ni), "niters"); var = create_tmp_var (TREE_TYPE (ni), "niters");
ni_name = force_gimple_operand (ni, &stmts, false, var); ni_name = force_gimple_operand (ni, &stmts, false, var);
if (stmts) if (stmts)
gimple_seq_add_seq (seq, stmts); gsi_insert_seq_on_edge_immediate (pe, stmts);
return ni_name; return ni_name;
}
} }
...@@ -5599,22 +5611,21 @@ vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq) ...@@ -5599,22 +5611,21 @@ vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq)
ratio = ni_name / vf ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf ratio_mult_vf_name = ratio * vf
and places them in COND_EXPR_STMT_LIST. */ and places them on the loop preheader edge. */
static void static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
tree ni_name, tree ni_name,
tree *ratio_mult_vf_name_ptr, tree *ratio_mult_vf_name_ptr,
tree *ratio_name_ptr, tree *ratio_name_ptr)
gimple_seq *cond_expr_stmt_list)
{ {
gimple_seq stmts;
tree ni_minus_gap_name; tree ni_minus_gap_name;
tree var; tree var;
tree ratio_name; tree ratio_name;
tree ratio_mult_vf_name; tree ratio_mult_vf_name;
tree ni = LOOP_VINFO_NITERS (loop_vinfo); tree ni = LOOP_VINFO_NITERS (loop_vinfo);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
tree log_vf; tree log_vf;
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
...@@ -5630,11 +5641,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, ...@@ -5630,11 +5641,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
if (!is_gimple_val (ni_minus_gap_name)) if (!is_gimple_val (ni_minus_gap_name))
{ {
var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
gimple stmts = NULL;
stmts = NULL;
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
true, var); true, var);
gimple_seq_add_seq (cond_expr_stmt_list, stmts); gsi_insert_seq_on_edge_immediate (pe, stmts);
} }
} }
else else
...@@ -5647,10 +5657,9 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, ...@@ -5647,10 +5657,9 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
if (!is_gimple_val (ratio_name)) if (!is_gimple_val (ratio_name))
{ {
var = create_tmp_var (TREE_TYPE (ni), "bnd"); var = create_tmp_var (TREE_TYPE (ni), "bnd");
gimple stmts = NULL;
stmts = NULL;
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
gimple_seq_add_seq (cond_expr_stmt_list, stmts); gsi_insert_seq_on_edge_immediate (pe, stmts);
} }
*ratio_name_ptr = ratio_name; *ratio_name_ptr = ratio_name;
...@@ -5663,11 +5672,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, ...@@ -5663,11 +5672,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
if (!is_gimple_val (ratio_mult_vf_name)) if (!is_gimple_val (ratio_mult_vf_name))
{ {
var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
gimple stmts = NULL;
stmts = NULL;
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
true, var); true, var);
gimple_seq_add_seq (cond_expr_stmt_list, stmts); gsi_insert_seq_on_edge_immediate (pe, stmts);
} }
*ratio_mult_vf_name_ptr = ratio_mult_vf_name; *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
} }
...@@ -5739,20 +5747,20 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5739,20 +5747,20 @@ vect_transform_loop (loop_vec_info loop_vinfo)
check_profitability = false; check_profitability = false;
} }
tree ni_name = vect_build_loop_niters (loop_vinfo);
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
/* Peel the loop if there are data refs with unknown alignment. /* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. Only one data ref with unknown store is allowed. */
This clobbers LOOP_VINFO_NITERS but retains the original
in LOOP_VINFO_NITERS_UNCHANGED. So we cannot avoid re-computing
niters. */
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
{ {
gimple_seq stmts = NULL;
tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
vect_do_peeling_for_alignment (loop_vinfo, ni_name, vect_do_peeling_for_alignment (loop_vinfo, ni_name,
th, check_profitability); th, check_profitability);
check_profitability = false; check_profitability = false;
/* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
be re-computed. */
ni_name = NULL_TREE;
} }
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
...@@ -5763,16 +5771,14 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5763,16 +5771,14 @@ vect_transform_loop (loop_vec_info loop_vinfo)
will remain scalar and will compute the remaining (n%VF) iterations. will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */ (VF is the vectorization factor). */
if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
< exact_log2 (vectorization_factor)
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
{ {
tree ni_name, ratio_mult_vf; tree ratio_mult_vf;
gimple_seq stmts = NULL; if (!ni_name)
ni_name = vect_build_loop_niters (loop_vinfo, &stmts); ni_name = vect_build_loop_niters (loop_vinfo);
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf, vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
&ratio, &stmts); &ratio);
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf, vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
th, check_profitability); th, check_profitability);
} }
...@@ -5781,12 +5787,9 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5781,12 +5787,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
else else
{ {
tree ni_name; if (!ni_name)
gimple_seq stmts = NULL; ni_name = vect_build_loop_niters (loop_vinfo);
ni_name = vect_build_loop_niters (loop_vinfo, &stmts); vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL,
&ratio, &stmts);
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
} }
/* 1) Make sure the loop header has exactly two entries /* 1) Make sure the loop header has exactly two entries
......
...@@ -361,7 +361,7 @@ typedef struct _loop_vec_info { ...@@ -361,7 +361,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_DATAREFS(L) (L)->datarefs #define LOOP_VINFO_DATAREFS(L) (L)->datarefs
#define LOOP_VINFO_DDRS(L) (L)->ddrs #define LOOP_VINFO_DDRS(L) (L)->ddrs
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
...@@ -375,18 +375,15 @@ typedef struct _loop_vec_info { ...@@ -375,18 +375,15 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
(L)->may_misalign_stmts.length () > 0 (L)->may_misalign_stmts.length () > 0
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
(L)->may_alias_ddrs.length () > 0 (L)->may_alias_ddrs.length () > 0
#define NITERS_KNOWN_P(n) \
(tree_fits_shwi_p ((n)) \
&& tree_to_shwi ((n)) > 0)
#define LOOP_VINFO_NITERS_KNOWN_P(L) \ #define LOOP_VINFO_NITERS_KNOWN_P(L) \
NITERS_KNOWN_P ((L)->num_iters) (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
static inline loop_vec_info static inline loop_vec_info
loop_vec_info_for_loop (struct loop *loop) loop_vec_info_for_loop (struct loop *loop)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment