Commit 0f26839a by Richard Sandiford Committed by Richard Sandiford

Add an alternative vector loop iv mechanism

Normally we adjust the vector loop so that it iterates:

   (original number of scalar iterations - number of peels) / VF

times, enforcing this using an IV that starts at zero and increments
by one each iteration.  However, dividing by VF would be expensive
for variable VF, so this patch adds an alternative in which the IV
increments by VF each iteration instead.  We then need to take care
to handle possible overflow in the IV.

The new mechanism isn't used yet; a later patch replaces the
"if (1)" with a check for variable VF.

2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-vect-loop-manip.c: Include gimple-fold.h.
	(slpeel_make_loop_iterate_ntimes): Add step, final_iv and
	niters_maybe_zero parameters.  Handle other cases besides a step of 1.
	(vect_gen_vector_loop_niters): Add a step_vector_ptr parameter.
	Add a path that uses a step of VF instead of 1, but disable it
	for now.
	(vect_do_peeling): Add step_vector, niters_vector_mult_vf_var
	and niters_no_overflow parameters.  Update calls to
	slpeel_make_loop_iterate_ntimes and vect_gen_vector_loop_niters.
	Create a new SSA name if the latter chooses to use a step other
	than zero, and return it via niters_vector_mult_vf_var.
	* tree-vect-loop.c (vect_transform_loop): Update calls to
	vect_do_peeling, vect_gen_vector_loop_niters and
	slpeel_make_loop_iterate_ntimes.
	* tree-vectorizer.h (slpeel_make_loop_iterate_ntimes, vect_do_peeling)
	(vect_gen_vector_loop_niters): Update declarations after above changes.

From-SVN: r256124
parent e50ffab3
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
* tree-vect-loop-manip.c: Include gimple-fold.h.
(slpeel_make_loop_iterate_ntimes): Add step, final_iv and
niters_maybe_zero parameters. Handle other cases besides a step of 1.
(vect_gen_vector_loop_niters): Add a step_vector_ptr parameter.
Add a path that uses a step of VF instead of 1, but disable it
for now.
(vect_do_peeling): Add step_vector, niters_vector_mult_vf_var
and niters_no_overflow parameters. Update calls to
slpeel_make_loop_iterate_ntimes and vect_gen_vector_loop_niters.
Create a new SSA name if the latter chooses to use a step other
than zero, and return it via niters_vector_mult_vf_var.
* tree-vect-loop.c (vect_transform_loop): Update calls to
vect_do_peeling, vect_gen_vector_loop_niters and
slpeel_make_loop_iterate_ntimes.
* tree-vectorizer.h (slpeel_make_loop_iterate_ntimes, vect_do_peeling)
(vect_gen_vector_loop_niters): Update declarations after above changes.
2018-01-02 Michael Meissner <meissner@linux.vnet.ibm.com> 2018-01-02 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/rs6000.md (floor<mode>2): Add support for IEEE * config/rs6000/rs6000.md (floor<mode>2): Add support for IEEE
...@@ -7334,7 +7334,9 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -7334,7 +7334,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes; int nbbs = loop->num_nodes;
int i; int i;
tree niters_vector = NULL; tree niters_vector = NULL_TREE;
tree step_vector = NULL_TREE;
tree niters_vector_mult_vf = NULL_TREE;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
bool grouped_store; bool grouped_store;
bool slp_scheduled = false; bool slp_scheduled = false;
...@@ -7413,17 +7415,21 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -7413,17 +7415,21 @@ vect_transform_loop (loop_vec_info loop_vinfo)
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
&step_vector, &niters_vector_mult_vf, th,
check_profitability, niters_no_overflow); check_profitability, niters_no_overflow);
if (niters_vector == NULL_TREE) if (niters_vector == NULL_TREE)
{ {
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
niters_vector {
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), niters_vector
LOOP_VINFO_INT_NITERS (loop_vinfo) / vf); = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
LOOP_VINFO_INT_NITERS (loop_vinfo) / vf);
step_vector = build_one_cst (TREE_TYPE (niters));
}
else else
vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
niters_no_overflow); &step_vector, niters_no_overflow);
} }
/* 1) Make sure the loop header has exactly two entries /* 1) Make sure the loop header has exactly two entries
...@@ -7674,7 +7680,13 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -7674,7 +7680,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
} /* stmts in BB */ } /* stmts in BB */
} /* BBs in loop */ } /* BBs in loop */
slpeel_make_loop_iterate_ntimes (loop, niters_vector); /* The vectorization factor is always > 1, so if we use an IV increment of 1,
a zero NITERS becomes a nonzero NITERS_VECTOR. */
if (integer_onep (step_vector))
niters_no_overflow = true;
slpeel_make_loop_iterate_ntimes (loop, niters_vector, step_vector,
niters_vector_mult_vf,
!niters_no_overflow);
scale_profile_for_vect_loop (loop, vf); scale_profile_for_vect_loop (loop, vf);
......
...@@ -1145,14 +1145,15 @@ extern source_location vect_location; ...@@ -1145,14 +1145,15 @@ extern source_location vect_location;
/* Simple loop peeling and versioning utilities for vectorizer's purposes - /* Simple loop peeling and versioning utilities for vectorizer's purposes -
in tree-vect-loop-manip.c. */ in tree-vect-loop-manip.c. */
extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree, tree,
tree, bool);
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
struct loop *, edge); struct loop *, edge);
extern void vect_loop_versioning (loop_vec_info, unsigned int, bool, extern void vect_loop_versioning (loop_vec_info, unsigned int, bool,
poly_uint64); poly_uint64);
extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
tree *, int, bool, bool); tree *, tree *, tree *, int, bool, bool);
extern source_location find_loop_location (struct loop *); extern source_location find_loop_location (struct loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info); extern bool vect_can_advance_ivs_p (loop_vec_info);
...@@ -1269,7 +1270,8 @@ extern bool check_reduction_path (location_t, loop_p, gphi *, tree, ...@@ -1269,7 +1270,8 @@ extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
/* Driver for loop analysis stage. */ /* Driver for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
tree *, bool);
/* Driver for loop transformation stage. */ /* Driver for loop transformation stage. */
extern struct loop *vect_transform_loop (loop_vec_info); extern struct loop *vect_transform_loop (loop_vec_info);
extern loop_vec_info vect_analyze_loop_form (struct loop *); extern loop_vec_info vect_analyze_loop_form (struct loop *);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment