Commit a6c51a12 by Yuri Rumyantsev Committed by Ilya Enkovich

tree-vect-loop-manip.c (rename_variables_in_bb): Add argument to allow renaming…

tree-vect-loop-manip.c (rename_variables_in_bb): Add argument to allow renaming of PHI arguments on edges incoming from outer...

gcc/

	* tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
	to allow renaming of PHI arguments on edges incoming from outer
	loop header, add corresponding check before starting the PHI iterator.
	(slpeel_tree_duplicate_loop_to_edge_cfg): Introduce new bool
	variable DUPLICATE_OUTER_LOOP and set it to true for outer loops
	with true force_vectorize.  Set-up dominator for outer loop too.
	Pass DUPLICATE_OUTER_LOOP as argument to rename_variables_in_bb.
	(slpeel_can_duplicate_loop_p): Allow duplicate of outer loop if it
	was marked with force_vectorize and has restricted cfg.
	(slpeel_tree_peel_loop_to_edge): Do not rename exit PHI uses in
	inner loop.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
	do peeling for outer loops.

gcc/testsuite/

	* gcc.dg/vect/vect-outer-simd-2.c: New test.

From-SVN: r225923
parent 54b710be
2015-07-17 Yuri Rumyantsev <ysrumyan@gmail.com>
* tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
to allow renaming of PHI arguments on edges incoming from outer
loop header, add corresponding check before starting the PHI iterator.
(slpeel_tree_duplicate_loop_to_edge_cfg): Introduce new bool
variable DUPLICATE_OUTER_LOOP and set it to true for outer loops
with true force_vectorize. Set-up dominator for outer loop too.
Pass DUPLICATE_OUTER_LOOP as argument to rename_variables_in_bb.
(slpeel_can_duplicate_loop_p): Allow duplicate of outer loop if it
was marked with force_vectorize and has restricted cfg.
(slpeel_tree_peel_loop_to_edge): Do not rename exit PHI uses in
inner loop.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
do peeling for outer loops.
2015-07-17 Yvan Roux <yvan.roux@linaro.org> 2015-07-17 Yvan Roux <yvan.roux@linaro.org>
Matthias Klose <doko@ubuntu.com> Matthias Klose <doko@ubuntu.com>
......
2015-07-17 Yuri Rumyantsev <ysrumyan@gmail.com>
* gcc.dg/vect/vect-outer-simd-2.c: New test.
2015-07-17 Uros Bizjak <ubizjak@gmail.com> 2015-07-17 Uros Bizjak <ubizjak@gmail.com>
PR target/66891 PR target/66891
......
/* { dg-require-effective-target vect_simd_clones } */
/* { dg-additional-options "-fopenmp-simd -ffast-math" } */
#include <stdlib.h>
#include "tree-vect.h"
#define N 64
float *px, *py;
float *tx, *ty;
float *x1, *z1, *t1, *t2;
/* Accumulate into *VX and *VY over all N elements of the global arrays:
   for each j in [0, N), subtract (CX - px[j]) * tx[j] from *VX and
   (CY - py[j]) * ty[j] from *VY.  Called from the loop bodies of foo1
   and foo2.  Reads the file-scope pointers px, py, tx and ty; writes only
   through VX and VY.  */
static void inline bar (const float cx, float cy,
float *vx, float *vy)
{
int j;
for (j = 0; j < N; ++j)
{
/* Per-element differences against the global coordinate arrays.  */
const float dx = cx - px[j];
const float dy = cy - py[j];
/* Running reductions stored through the caller-supplied pointers.  */
*vx -= dx * tx[j];
*vy -= dy * ty[j];
}
}
/* "Vector variant" (as labelled at the call site in main): for each i in
   [0, n), call bar with px[i], py[i] and pointers to x1[i] and z1[i].
   The loop is annotated with "#pragma omp simd"; the dg-final directive of
   this test expects "OUTER LOOP VECTORIZED" in the vect dump.
   NOTE(review): presumably this is the loop that message refers to, with
   bar's loop inlined as the inner loop -- confirm against the dump.  */
__attribute__((noinline, noclone)) void foo1 (int n)
{
int i;
#pragma omp simd
for (i=0; i<n; i++)
bar (px[i], py[i], x1+i, z1+i);
}
/* "Scalar variant" (as labelled at the call site in main): performs the
   same calls to bar as foo1 for each i in [0, n), but without the simd
   pragma and with a volatile loop counter.  main compares its results
   against those produced by foo1.
   NOTE(review): the volatile counter presumably exists to keep this loop
   from being vectorized -- confirm.  */
__attribute__((noinline, noclone)) void foo2 (int n)
{
volatile int i;
for (i=0; i<n; i++)
bar (px[i], py[i], x1+i, z1+i);
}
/* Test driver: runs foo1 and foo2 on identical inputs and aborts if any
   element of their x1/z1 results differs.  Returns 0 on success.  */
int main ()
{
/* One allocation holding eight consecutive arrays of N floats each.  */
float *X = (float*)malloc (N * 8 * sizeof (float));
int i;
/* Both variants are run over N - 1 iterations of their outer loop.  */
int n = N - 1;
/* Declared in tree-vect.h (not visible here).  NOTE(review): presumably
   verifies the target can execute vector code -- confirm.  */
check_vect ();
/* Carve the eight global arrays out of X.  */
px = &X[0];
py = &X[N * 1];
tx = &X[N * 2];
ty = &X[N * 3];
x1 = &X[N * 4];
z1 = &X[N * 5];
t1 = &X[N * 6];
t2 = &X[N * 7];
/* Initialize the inputs and set both accumulators to 1.0f.  */
for (i=0; i<N; i++)
{
px[i] = (float) (i+2);
tx[i] = (float) (i+1);
py[i] = (float) (i+4);
ty[i] = (float) (i+3);
x1[i] = z1[i] = 1.0f;
}
foo1 (n); /* vector variant. */
/* Save foo1's results in t1/t2 and reset the accumulators for foo2.  */
for (i=0; i<N;i++)
{
t1[i] = x1[i]; x1[i] = 1.0f;
t2[i] = z1[i]; z1[i] = 1.0f;
}
foo2 (n); /* scalar variant. */
/* The two variants must agree exactly on every element.  */
for (i=0; i<N; i++)
if (x1[i] != t1[i] || z1[i] != t2[i])
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
...@@ -1516,7 +1516,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1516,7 +1516,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Check if we can possibly peel the loop. */ /* Check if we can possibly peel the loop. */
if (!vect_can_advance_ivs_p (loop_vinfo) if (!vect_can_advance_ivs_p (loop_vinfo)
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
|| loop->inner)
do_peeling = false; do_peeling = false;
if (do_peeling if (do_peeling
......
...@@ -77,10 +77,12 @@ rename_use_op (use_operand_p op_p) ...@@ -77,10 +77,12 @@ rename_use_op (use_operand_p op_p)
} }
/* Renames the variables in basic block BB. */ /* Renames the variables in basic block BB. Allow renaming of PHI arguments
on edges incoming from outer-loop header if RENAME_FROM_OUTER_LOOP is
true. */
static void static void
rename_variables_in_bb (basic_block bb) rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{ {
gimple stmt; gimple stmt;
use_operand_p use_p; use_operand_p use_p;
...@@ -88,6 +90,13 @@ rename_variables_in_bb (basic_block bb) ...@@ -88,6 +90,13 @@ rename_variables_in_bb (basic_block bb)
edge e; edge e;
edge_iterator ei; edge_iterator ei;
struct loop *loop = bb->loop_father; struct loop *loop = bb->loop_father;
struct loop *outer_loop = NULL;
if (rename_from_outer_loop)
{
gcc_assert (loop);
outer_loop = loop_outer (loop);
}
for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
gsi_next (&gsi)) gsi_next (&gsi))
...@@ -99,7 +108,8 @@ rename_variables_in_bb (basic_block bb) ...@@ -99,7 +108,8 @@ rename_variables_in_bb (basic_block bb)
FOR_EACH_EDGE (e, ei, bb->preds) FOR_EACH_EDGE (e, ei, bb->preds)
{ {
if (!flow_bb_inside_loop_p (loop, e->src)) if (!flow_bb_inside_loop_p (loop, e->src)
&& (!rename_from_outer_loop || e->src != outer_loop->header))
continue; continue;
for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
gsi_next (&gsi)) gsi_next (&gsi))
...@@ -755,6 +765,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, ...@@ -755,6 +765,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
bool was_imm_dom; bool was_imm_dom;
basic_block exit_dest; basic_block exit_dest;
edge exit, new_exit; edge exit, new_exit;
bool duplicate_outer_loop = false;
exit = single_exit (loop); exit = single_exit (loop);
at_exit = (e == exit); at_exit = (e == exit);
...@@ -766,7 +777,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, ...@@ -766,7 +777,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes); get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
/* Allow duplication of outer loops. */
if (scalar_loop->inner)
duplicate_outer_loop = true;
/* Check whether duplication is possible. */ /* Check whether duplication is possible. */
if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes)) if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
{ {
...@@ -835,7 +848,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, ...@@ -835,7 +848,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
redirect_edge_and_branch_force (e, new_preheader); redirect_edge_and_branch_force (e, new_preheader);
flush_pending_stmts (e); flush_pending_stmts (e);
set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src); set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
if (was_imm_dom) if (was_imm_dom || duplicate_outer_loop)
set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src); set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
/* And remove the non-necessary forwarder again. Keep the other /* And remove the non-necessary forwarder again. Keep the other
...@@ -878,7 +891,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, ...@@ -878,7 +891,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
} }
for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++) for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
rename_variables_in_bb (new_bbs[i]); rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
if (scalar_loop != loop) if (scalar_loop != loop)
{ {
...@@ -960,11 +973,11 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, ...@@ -960,11 +973,11 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond,
/* This function verifies that the following restrictions apply to LOOP: /* This function verifies that the following restrictions apply to LOOP:
(1) it is innermost (1) it consists of exactly 2 basic blocks - header, and an empty latch
(2) it consists of exactly 2 basic blocks - header, and an empty latch. for innermost loop and 5 basic blocks for outer-loop.
(3) it is single entry, single exit (2) it is single entry, single exit
(4) its exit condition is the last stmt in the header (3) its exit condition is the last stmt in the header
(5) E is the entry/exit edge of LOOP. (4) E is the entry/exit edge of LOOP.
*/ */
bool bool
...@@ -974,12 +987,12 @@ slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e) ...@@ -974,12 +987,12 @@ slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
edge entry_e = loop_preheader_edge (loop); edge entry_e = loop_preheader_edge (loop);
gcond *orig_cond = get_loop_exit_condition (loop); gcond *orig_cond = get_loop_exit_condition (loop);
gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src); gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
unsigned int num_bb = loop->inner? 5 : 2;
if (loop->inner
/* All loops have an outer scope; the only case loop->outer is NULL is for /* All loops have an outer scope; the only case loop->outer is NULL is for
the function itself. */ the function itself. */
|| !loop_outer (loop) if (!loop_outer (loop)
|| loop->num_nodes != 2 || loop->num_nodes != num_bb
|| !empty_block_p (loop->latch) || !empty_block_p (loop->latch)
|| !single_exit (loop) || !single_exit (loop)
/* Verify that new loop exit condition can be trivially modified. */ /* Verify that new loop exit condition can be trivially modified. */
...@@ -1165,6 +1178,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop, ...@@ -1165,6 +1178,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
int bound1, int bound2) int bound1, int bound2)
{ {
struct loop *new_loop = NULL, *first_loop, *second_loop; struct loop *new_loop = NULL, *first_loop, *second_loop;
struct loop *inner_loop = NULL;
edge skip_e; edge skip_e;
tree pre_condition = NULL_TREE; tree pre_condition = NULL_TREE;
basic_block bb_before_second_loop, bb_after_second_loop; basic_block bb_before_second_loop, bb_after_second_loop;
...@@ -1185,6 +1199,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop, ...@@ -1185,6 +1199,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
if (!slpeel_can_duplicate_loop_p (loop, e)) if (!slpeel_can_duplicate_loop_p (loop, e))
return NULL; return NULL;
if (loop->inner)
inner_loop = loop->inner;
/* We might have a queued need to update virtual SSA form. As we /* We might have a queued need to update virtual SSA form. As we
delete the update SSA machinery below after doing a regular delete the update SSA machinery below after doing a regular
incremental SSA update during loop copying make sure we don't incremental SSA update during loop copying make sure we don't
...@@ -1220,7 +1237,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop, ...@@ -1220,7 +1237,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION); add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
gimple_phi_set_result (new_phi, new_vop); gimple_phi_set_result (new_phi, new_vop);
FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop) FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
if (stmt != new_phi && gimple_bb (stmt) != loop->header) if (stmt != new_phi && gimple_bb (stmt) != loop->header
/* Do not rename PHI arguments in inner-loop. */
&& (!inner_loop || gimple_bb (stmt) != inner_loop->header))
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_vop); SET_USE (use_p, new_vop);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment