Commit 1dd69902 by Richard Biener Committed by Richard Biener

re PR tree-optimization/87621 (outer loop auto-vectorization fails for exponentiation code)

2018-11-09  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87621
	* tree-vect-loop.c (vectorizable_reduction): Handle reduction
	op with only phi inputs.
	* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
	(ch_base::copy_headers): Run CSE on copied loop headers.
	(pass_ch_vect::process_loop_p): Simplify.

	* g++.dg/vect/pr87621.cc: New testcase.

From-SVN: r265959
parent 43b01cc1
2018-11-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/87621
* tree-vect-loop.c (vectorizable_reduction): Handle reduction
op with only phi inputs.
* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
(ch_base::copy_headers): Run CSE on copied loop headers.
(pass_ch_vect::process_loop_p): Simplify.
2018-11-09 Alexandre Oliva <oliva@adacore.com>
* config/i386/mingw32.h (LINK_SPEC_LARGE_ADDR_AWARE): Adjust
2018-11-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/87621
* g++.dg/vect/pr87621.cc: New testcase.
2018-11-09 Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/86438
......
/* { dg-do compile } */
extern "C" double pow(double, double);
/* Raise X to the non-negative integer power N by repeated squaring and
   return the result as a T.  Returns 1 when N is zero.

   NOTE(review): this is the body of a vectorizer regression test; the
   statement shape of the loop below is presumably what the test is meant
   to exercise, so keep it as written.  */
template <typename T>
T pow(T x, unsigned int n)
{
if (!n)
return 1;
/* Y collects the factors of X that are picked up whenever N is odd.  */
T y = 1;
while (n > 1)
{
/* Low bit of N set: multiply the current power of X into Y.  */
if (n%2)
y *= x;
/* Square X and drop the low bit of N.  */
x = x*x;
n /= 2;
}
/* The loop exits with N == 1, so one factor of X is still outstanding.  */
return x*y;
}
/* Overwrite each of the first eight elements of X with the result of
   pow (element, 10).  X must point to at least eight ints.

   NOTE(review): the dg-final directive in this file scans the "vect" dump
   for "OUTER LOOP VECTORIZED"; this loop appears to be the outer loop it
   refers to, with the while loop inside pow as the inner one.  */
void testVec(int* x)
{
for (int i = 0; i < 8; ++i)
x[i] = pow(x[i], 10);
}
/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_double && vect_hw_misalign } } } } */
...@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h" #include "tree-inline.h"
#include "tree-ssa-scopedtables.h" #include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h" #include "tree-ssa-threadedge.h"
#include "tree-ssa-sccvn.h"
#include "params.h" #include "params.h"
/* Duplicates headers of loops if they are small enough, so that the statements /* Duplicates headers of loops if they are small enough, so that the statements
...@@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun) ...@@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun)
bool changed = false; bool changed = false;
if (number_of_loops (fun) <= 1) if (number_of_loops (fun) <= 1)
return 0; return 0;
bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
bbs_size = n_basic_blocks_for_fn (fun); bbs_size = n_basic_blocks_for_fn (fun);
auto_vec<std::pair<edge, loop_p> > copied;
FOR_EACH_LOOP (loop, 0) FOR_EACH_LOOP (loop, 0)
{ {
int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS); int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS);
...@@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun) ...@@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun)
fprintf (dump_file, "Duplication failed.\n"); fprintf (dump_file, "Duplication failed.\n");
continue; continue;
} }
copied.safe_push (std::make_pair (entry, loop));
/* If the loop has the form "for (i = j; i < j + 10; i++)" then /* If the loop has the form "for (i = j; i < j + 10; i++)" then
this copying can introduce a case where we rely on undefined this copying can introduce a case where we rely on undefined
...@@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun) ...@@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun)
} }
if (changed) if (changed)
update_ssa (TODO_update_ssa); {
update_ssa (TODO_update_ssa);
/* After updating SSA form perform CSE on the loop header
copies. This is esp. required for the pass before
vectorization since nothing cleans up copied exit tests
that can now be simplified. CSE from the entry of the
region we copied till all loop exit blocks but not
entering the loop itself. */
for (unsigned i = 0; i < copied.length (); ++i)
{
edge entry = copied[i].first;
loop_p loop = copied[i].second;
vec<edge> exit_edges = get_loop_exit_edges (loop);
bitmap exit_bbs = BITMAP_ALLOC (NULL);
for (unsigned j = 0; j < exit_edges.length (); ++j)
bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
bitmap_set_bit (exit_bbs, loop->header->index);
do_rpo_vn (cfun, entry, exit_bbs);
BITMAP_FREE (exit_bbs);
exit_edges.release ();
}
}
free (bbs); free (bbs);
free (copied_bbs); free (copied_bbs);
...@@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop) ...@@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop)
if (loop->dont_vectorize) if (loop->dont_vectorize)
return false; return false;
if (!do_while_loop_p (loop)) /* The vectorizer won't handle anything with multiple exits, so skip. */
return true;
/* The vectorizer won't handle anything with multiple exits, so skip. */
edge exit = single_exit (loop); edge exit = single_exit (loop);
if (!exit) if (!exit)
return false; return false;
/* Copy headers iff there looks to be code in the loop after the exit block, if (!do_while_loop_p (loop))
i.e. the exit block has an edge to another block (besides the latch, return true;
which should be empty). */
edge_iterator ei;
edge e;
FOR_EACH_EDGE (e, ei, exit->src->succs)
if (!loop_exit_edge_p (loop, e)
&& e->dest != loop->header
&& e->dest != loop->latch)
return true;
return false; return false;
} }
......
...@@ -6075,6 +6075,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6075,6 +6075,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break; break;
} }
/* For a nested cycle we might end up with an operation like
phi_result * phi_result. */
if (!vectype_in)
vectype_in = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (vectype_in); gcc_assert (vectype_in);
if (slp_node) if (slp_node)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment