Commit b010117a by Ira Rosen Committed by Ira Rosen

re PR tree-optimization/41881 (Complete unrolling (inner) versus vectorization of reduction)



        PR tree-optimization/41881
        * tree-vectorizer.h (struct _loop_vec_info): Add new field
        reduction_chains along with a macro for its access.
        * tree-vect-loop.c (new_loop_vec_info): Initialize
        reduction chains.
        (destroy_loop_vec_info): Free reduction chains.
        (vect_analyze_loop_2): Return false if
        vect_analyze_slp() returns false.
        (vect_is_slp_reduction): New function.
        (vect_is_simple_reduction_1): Call
        vect_is_slp_reduction.
        (vect_create_epilog_for_reduction): Support SLP
        reduction chains.
        * tree-vect-slp.c (vect_get_and_check_slp_defs): Allow
        different definition types for reduction chains.
        (vect_supported_load_permutation_p): Don't allow
        permutations for reduction chains.
        (vect_analyze_slp_instance): Support reduction chains.
        (vect_analyze_slp): Try to build SLP instance from
        reduction chains.
        (vect_get_constant_vectors):  Handle reduction chains.
        (vect_schedule_slp_instance): Mark the first statement
        of the reduction chain as reduction.

From-SVN: r173856
parent e14c1050
2011-05-18 Ira Rosen <ira.rosen@linaro.org>
PR tree-optimization/41881
* tree-vectorizer.h (struct _loop_vec_info): Add new field
reduction_chains along with a macro for its access.
* tree-vect-loop.c (new_loop_vec_info): Initialize reduction chains.
(destroy_loop_vec_info): Free reduction chains.
(vect_analyze_loop_2): Return false if vect_analyze_slp() returns false.
(vect_is_slp_reduction): New function.
(vect_is_simple_reduction_1): Call vect_is_slp_reduction.
(vect_create_epilog_for_reduction): Support SLP reduction chains.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Allow different
definition types for reduction chains.
(vect_supported_load_permutation_p): Don't allow permutations for
reduction chains.
(vect_analyze_slp_instance): Support reduction chains.
(vect_analyze_slp): Try to build SLP instance from reduction chains.
(vect_get_constant_vectors): Handle reduction chains.
(vect_schedule_slp_instance): Mark the first statement of the
reduction chain as reduction.
2011-05-18 Ira Rosen <ira.rosen@linaro.org>
* tree-vect-loop-manip.c (vect_create_cond_for_alias_checks): Use new
names for group elements access.
* tree-vectorizer.h (struct _stmt_vec_info): Use interleaving info for
......
2011-05-18 Ira Rosen <ira.rosen@linaro.org>
PR tree-optimization/41881
* gcc.dg/vect/O3-pr41881.c: New test.
* gcc.dg/vect/O3-slp-reduc-10.c: New test.
2011-05-18 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/49000
......
/* { dg-do compile } */
#define TYPE int
TYPE fun1(TYPE *x, TYPE *y, unsigned int n)
{
int i, j;
TYPE dot = 0;
for (i = 0; i < n; i++)
dot += *(x++) * *(y++);
return dot;
}
TYPE fun2(TYPE *x, TYPE *y, unsigned int n)
{
int i, j;
TYPE dot = 0;
for (i = 0; i < n / 8; i++)
for (j = 0; j < 8; j++)
dot += *(x++) * *(y++);
return dot;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_int_mult && {! vect_no_align } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
#define TYPE int
#define RESULT 755918
__attribute__ ((noinline)) TYPE fun2 (TYPE *x, TYPE *y, unsigned int n)
{
int i, j;
TYPE dot = 14;
for (i = 0; i < n / 2; i++)
for (j = 0; j < 2; j++)
dot += *(x++) * *(y++);
return dot;
}
int main (void)
{
TYPE a[N], b[N], dot;
int i;
check_vect ();
for (i = 0; i < N; i++)
{
a[i] = i;
b[i] = i+8;
}
dot = fun2 (a, b, N);
if (dot != RESULT)
abort();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_int_mult && {! vect_no_align } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -244,14 +244,21 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
else
{
/* Not first stmt of the group, check that the def-stmt/s match
the def-stmt/s of the first stmt. */
the def-stmt/s of the first stmt. Allow different definition
types for reduction chains: the first stmt must be a
vect_reduction_def (a phi node), and the rest
vect_internal_def. */
if ((i == 0
&& (*first_stmt_dt0 != dt[i]
&& ((*first_stmt_dt0 != dt[i]
&& !(*first_stmt_dt0 == vect_reduction_def
&& dt[i] == vect_internal_def))
|| (*first_stmt_def0_type && def
&& !types_compatible_p (*first_stmt_def0_type,
TREE_TYPE (def)))))
|| (i == 1
&& (*first_stmt_dt1 != dt[i]
&& ((*first_stmt_dt1 != dt[i]
&& !(*first_stmt_dt1 == vect_reduction_def
&& dt[i] == vect_internal_def))
|| (*first_stmt_def1_type && def
&& !types_compatible_p (*first_stmt_def1_type,
TREE_TYPE (def)))))
......@@ -974,8 +981,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
GROUP_SIZE. */
number_of_groups = VEC_length (int, load_permutation) / group_size;
/* Reduction (there are no data-refs in the root). */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
/* Reduction (there are no data-refs in the root).
In reduction chain the order of the loads is important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
int first_group_load_index;
......@@ -1153,10 +1162,19 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
VEC (slp_tree, heap) *loads;
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
if (dr)
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
scalar_type = TREE_TYPE (DR_REF (dr));
vectype = get_vectype_for_scalar_type (scalar_type);
if (dr)
{
scalar_type = TREE_TYPE (DR_REF (dr));
vectype = get_vectype_for_scalar_type (scalar_type);
}
else
{
gcc_assert (loop_vinfo);
vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
}
group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
}
else
......@@ -1198,7 +1216,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Create a node (a root of the SLP tree) for the packed strided stores. */
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
next = stmt;
if (dr)
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
while (next)
......@@ -1213,14 +1231,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
next);
i++)
{
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "pushing reduction into node: ");
print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
}
}
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
}
SLP_TREE_VEC_STMTS (node) = NULL;
......@@ -1313,8 +1324,8 @@ bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
unsigned int i;
VEC (gimple, heap) *strided_stores, *reductions = NULL;
gimple store;
VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
gimple first_element;
bool ok = false;
if (vect_print_dump_info (REPORT_SLP))
......@@ -1323,14 +1334,15 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
if (loop_vinfo)
{
strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
}
else
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Find SLP sequences starting from groups of strided stores. */
FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
ok = true;
if (bb_vinfo && !ok)
......@@ -1341,6 +1353,21 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
return false;
}
if (loop_vinfo
&& VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
{
/* Find SLP sequences starting from reduction chains. */
FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
ok = true;
else
return false;
/* Don't try to vectorize SLP reductions if reduction chain was
detected. */
return ok;
}
/* Find SLP sequences starting from groups of reductions. */
if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
&& vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
......@@ -1972,11 +1999,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
gimple def_stmt = SSA_NAME_DEF_STMT (op);
gcc_assert (loop);
/* Get the def before the loop. */
op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
loop_preheader_edge (loop));
if (j != (number_of_copies - 1) && neutral_op)
/* Get the def before the loop. In reduction chain we have only
one initial value. */
if ((j != (number_of_copies - 1)
|| (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
&& i != 0))
&& neutral_op)
op = neutral_op;
else
op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
loop_preheader_edge (loop));
}
/* Create 'vect_ = {op0,op1,...,opn}'. */
......@@ -2524,6 +2557,16 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
si = gsi_for_stmt (last_store);
}
/* Mark the first element of the reduction chain as reduction to properly
transform the node. In the analysis phase only the last element of the
chain is marked as reduction. */
if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
&& GROUP_FIRST_ELEMENT (stmt_info) == stmt)
{
STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
}
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
return is_store;
}
......
......@@ -248,6 +248,10 @@ typedef struct _loop_vec_info {
/* Reduction cycles detected in the loop. Used in loop-aware SLP. */
VEC (gimple, heap) *reductions;
/* All reduction chains in the loop, represented by the first
stmt in the chain. */
VEC (gimple, heap) *reduction_chains;
/* Hash table used to choose the best peeling option. */
htab_t peeling_htab;
......@@ -277,6 +281,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
#define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment