Commit 32ff1243 by Dorit Nuzman Committed by Dorit Nuzman

tree-vect-transform.c (vect_create_epilog_for_reduction): Takes an additional argument.

2008-08-14  Dorit Nuzman  <dorit@il.ibm.com>

        * tree-vect-transform.c (vect_create_epilog_for_reduction): Takes an
        additional argument. Support reduction when duplication is needed due
        to data-types of different sizes in the loop.
        (get_initial_def_for_induction): Fix printout.
        (vect_get_vec_def_for_stmt_copy): Support case where the
        vec_stmt_for_operand is a phi node.
        (vectorizable_reduction): Support reduction when duplication is needed
        due to data-types of different sizes in the loop.
        (vectorizable_call): Remove restriction to not vectorize in case we
        have data-types of different sizes in the loop.
        (vectorizable_conversion): Likewise.
        (vectorizable_operation): Likewise.
        (vectorizable_type_demotion): Likewise.
        (vectorizable_type_promotion): Likewise.
        (vectorizable_induction): Add restriction to not vectorize in case
        we have data-types of different sizes in the loop.

From-SVN: r139096
parent ed6fa129
2008-08-14 Dorit Nuzman <dorit@il.ibm.com>
* tree-vect-transform.c (vect_create_epilog_for_reduction): Takes an
additional argument. Support reduction when duplication is needed due
to data-types of different sizes in the loop.
(get_initial_def_for_induction): Fix printout.
(vect_get_vec_def_for_stmt_copy): Support case where the
vec_stmt_for_operand is a phi node.
(vectorizable_reduction): Support reduction when duplication is needed
due to data-types of different sizes in the loop.
(vectorizable_call): Remove restriction to not vectorize in case we
have data-types of different sizes in the loop.
(vectorizable_conversion): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_type_demotion): Likewise.
(vectorizable_type_promotion): Likewise.
(vectorizable_induction): Add restriction to not vectorize in case
we have data-types of different sizes in the loop.
2008-08-14 Christophe Saout <christophe@saout.de> 2008-08-14 Christophe Saout <christophe@saout.de>
Uros Bizjak <ubizjak@gmail.com> Uros Bizjak <ubizjak@gmail.com>
......
2008-08-14 Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/vect-outer-4g.c: Change loop bound.
* gcc.dg/vect/vect-outer-4k.c: Likewise.
* gcc.dg/vect/vect-outer-4l.c: Likewise.
* gcc.dg/vect/vect-outer-4f.c: Likewise.
* gcc.dg/vect/vect-outer-4a.c: Vectorizable. Remove obsolete comment.
* gcc.dg/vect/vect-outer-4i.c: Likewise.
* gcc.dg/vect/vect-outer-4b.c: Likewise.
* gcc.dg/vect/vect-outer-4j.c: Likewise.
2008-08-14 Uros Bizjak <ubizjak@gmail.com> 2008-08-14 Uros Bizjak <ubizjak@gmail.com>
PR target/37101 PR target/37101
......
...@@ -6,8 +6,7 @@ signed short in[N+M]; ...@@ -6,8 +6,7 @@ signed short in[N+M];
signed short coeff[M]; signed short coeff[M];
signed short out[N]; signed short out[N];
/* Outer-loop vectorization. /* Outer-loop vectorization. */
Currently not vectorized because of multiple-data-types in the inner-loop. */
void void
foo (){ foo (){
...@@ -23,9 +22,5 @@ foo (){ ...@@ -23,9 +22,5 @@ foo (){
} }
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target { vect_widen_mult_hi_to_si && vect_pack_trunc } } } } */
/* FORNOW. not vectorized until we support 0-stride acceses like coeff[j]. should be:
{ scan-tree-dump-not "multiple types in nested loop." "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "zero step in outer loop." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -6,8 +6,7 @@ signed short in[N+M]; ...@@ -6,8 +6,7 @@ signed short in[N+M];
signed short coeff[M]; signed short coeff[M];
int out[N]; int out[N];
/* Outer-loop vectorization. /* Outer-loop vectorization. */
Currently not vectorized because of multiple-data-types in the inner-loop. */
void void
foo (){ foo (){
...@@ -23,9 +22,5 @@ foo (){ ...@@ -23,9 +22,5 @@ foo (){
} }
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* FORNOW. not vectorized until we support 0-stride acceses like coeff[j]. should be:
{ scan-tree-dump-not "multiple types in nested loop." "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "zero step in outer loop." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 40 #define N 96
#define M 128 #define M 128
unsigned short in[N+M]; unsigned short in[N+M];
unsigned int out[N]; unsigned int out[N];
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 40 #define N 96
#define M 128 #define M 128
unsigned short in[N+M]; unsigned short in[N+M];
unsigned int out[N]; unsigned int out[N];
......
/* { dg-do compile } */ /* { dg-require-effective-target vect_int } */
#define N 40 #include <stdarg.h>
#include "tree-vect.h"
#define N 96
#define M 128 #define M 128
unsigned char in[N+M]; unsigned char in[N+M];
unsigned short out[N]; unsigned short out[N];
/* Outer-loop vectorization. */ /* Outer-loop vectorization. */
/* Not vectorized due to multiple-types in the inner-loop. */ /* Multiple-types in the inner-loop. */
__attribute__ ((noinline))
unsigned short unsigned short
foo (){ foo (){
int i,j; int i,j;
...@@ -24,5 +28,22 @@ foo (){ ...@@ -24,5 +28,22 @@ foo (){
return s; return s;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ int main (void)
{
check_vect ();
int i;
unsigned short s;
for (i = 0; i < N+M; i++)
in[i] = (unsigned char)i;
s = foo ();
if (s != 34048)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_unpack } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 40 #define N 96
#define M 128 #define M 128
unsigned char in[N+M]; unsigned char in[N+M];
unsigned short out[N]; unsigned short out[N];
/* Outer-loop vectorization. */ /* Outer-loop vectorization. */
/* Not vectorized due to multiple-types in the inner-loop. */
void void
foo (){ foo (){
...@@ -22,5 +21,5 @@ foo (){ ...@@ -22,5 +21,5 @@ foo (){
} }
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_unpack } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 40 #define N 96
#define M 128 #define M 128
unsigned short in[N+M]; unsigned short in[N+M];
unsigned int out[N]; unsigned int out[N];
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 40 #define N 96
#define M 128 #define M 128
unsigned short in[N+M]; unsigned short in[N+M];
unsigned int out[N]; unsigned int out[N];
......
...@@ -59,8 +59,8 @@ static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *); ...@@ -59,8 +59,8 @@ static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
static void vect_finish_stmt_generation static void vect_finish_stmt_generation
(gimple stmt, gimple vec_stmt, gimple_stmt_iterator *); (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
static bool vect_is_simple_cond (tree, loop_vec_info); static bool vect_is_simple_cond (tree, loop_vec_info);
static void vect_create_epilog_for_reduction (tree, gimple, enum tree_code, static void vect_create_epilog_for_reduction
gimple); (tree, gimple, int, enum tree_code, gimple);
static tree get_initial_def_for_reduction (gimple, tree, tree *); static tree get_initial_def_for_reduction (gimple, tree, tree *);
/* Utility function dealing with loop peeling (not peeling itself). */ /* Utility function dealing with loop peeling (not peeling itself). */
...@@ -1824,7 +1824,7 @@ get_initial_def_for_induction (gimple iv_phi) ...@@ -1824,7 +1824,7 @@ get_initial_def_for_induction (gimple iv_phi)
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
fprintf (vect_dump, "transform induction: created def-use cycle:"); fprintf (vect_dump, "transform induction: created def-use cycle: ");
print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM); print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
fprintf (vect_dump, "\n"); fprintf (vect_dump, "\n");
print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM); print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
...@@ -2056,6 +2056,10 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) ...@@ -2056,6 +2056,10 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
gcc_assert (vec_stmt_for_operand); gcc_assert (vec_stmt_for_operand);
vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
else
vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
return vec_oprnd; return vec_oprnd;
} }
...@@ -2251,6 +2255,11 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) ...@@ -2251,6 +2255,11 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
VECT_DEF is a vector of partial results. VECT_DEF is a vector of partial results.
REDUC_CODE is the tree-code for the epilog reduction. REDUC_CODE is the tree-code for the epilog reduction.
NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
number of elements that we can fit in a vectype (nunits). In this case
we have to generate more than one vector stmt - i.e - we need to "unroll"
the vector stmt by a factor VF/nunits. For more details see documentation
in vectorizable_operation.
STMT is the scalar reduction stmt that is being vectorized. STMT is the scalar reduction stmt that is being vectorized.
REDUCTION_PHI is the phi-node that carries the reduction computation. REDUCTION_PHI is the phi-node that carries the reduction computation.
...@@ -2294,10 +2303,12 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) ...@@ -2294,10 +2303,12 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
static void static void
vect_create_epilog_for_reduction (tree vect_def, gimple stmt, vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
int ncopies,
enum tree_code reduc_code, enum tree_code reduc_code,
gimple reduction_phi) gimple reduction_phi)
{ {
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_phi_info;
tree vectype; tree vectype;
enum machine_mode mode; enum machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
...@@ -2305,7 +2316,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2305,7 +2316,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
basic_block exit_bb; basic_block exit_bb;
tree scalar_dest; tree scalar_dest;
tree scalar_type; tree scalar_type;
gimple new_phi; gimple new_phi = NULL, phi;
gimple_stmt_iterator exit_gsi; gimple_stmt_iterator exit_gsi;
tree vec_dest; tree vec_dest;
tree new_temp = NULL_TREE; tree new_temp = NULL_TREE;
...@@ -2316,7 +2327,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2316,7 +2327,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
tree bitsize, bitpos, bytesize; tree bitsize, bitpos, bytesize;
enum tree_code code = gimple_assign_rhs_code (stmt); enum tree_code code = gimple_assign_rhs_code (stmt);
tree adjustment_def; tree adjustment_def;
tree vec_initial_def; tree vec_initial_def, def;
tree orig_name; tree orig_name;
imm_use_iterator imm_iter; imm_use_iterator imm_iter;
use_operand_p use_p; use_operand_p use_p;
...@@ -2326,7 +2337,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2326,7 +2337,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
gimple use_stmt; gimple use_stmt;
bool nested_in_vect_loop = false; bool nested_in_vect_loop = false;
VEC(gimple,heap) *phis = NULL; VEC(gimple,heap) *phis = NULL;
int i; enum vect_def_type dt = vect_unknown_def_type;
int j, i;
if (nested_in_vect_loop_p (loop, stmt)) if (nested_in_vect_loop_p (loop, stmt))
{ {
...@@ -2356,25 +2368,34 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2356,25 +2368,34 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
/*** 1. Create the reduction def-use cycle ***/ /*** 1. Create the reduction def-use cycle ***/
/* 1.1 set the loop-entry arg of the reduction-phi: */
/* For the case of reduction, vect_get_vec_def_for_operand returns /* For the case of reduction, vect_get_vec_def_for_operand returns
the scalar def before the loop, that defines the initial value the scalar def before the loop, that defines the initial value
of the reduction variable. */ of the reduction variable. */
vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
&adjustment_def); &adjustment_def);
add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
phi = reduction_phi;
def = vect_def;
for (j = 0; j < ncopies; j++)
{
/* 1.1 set the loop-entry arg of the reduction-phi: */
add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop));
/* 1.2 set the loop-latch arg for the reduction-phi: */ /* 1.2 set the loop-latch arg for the reduction-phi: */
add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); if (j > 0)
def = vect_get_vec_def_for_stmt_copy (dt, def);
add_phi_arg (phi, def, loop_latch_edge (loop));
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
fprintf (vect_dump, "transform reduction: created def-use cycle:"); fprintf (vect_dump, "transform reduction: created def-use cycle: ");
print_gimple_stmt (vect_dump, reduction_phi, 0, TDF_SLIM); print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
fprintf (vect_dump, "\n"); fprintf (vect_dump, "\n");
print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vect_def), 0, TDF_SLIM); print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
} }
phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
}
/*** 2. Create epilog code /*** 2. Create epilog code
The reduction epilog code operates across the elements of the vector The reduction epilog code operates across the elements of the vector
...@@ -2398,7 +2419,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2398,7 +2419,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
s_out3 = extract_field <v_out2, 0> # step 2 s_out3 = extract_field <v_out2, 0> # step 2
s_out4 = adjust_result <s_out3> # step 3 s_out4 = adjust_result <s_out3> # step 3
(step 3 is optional, and step2 1 and 2 may be combined). (step 3 is optional, and steps 1 and 2 may be combined).
Lastly, the uses of s_out0 are replaced by s_out4. Lastly, the uses of s_out0 are replaced by s_out4.
***/ ***/
...@@ -2407,8 +2428,22 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2407,8 +2428,22 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
v_out1 = phi <v_loop> */ v_out1 = phi <v_loop> */
exit_bb = single_exit (loop)->dest; exit_bb = single_exit (loop)->dest;
new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); def = vect_def;
SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def); prev_phi_info = NULL;
for (j = 0; j < ncopies; j++)
{
phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
if (j == 0)
new_phi = phi;
else
{
def = vect_get_vec_def_for_stmt_copy (dt, def);
STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
}
SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
prev_phi_info = vinfo_for_stmt (phi);
}
exit_gsi = gsi_after_labels (exit_bb); exit_gsi = gsi_after_labels (exit_bb);
/* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
...@@ -2449,6 +2484,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, ...@@ -2449,6 +2484,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
if (nested_in_vect_loop) if (nested_in_vect_loop)
goto vect_finalize_reduction; goto vect_finalize_reduction;
/* FORNOW */
gcc_assert (ncopies = 1);
/* 2.3 Create the reduction code, using one of the three schemes described /* 2.3 Create the reduction code, using one of the three schemes described
above. */ above. */
...@@ -2664,8 +2702,9 @@ vect_finalize_reduction: ...@@ -2664,8 +2702,9 @@ vect_finalize_reduction:
{ {
stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
/* FORNOW. Currently not supporting the case that an inner-loop reduction /* FORNOW. Currently not supporting the case that an inner-loop
is not used in the outer-loop (but only outside the outer-loop). */ reduction is not used in the outer-loop (but only outside the
outer-loop). */
gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
&& !STMT_VINFO_LIVE_P (stmt_vinfo)); && !STMT_VINFO_LIVE_P (stmt_vinfo));
...@@ -2673,6 +2712,9 @@ vect_finalize_reduction: ...@@ -2673,6 +2712,9 @@ vect_finalize_reduction:
STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt; STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
set_vinfo_for_stmt (epilog_stmt, set_vinfo_for_stmt (epilog_stmt,
new_stmt_vec_info (epilog_stmt, loop_vinfo)); new_stmt_vec_info (epilog_stmt, loop_vinfo));
if (adjustment_def)
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
continue; continue;
} }
...@@ -2745,7 +2787,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -2745,7 +2787,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
tree def; tree def;
gimple def_stmt; gimple def_stmt;
enum vect_def_type dt; enum vect_def_type dt;
gimple new_phi; gimple new_phi = NULL;
tree scalar_type; tree scalar_type;
bool is_simple_use; bool is_simple_use;
gimple orig_stmt; gimple orig_stmt;
...@@ -2754,23 +2796,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -2754,23 +2796,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
int i; int i;
int nunits = TYPE_VECTOR_SUBPARTS (vectype); int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
stmt_vec_info prev_stmt_info; int epilog_copies;
stmt_vec_info prev_stmt_info, prev_phi_info;
gimple first_phi = NULL;
bool single_defuse_cycle = false;
tree reduc_def; tree reduc_def;
gimple new_stmt = NULL; gimple new_stmt = NULL;
int j; int j;
tree ops[3]; tree ops[3];
if (nested_in_vect_loop_p (loop, stmt)) if (nested_in_vect_loop_p (loop, stmt))
{
loop = loop->inner; loop = loop->inner;
/* FORNOW. This restriction should be relaxed. */
if (ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
}
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
...@@ -3004,18 +3040,52 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3004,18 +3040,52 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
/* Create the destination vector */ /* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype); vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Create the reduction-phi that defines the reduction-operand. */
new_phi = create_phi_node (vec_dest, loop->header);
/* In case the vectorization factor (VF) is bigger than the number /* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. For more details see documentation vector stmt by a factor VF/nunits. For more details see documentation
in vectorizable_operation. */ in vectorizable_operation. */
/* If the reduction is used in an outer loop we need to generate
VF intermediate results, like so (e.g. for ncopies=2):
r0 = phi (init, r0)
r1 = phi (init, r1)
r0 = x0 + r0;
r1 = x1 + r1;
(i.e. we generate VF results in 2 registers).
In this case we have a separate def-use cycle for each copy, and therefore
for each copy we get the vector def for the reduction variable from the
respective phi node created for this copy.
Otherwise (the reduction is unused in the loop nest), we can combine
together intermediate results, like so (e.g. for ncopies=2):
r = phi (init, r)
r = x0 + r;
r = x1 + r;
(i.e. we generate VF/2 results in a single register).
In this case for each copy we get the vector def for the reduction variable
from the vectorized reduction operation generated in the previous iteration.
*/
if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
{
single_defuse_cycle = true;
epilog_copies = 1;
}
else
epilog_copies = ncopies;
prev_stmt_info = NULL; prev_stmt_info = NULL;
prev_phi_info = NULL;
for (j = 0; j < ncopies; j++) for (j = 0; j < ncopies; j++)
{ {
if (j == 0 || !single_defuse_cycle)
{
/* Create the reduction-phi that defines the reduction-operand. */
new_phi = create_phi_node (vec_dest, loop->header);
set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
}
/* Handle uses. */ /* Handle uses. */
if (j == 0) if (j == 0)
{ {
...@@ -3027,6 +3097,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3027,6 +3097,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
/* Get the vector def for the reduction variable from the phi node */ /* Get the vector def for the reduction variable from the phi node */
reduc_def = PHI_RESULT (new_phi); reduc_def = PHI_RESULT (new_phi);
first_phi = new_phi;
} }
else else
{ {
...@@ -3035,9 +3106,12 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3035,9 +3106,12 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
if (op_type == ternary_op) if (op_type == ternary_op)
loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1); loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
/* Get the vector def for the reduction variable from the vectorized if (single_defuse_cycle)
reduction operation generated in the previous iteration (j-1) */
reduc_def = gimple_assign_lhs (new_stmt); reduc_def = gimple_assign_lhs (new_stmt);
else
reduc_def = PHI_RESULT (new_phi);
STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
} }
/* Arguments are ready. create the new vector stmt. */ /* Arguments are ready. create the new vector stmt. */
...@@ -3056,11 +3130,15 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3056,11 +3130,15 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
else else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt); prev_stmt_info = vinfo_for_stmt (new_stmt);
prev_phi_info = vinfo_for_stmt (new_phi);
} }
/* Finalize the reduction-phi (set it's arguments) and create the /* Finalize the reduction-phi (set it's arguments) and create the
epilog reduction code. */ epilog reduction code. */
vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); if (!single_defuse_cycle)
new_temp = gimple_assign_lhs (*vec_stmt);
vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
epilog_reduc_code, first_phi);
return true; return true;
} }
...@@ -3108,7 +3186,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) ...@@ -3108,7 +3186,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
int nunits_in; int nunits_in;
int nunits_out; int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree fndecl, new_temp, def, rhs_type, lhs_type; tree fndecl, new_temp, def, rhs_type, lhs_type;
gimple def_stmt; gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
...@@ -3212,14 +3289,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) ...@@ -3212,14 +3289,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
needs to be generated. */ needs to be generated. */
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!vec_stmt) /* transformation not required. */ if (!vec_stmt) /* transformation not required. */
{ {
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
...@@ -3234,14 +3303,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) ...@@ -3234,14 +3303,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation."); fprintf (vect_dump, "transform operation.");
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
/* Handle def. */ /* Handle def. */
scalar_dest = gimple_call_lhs (stmt); scalar_dest = gimple_call_lhs (stmt);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out); vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
...@@ -3436,7 +3497,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3436,7 +3497,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree decl1 = NULL_TREE, decl2 = NULL_TREE;
tree new_temp; tree new_temp;
...@@ -3527,14 +3587,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3527,14 +3587,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
needs to be generated. */ needs to be generated. */
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
/* Check the operands of the operation. */ /* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
{ {
...@@ -3847,11 +3899,19 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, ...@@ -3847,11 +3899,19 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
stmt_vec_info stmt_info = vinfo_for_stmt (phi); stmt_vec_info stmt_info = vinfo_for_stmt (phi);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype); int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
tree vec_def; tree vec_def;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info)) if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false; return false;
...@@ -3903,7 +3963,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3903,7 +3963,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code; enum tree_code code;
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree new_temp; tree new_temp;
...@@ -3932,13 +3991,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3932,13 +3991,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (slp_node) if (slp_node)
ncopies = 1; ncopies = 1;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info)) if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false; return false;
...@@ -4257,7 +4309,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -4257,7 +4309,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
tree vec_oprnd0=NULL, vec_oprnd1=NULL; tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK; enum tree_code code, code1 = ERROR_MARK;
tree new_temp; tree new_temp;
tree def; tree def;
...@@ -4310,13 +4361,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -4310,13 +4361,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0))) && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
...@@ -4458,7 +4502,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -4458,7 +4502,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
tree vec_oprnd0=NULL, vec_oprnd1=NULL; tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree decl1 = NULL_TREE, decl2 = NULL_TREE;
int op_type; int op_type;
...@@ -4511,13 +4554,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -4511,13 +4554,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0))) && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment