Commit 06066f92 by Ira Rosen Committed by Ira Rosen

tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction with additional argument.


	* tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction
	with additional argument.
	* tree-vectorizer.h (enum vect_def_type): Add 
	vect_double_reduction_def.
	(vect_is_simple_reduction): Add argument.
	* tree-vect-loop.c (vect_determine_vectorization_factor): Fix 
	indentation.
	(vect_analyze_scalar_cycles_1): Detect double reduction. Call
	vect_is_simple_reduction with additional argument.
	(vect_analyze_loop_operations): Handle exit phi nodes in case of
	double reduction.
	(reduction_code_for_scalar_code): Handle additional codes by
	returning ERROR_MARK for them. Fix comment and indentation.
	(vect_is_simple_reduction): Fix comment, add argument to specify
	double reduction. Detect double reduction.
	(get_initial_def_for_induction): Fix indentation.
	(get_initial_def_for_reduction): Fix comment and indentation.
	Handle double reduction. Create initial definitions that do not
	require adjustment if ADJUSTMENT_DEF is NULL. Handle additional cases.
	(vect_create_epilog_for_reduction): Fix comment, add argument to
	handle double reduction. Use PLUS_EXPR in case of MINUS_EXPR in
	epilogue result extraction. Create double reduction phi node and
	replace relevant uses.
	(vectorizable_reduction): Call vect_is_simple_reduction with
	additional argument. Fix indentation. Update epilogue code treatment
	according to the changes in reduction_code_for_scalar_code. Check 
	for double reduction. Call vect_create_epilog_for_reduction with
	additional argument.
	* tree-vect-stmts.c (process_use): Handle double reduction, update
	documentation.
	(vect_mark_stmts_to_be_vectorized): Handle double reduction.
	(vect_get_vec_def_for_operand): Likewise.

From-SVN: r149526
parent b20231fe
2009-07-12 Ira Rosen <irar@il.ibm.com>
* tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction
with additional argument.
* tree-vectorizer.h (enum vect_def_type): Add
vect_double_reduction_def.
(vect_is_simple_reduction): Add argument.
* tree-vect-loop.c (vect_determine_vectorization_factor): Fix
indentation.
(vect_analyze_scalar_cycles_1): Detect double reduction. Call
vect_is_simple_reduction with additional argument.
(vect_analyze_loop_operations): Handle exit phi nodes in case of
double reduction.
(reduction_code_for_scalar_code): Handle additional codes by
returning ERROR_MARK for them. Fix comment and indentation.
(vect_is_simple_reduction): Fix comment, add argument to specify
double reduction. Detect double reduction.
(get_initial_def_for_induction): Fix indentation.
(get_initial_def_for_reduction): Fix comment and indentation.
Handle double reduction. Create initial definitions that do not
require adjustment if ADJUSTMENT_DEF is NULL. Handle additional cases.
(vect_create_epilog_for_reduction): Fix comment, add argument to
handle double reduction. Use PLUS_EXPR in case of MINUS_EXPR in
epilogue result extraction. Create double reduction phi node and
replace relevant uses.
(vectorizable_reduction): Call vect_is_simple_reduction with
additional argument. Fix indentation. Update epilogue code treatment
according to the changes in reduction_code_for_scalar_code. Check
for double reduction. Call vect_create_epilog_for_reduction with
additional argument.
* tree-vect-stmts.c (process_use): Handle double reduction, update
documentation.
(vect_mark_stmts_to_be_vectorized): Handle double reduction.
(vect_get_vec_def_for_operand): Likewise.
2009-07-12 Danny Smith <dansmister@gmail.com>
* config/i386/winnt.c (i386_pe_determine_dllexport_p): Don't
......
2009-07-12 Ira Rosen <irar@il.ibm.com>
* gcc.dg/vect/no-scevccp-outer-2.c: Expect to vectorize.
* gcc.dg/vect/vect-double-reduc-1.c, gcc.dg/vect/vect-double-reduc-2.c,
gcc.dg/vect/vect-double-reduc-3.c, gcc.dg/vect/vect-double-reduc-4.c,
gcc.dg/vect/vect-double-reduc-5.c, gcc.dg/vect/vect-double-reduc-6.c,
gcc.dg/vect/vect-double-reduc-7.c: New tests.
2009-07-12 Hans-Peter Nilsson <hp@axis.com>
* gfortran.dg/f2003_io_4.f03, gfortran.dg/read_size_noadvance.f90,
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#define N 40
int
......@@ -14,5 +16,5 @@ foo (){
return diff;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 32
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K] = {642816,660736,678656,696576,714496,732416,750336,768256,786176,804096,822016,839936,857856,875776,893696,911616,929536,947456,965376,983296,1001216,1019136,1037056,1054976,1072896,1090816,1108736,1126656,1144576,1162496,1180416,1198336};
/* Kernel under test: a "double reduction" -- the scalar SUM is reduced
   across the two inner loops (over j and i) and stored once per outer
   iteration K.  The outer k-loop is what the vectorizer is expected to
   vectorize (see the "OUTER LOOP VECTORIZED" scan-tree-dump directive
   at the end of this test).  'noinline' keeps the loop nest intact so
   the vectorizer, not the inliner/constant folder, processes it.  */
__attribute__ ((noinline)) void
foo ()
{
  int sum = 0, i, j, k;

  for (k = 0; k < K; k++)
    {
      /* Reset the reduction accumulator for each outer iteration;
         the initial value 0 needs no epilogue adjustment (PLUS_EXPR).  */
      sum = 0;
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          sum += in[i+k][j] * coeff[i][j];
      out[k] = sum;
    }
}
/* Driver: initialize the input matrices, run the kernel and compare
   OUT against the precomputed CHECK_RESULT table; abort on mismatch.  */
int main ()
{
  int i, j, k;

  /* check_vect() comes from tree-vect.h; presumably it exits early on
     targets without hardware vector support -- TODO confirm against the
     testsuite harness.  */
  check_vect ();

  /* Deterministic, non-constant-foldable initialization (values depend
     on both indices) so the reduction is actually computed at runtime.  */
  for (j = 0; j < K; j++)
    {
      for (i = 0; i < 2*K; i++)
        in[i][j] = i+j;
      for (i = 0; i < K; i++)
        coeff[i][j] = i+2;
    }

  foo();

  for (k = 0; k < K; k++)
    if (out[k] != check_result[k])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 32
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K] = {357184,339264,321344,303424,285504,267584,249664,231744,213824,195904,177984,160064,142144,124224,106304,88384,70464,52544,34624,16704,-1216,-19136,-37056,-54976,-72896,-90816,-108736,-126656,-144576,-162496,-180416,-198336};
/* Double reduction with subtraction (MINUS_EXPR): RES is reduced across
   the two inner loops by repeated subtraction, once per outer iteration.
   This exercises the epilogue path where MINUS_EXPR partial results are
   combined with PLUS_EXPR (see the tree-vect-loop.c changes in this
   commit).  The outer k-loop is expected to be vectorized.  */
__attribute__ ((noinline)) void
foo ()
{
  int res = 0, i, j, k;

  for (k = 0; k < K; k++)
    {
      /* Nonzero initial value: the epilogue must adjust the vector
         partial results by this start value.  */
      res = 1000000;
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          res -= in[i+k][j] * coeff[i][j];
      out[k] = res;
    }
}
/* Driver: fill IN and COEFF, run the subtraction-reduction kernel and
   verify OUT against CHECK_RESULT (which includes negative values,
   checking signed arithmetic survives vectorization); abort on error.  */
int main ()
{
  int i, j, k;

  /* check_vect() from tree-vect.h -- presumably skips the test on
     targets without vector hardware; TODO confirm.  */
  check_vect ();

  for (j = 0; j < K; j++)
    {
      for (i = 0; i < 2*K; i++)
        in[i][j] = i+j;
      for (i = 0; i < K; i++)
        coeff[i][j] = i+2;
    }

  foo();

  for (k = 0; k < K; k++)
    if (out[k] != check_result[k])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 32
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out_max[K], out_min[K];
int check_max[K] = {62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93};
int check_min[K] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
/* Two simultaneous double reductions using MIN/MAX: for each outer
   iteration K, MAX and MIN are reduced over the inner j/i loop nest.
   The initial values X and Y are runtime parameters, so the vectorizer
   must splat them rather than fold them (the min/max case needs no
   epilogue adjustment -- the initial value fills the whole vector).
   The dump scan xfails this on vect_no_int_max targets.  */
__attribute__ ((noinline)) void
foo (int x, int y)
{
  int max, min, i, j, k;

  for (k = 0; k < K; k++)
    {
      max = x;   /* Start value for the MAX reduction.  */
      min = y;   /* Start value for the MIN reduction.  */
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          {
            max = max < in[i+k][j] ? in[i+k][j] : max;
            min = min > in[i+k][j] ? in[i+k][j] : min;
          }
      out_max[k] = max;
      out_min[k] = min;
    }
}
/* Driver: run the min/max kernel twice with different seed values so
   that each of MAX and MIN is exercised both when the seed dominates
   the data and when the data dominates the seed.  Abort on mismatch.  */
int main ()
{
  int i, j, k;

  /* check_vect() from tree-vect.h -- presumably skips on non-vector
     targets; TODO confirm.  */
  check_vect ();

  for (j = 0; j < K; j++)
    {
      for (i = 0; i < 2*K; i++)
        in[i][j] = i+j;      /* Data range is 0 .. 2K-2 + K-1.  */
      for (i = 0; i < K; i++)
        coeff[i][j] = i+2;
    }

  /* Seeds 0/0: the data (all >= 0) determines max; min stays 0.  */
  foo(0, 0);
  for (k = 0; k < K; k++)
    if (out_max[k] != check_max[k] || out_min[k] != 0)
      abort ();

  /* Seeds 100/45: 100 exceeds every element so max stays 100;
     the data determines min.  */
  foo(100, 45);
  for (k = 0; k < K; k++)
    if (out_min[k] != check_min[k] || out_max[k] != 100)
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 32
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K] = {652816,670736,688656,706576,724496,742416,760336,778256,796176,814096,832016,849936,867856,885776,903696,921616,939536,957456,975376,993296,1011216,1029136,1047056,1064976,1082896,1100816,1118736,1136656,1154576,1172496,1190416,1208336};
/* Double reduction by addition with a nonzero initial value: the
   epilogue must adjust the combined vector partial results by the
   start value 10000 (the "adjust in epilog" scheme described in
   get_initial_def_for_reduction).  Outer k-loop expected vectorized.  */
__attribute__ ((noinline)) void
foo ()
{
  int sum = 0, i, j, k;

  for (k = 0; k < K; k++)
    {
      sum = 10000;   /* Nonzero start value -- needs epilogue adjustment.  */
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          sum += in[i+k][j] * coeff[i][j];
      out[k] = sum;
    }
}
/* Driver: initialize inputs, run the kernel and compare OUT with the
   precomputed CHECK_RESULT table (each entry is 10000 larger than the
   corresponding reduc-1 result, matching the start value).  */
int main ()
{
  int i, j, k;

  /* check_vect() from tree-vect.h -- presumably skips on non-vector
     targets; TODO confirm.  */
  check_vect ();

  for (j = 0; j < K; j++)
    {
      for (i = 0; i < 2*K; i++)
        in[i][j] = i+j;
      for (i = 0; i < K; i++)
        coeff[i][j] = i+2;
    }

  foo();

  for (k = 0; k < K; k++)
    if (out[k] != check_result[k])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 32
signed short in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K] = {642816,660736,678656,696576,714496,732416,750336,768256,786176,804096,822016,839936,857856,875776,893696,911616,929536,947456,965376,983296,1001216,1019136,1037056,1054976,1072896,1090816,1108736,1126656,1144576,1162496,1180416,1198336};
/* Same double-reduction-by-sum kernel as reduc-1 but the inputs are
   'signed short' while the accumulator is 'int', i.e. a multi-type
   (widening) double reduction.  Per the comment at the end of this
   test, that combination is not supported yet, so the outer-loop
   vectorization scan is xfailed -- this still checks correct scalar
   execution and that the vectorizer does not miscompile it.  */
__attribute__ ((noinline)) void
foo ()
{
  int sum = 0, i, j, k;

  for (k = 0; k < K; k++)
    {
      sum = 0;
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          /* short * short widened into the int accumulator.  */
          sum += in[i+k][j] * coeff[i][j];
      out[k] = sum;
    }
}
/* Driver: initialize the short input matrices and verify the int
   results against CHECK_RESULT; abort on mismatch.  Values of i+j and
   i+2 fit comfortably in signed short, so no overflow occurs.  */
int main ()
{
  int i, j, k;

  /* check_vect() from tree-vect.h -- presumably skips on non-vector
     targets; TODO confirm.  */
  check_vect ();

  for (j = 0; j < K; j++)
    {
      for (i = 0; i < 2*K; i++)
        in[i][j] = i+j;
      for (i = 0; i < K; i++)
        coeff[i][j] = i+2;
    }

  foo();

  for (k = 0; k < K; k++)
    if (out[k] != check_result[k])
      abort ();

  return 0;
}
/* Vectorization of loops with multiple types and double reduction is not
supported yet. */
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 4
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K] = {0,16,256,4096};
/* Double reduction by multiplication (MULT_EXPR): SUM starts at the
   multiplicative identity 1 and is reduced over the inner loop nest.
   K is only 4 here so the products (up to 4096, see check_result) stay
   well inside 'int' range.  The outer k-loop is expected vectorized.  */
__attribute__ ((noinline)) void
foo ()
{
  int sum;
  int i, j, k;

  for (k = 0; k < K; k++)
    {
      sum = 1;   /* Multiplicative identity -- initial vector is all-ones.  */
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          sum *= in[i+k][j];
      out[k] = sum;
    }
}
/* Driver: fill IN with (i+2)/3 -- this yields small factors including
   some zeros/ones (integer division), giving the distinctive expected
   products {0,16,256,4096} -- then verify OUT against CHECK_RESULT.  */
int main ()
{
  int i, j, k;

  /* check_vect() from tree-vect.h -- presumably skips on non-vector
     targets; TODO confirm.  */
  check_vect ();

  for (i = 0; i < 2*K; i++)
    for (j = 0; j < K; j++)
      in[i][j] = (i+2)/3;

  foo();

  for (k = 0; k < K; k++)
    if (out[k] != check_result[k])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define K 32
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K] = {63,63,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191};
/* Three separate double reductions in one function, one per bitwise
   operation: OR (identity 0), AND (started at 1) and XOR (identity 0).
   Each is a full j/i loop nest inside the outer k-loop, so the test
   expects THREE "OUTER LOOP VECTORIZED" dump matches.  The three
   results are combined into a single output element per k.  */
__attribute__ ((noinline)) void
foo ()
{
  int res_or, res_and, res_xor, i, j, k;

  for (k = 0; k < K; k++)
    {
      /* BIT_IOR_EXPR reduction.  */
      res_or = 0;
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          res_or = res_or | in[i+k][j];

      /* BIT_AND_EXPR reduction; started at 1, so only bit 0 of the
         AND across all elements can survive.  */
      res_and = 1;
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          res_and = res_and & in[i+k][j];

      /* BIT_XOR_EXPR reduction.  */
      res_xor = 0;
      for (j = 0; j < K; j++)
        for (i = 0; i < K; i++)
          res_xor = res_xor ^ in[i+k][j];

      out[k] = res_or + res_and + res_xor;
    }
}
/* Driver: initialize IN, pre-poison OUT with nonzero garbage (so the
   check proves the kernel actually overwrote every element), run the
   bitwise-reduction kernel and compare against CHECK_RESULT.  */
int main ()
{
  int i, j, k;

  /* check_vect() from tree-vect.h -- presumably skips on non-vector
     targets; TODO confirm.  */
  check_vect ();

  for (j = 0; j < K; j++)
    {
      for (i = 0; i < 2*K; i++)
        in[i][j] = i+j;
      /* Note: this fills OUT, not COEFF -- deliberate, so stale values
         cannot mask a kernel that fails to store.  */
      for (i = 0; i < K; i++)
        out[i] = i+j;
    }

  foo();

  for (k = 0; k < K; k++)
    if (out[k] != check_result[k])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 3 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -284,13 +284,15 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
{
gimple phi = gsi_stmt (gsi);
gimple reduc_stmt = NULL;
bool dummy;
/* ??? TODO: Change this into a generic function that
recognizes reductions. */
if (!is_gimple_reg (PHI_RESULT (phi)))
continue;
if (simple_loop_info)
reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true);
reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true,
&dummy);
/* Create a reduction_info struct, initialize it and insert it to
the reduction list. */
......
......@@ -291,8 +291,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
}
else
{
gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
&& !is_pattern_stmt_p (stmt_info));
scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
......@@ -410,6 +409,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
tree dumy;
VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
gimple_stmt_iterator gsi;
bool double_reduc;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
......@@ -477,9 +477,21 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle);
reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle,
&double_reduc);
if (reduc_stmt)
{
if (double_reduc)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Detected double reduction.");
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
vect_double_reduction_def;
}
else
{
if (nested_cycle)
{
if (vect_print_dump_info (REPORT_DETAILS))
......@@ -499,6 +511,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
vect_reduction_def;
}
}
}
else
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Unknown def-use cycle pattern.");
......@@ -1111,10 +1124,13 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
/* inner-loop loop-closed exit phi in outer-loop vectorization
(i.e. a phi in the tail of the outer-loop).
FORNOW: we currently don't support the case that these phis
are not used in the outerloop, cause this case requires
to actually do something here. */
if (!STMT_VINFO_RELEVANT_P (stmt_info)
are not used in the outerloop (unless it is double reduction,
i.e., this phi is vect_reduction_def), cause this case
requires to actually do something here. */
if ((!STMT_VINFO_RELEVANT_P (stmt_info)
|| STMT_VINFO_LIVE_P (stmt_info))
&& STMT_VINFO_DEF_TYPE (stmt_info)
!= vect_double_reduction_def)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
......@@ -1466,9 +1482,10 @@ vect_analyze_loop (struct loop *loop)
Output:
REDUC_CODE - the corresponding tree-code to be used to reduce the
vector of partial results into a single scalar result (which
will also reside in a vector).
will also reside in a vector) or ERROR_MARK if the operation is
a supported reduction operation, but does not have such tree-code.
Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */
Return FALSE if CODE currently cannot be vectorized as reduction. */
static bool
reduction_code_for_scalar_code (enum tree_code code,
......@@ -1488,6 +1505,14 @@ reduction_code_for_scalar_code (enum tree_code code,
*reduc_code = REDUC_PLUS_EXPR;
return true;
case MULT_EXPR:
case MINUS_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
*reduc_code = ERROR_MARK;
return true;
default:
return false;
}
......@@ -1507,7 +1532,7 @@ report_vect_op (gimple stmt, const char *msg)
/* Function vect_is_simple_reduction
Detect a cross-iteration def-use cycle that represents a simple
(1) Detect a cross-iteration def-use cycle that represents a simple
reduction computation. We look for the following pattern:
loop_header:
......@@ -1524,12 +1549,20 @@ report_vect_op (gimple stmt, const char *msg)
Condition 1 is tested here.
Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
Also detect a cross-iteration def-use cycle in nested loops, i.e., nested
cycles, if CHECK_REDUCTION is false. */
(2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
nested cycles, if CHECK_REDUCTION is false.
(3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
reductions:
a1 = phi < a0, a2 >
inner loop (def of a3)
a2 = phi < a3 >
*/
gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
bool check_reduction)
bool check_reduction, bool *double_reduc)
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
......@@ -1543,6 +1576,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
tree name;
imm_use_iterator imm_iter;
use_operand_p use_p;
bool phi_def;
*double_reduc = false;
/* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
otherwise, we assume outer loop vectorization. */
......@@ -1584,14 +1620,24 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
return NULL;
}
if (!is_gimple_assign (def_stmt))
if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
{
if (vect_print_dump_info (REPORT_DETAILS))
print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
return NULL;
}
if (is_gimple_assign (def_stmt))
{
name = gimple_assign_lhs (def_stmt);
phi_def = false;
}
else
{
name = PHI_RESULT (def_stmt);
phi_def = true;
}
nloop_uses = 0;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
......@@ -1608,6 +1654,37 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
}
}
/* If DEF_STMT is a phi node itself, we expect it to have a single argument
defined in the inner loop. */
if (phi_def)
{
op1 = PHI_ARG_DEF (def_stmt, 0);
if (gimple_phi_num_args (def_stmt) != 1
|| TREE_CODE (op1) != SSA_NAME)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported phi node definition.");
return NULL;
}
def1 = SSA_NAME_DEF_STMT (op1);
if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
&& loop->inner
&& flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
&& is_gimple_assign (def1))
{
if (vect_print_dump_info (REPORT_DETAILS))
report_vect_op (def_stmt, "detected double reduction: ");
*double_reduc = true;
return def_stmt;
}
return NULL;
}
code = gimple_assign_rhs_code (def_stmt);
if (check_reduction
......@@ -1697,7 +1774,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
return NULL;
}
/* Check that one def is the reduction def, defined by PHI,
the other def is either defined in the loop ("vect_internal_def"),
or it's an induction (defined by a loop-header phi-node). */
......@@ -2306,7 +2382,8 @@ get_initial_def_for_induction (gimple iv_phi)
/* iv_loop is nested in the loop to be vectorized. init_expr had already
been created during vectorization of previous stmts; We obtain it from
the STMT_VINFO_VEC_STMT of the defining stmt. */
tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
loop_preheader_edge (iv_loop));
vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
}
else
......@@ -2507,18 +2584,16 @@ get_initial_def_for_induction (gimple iv_phi)
vector of partial results.
Option1 (adjust in epilog): Initialize the vector as follows:
add: [0,0,...,0,0]
mult: [1,1,...,1,1]
add/bit or/xor: [0,0,...,0,0]
mult/bit and: [1,1,...,1,1]
min/max: [init_val,init_val,..,init_val,init_val]
bit and/or: [init_val,init_val,..,init_val,init_val]
and when necessary (e.g. add/mult case) let the caller know
that it needs to adjust the result by init_val.
Option2: Initialize the vector as follows:
add: [0,0,...,0,init_val]
mult: [1,1,...,1,init_val]
add/bit or/xor: [init_val,0,0,...,0]
mult/bit and: [init_val,1,1,...,1]
min/max: [init_val,init_val,...,init_val]
bit and/or: [init_val,init_val,...,init_val]
and no adjustments are needed.
For example, for the following code:
......@@ -2533,11 +2608,14 @@ get_initial_def_for_induction (gimple iv_phi)
the result at the end by 'init_val'.
FORNOW, we are using the 'adjust in epilog' scheme, because this way the
initialization vector is simpler (same element in all entries).
initialization vector is simpler (same element in all entries), if
ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
A cost model should help decide between these two schemes. */
tree
get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
get_initial_def_for_reduction (gimple stmt, tree init_val,
tree *adjustment_def)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
......@@ -2551,43 +2629,114 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
tree t = NULL_TREE;
int i;
bool nested_in_vect_loop = false;
tree init_value;
REAL_VALUE_TYPE real_init_val = dconst0;
int int_init_val = 0;
gcc_assert (vectype);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
|| SCALAR_FLOAT_TYPE_P (scalar_type));
if (nested_in_vect_loop_p (loop, stmt))
nested_in_vect_loop = true;
else
gcc_assert (loop == (gimple_bb (stmt))->loop_father);
/* In case of double reduction we only create a vector variable to be put
in the reduction phi node. The actual statement creation is done in
vect_create_epilog_for_reduction. */
if (TREE_CODE (init_val) == SSA_NAME
&& vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val))
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val)))
== vect_double_reduction_def)
{
*adjustment_def = NULL;
return vect_create_destination_var (init_val, vectype);
}
if (TREE_CONSTANT (init_val))
{
if (SCALAR_FLOAT_TYPE_P (scalar_type))
init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
else
init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
}
else
init_value = init_val;
switch (code)
{
case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case MULT_EXPR:
case BIT_AND_EXPR:
/* ADJUSTMENT_DEF is NULL when called from
vect_create_epilog_for_reduction to vectorize double reduction. */
if (adjustment_def)
{
if (nested_in_vect_loop)
*adjustment_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
*adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
NULL);
else
*adjustment_def = init_val;
/* Create a vector of zeros for init_def. */
}
if (code == MULT_EXPR || code == BIT_AND_EXPR)
{
real_init_val = dconst1;
int_init_val = 1;
}
if (SCALAR_FLOAT_TYPE_P (scalar_type))
def_for_init = build_real (scalar_type, dconst0);
def_for_init = build_real (scalar_type, real_init_val);
else
def_for_init = build_int_cst (scalar_type, 0);
def_for_init = build_int_cst (scalar_type, int_init_val);
for (i = nunits - 1; i >= 0; --i)
/* Create a vector of '0' or '1' except the first element. */
for (i = nunits - 2; i >= 0; --i)
t = tree_cons (NULL_TREE, def_for_init, t);
/* Option1: the first element is '0' or '1' as well. */
if (adjustment_def)
{
t = tree_cons (NULL_TREE, def_for_init, t);
init_def = build_vector (vectype, t);
break;
}
/* Option2: the first element is INIT_VAL. */
t = tree_cons (NULL_TREE, init_value, t);
if (TREE_CONSTANT (init_val))
init_def = build_vector (vectype, t);
else
init_def = build_constructor_from_list (vectype, t);
break;
case MIN_EXPR:
case MAX_EXPR:
if (adjustment_def)
{
*adjustment_def = NULL_TREE;
init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
break;
}
for (i = nunits - 1; i >= 0; --i)
t = tree_cons (NULL_TREE, init_value, t);
if (TREE_CONSTANT (init_val))
init_def = build_vector (vectype, t);
else
init_def = build_constructor_from_list (vectype, t);
break;
default:
gcc_unreachable ();
......@@ -2613,6 +2762,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
REDUCTION_PHI is the phi-node that carries the reduction computation.
REDUC_INDEX is the index of the operand in the right hand side of the
statement that is defined by REDUCTION_PHI.
DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
This function:
1. Creates the reduction def-use cycle: sets the arguments for
......@@ -2657,14 +2807,15 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
int ncopies,
enum tree_code reduc_code,
gimple reduction_phi,
int reduc_index)
int reduc_index,
bool double_reduc)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_phi_info;
tree vectype;
enum machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
basic_block exit_bb;
tree scalar_dest;
tree scalar_type;
......@@ -2694,6 +2845,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
if (nested_in_vect_loop_p (loop, stmt))
{
outer_loop = loop;
loop = loop->inner;
nested_in_vect_loop = true;
}
......@@ -2831,15 +2983,25 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
bitsize = TYPE_SIZE (scalar_type);
bytesize = TYPE_SIZE_UNIT (scalar_type);
/* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
partial results are added and not subtracted. */
if (code == MINUS_EXPR)
code = PLUS_EXPR;
/* In case this is a reduction in an inner-loop while vectorizing an outer
loop - we don't need to extract a single scalar result at the end of the
inner-loop. The final vector of partial results will be used in the
vectorized outer-loop, or reduced to a scalar result at the end of the
outer-loop. */
if (nested_in_vect_loop)
inner-loop (unless it is double reduction, i.e., the use of reduction is
outside the outer-loop). The final vector of partial results will be used
in the vectorized outer-loop, or reduced to a scalar result at the end of
the outer-loop. */
if (nested_in_vect_loop && !double_reduc)
goto vect_finalize_reduction;
/* The epilogue is created for the outer-loop, i.e., for the loop being
vectorized. */
if (double_reduc)
loop = outer_loop;
/* FORNOW */
gcc_assert (ncopies == 1);
......@@ -2914,6 +3076,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
bit_offset /= 2)
{
tree bitpos = size_int (bit_offset);
epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest,
new_temp, bitpos);
new_name = make_ssa_name (vec_dest, epilog_stmt);
......@@ -2987,7 +3150,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
{
tree rhs;
gcc_assert (!nested_in_vect_loop);
gcc_assert (!nested_in_vect_loop || double_reduc);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "extract scalar result");
......@@ -3007,6 +3170,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
vect_finalize_reduction:
if (double_reduc)
loop = loop->inner;
/* 2.5 Adjust the final result by the initial value of the reduction
variable. (When such adjustment is not needed, then
'adjustment_def' is zero). For example, if code is PLUS we create:
......@@ -3016,11 +3182,6 @@ vect_finalize_reduction:
{
if (nested_in_vect_loop)
{
/* For MINUS_EXPR we create new_temp = loop_exit_def + adjustment_def
since the initial value is [0,0,...,0]. */
if (code == MINUS_EXPR)
code = PLUS_EXPR;
gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
new_dest = vect_create_destination_var (scalar_dest, vectype);
......@@ -3055,6 +3216,7 @@ vect_finalize_reduction:
VEC_quick_push (gimple, phis, exit_phi);
}
}
/* We expect to have found an exit_phi because of loop-closed-ssa form. */
gcc_assert (!VEC_empty (gimple, phis));
......@@ -3063,12 +3225,13 @@ vect_finalize_reduction:
if (nested_in_vect_loop)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
gimple vect_phi;
/* FORNOW. Currently not supporting the case that an inner-loop
reduction is not used in the outer-loop (but only outside the
outer-loop). */
gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
&& !STMT_VINFO_LIVE_P (stmt_vinfo));
outer-loop), unless it is double reduction. */
gcc_assert ((STMT_VINFO_RELEVANT_P (stmt_vinfo)
&& !STMT_VINFO_LIVE_P (stmt_vinfo)) || double_reduc);
epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
......@@ -3078,7 +3241,88 @@ vect_finalize_reduction:
if (adjustment_def)
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
if (!double_reduc
|| STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_double_reduction_def)
continue;
/* Handle double reduction:
stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
stmt2: s3 = phi <s1, s4> - (regular) reduction phi (inner loop)
stmt3: s4 = use (s3) - (regular) reduction stmt (inner loop)
stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
At that point the regular reduction (stmt2 and stmt3) is already
vectorized, as well as the exit phi node, stmt4.
Here we vectorize the phi node of double reduction, stmt1, and
update all relevant statements. */
/* Go through all the uses of s2 to find double reduction phi node,
i.e., stmt1 above. */
orig_name = PHI_RESULT (exit_phi);
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
{
stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
stmt_vec_info new_phi_vinfo;
tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
basic_block bb = gimple_bb (use_stmt);
gimple use;
/* Check that USE_STMT is really double reduction phi node. */
if (gimple_code (use_stmt) != GIMPLE_PHI
|| gimple_phi_num_args (use_stmt) != 2
|| !use_stmt_vinfo
|| STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
!= vect_double_reduction_def
|| bb->loop_father != outer_loop)
continue;
/* Create vector phi node for double reduction:
vs1 = phi <vs0, vs2>
vs1 was created previously in this function by a call to
vect_get_vec_def_for_operand and is stored in vec_initial_def;
vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
vs0 is created here. */
/* Create vector phi node. */
vect_phi = create_phi_node (vec_initial_def, bb);
new_phi_vinfo = new_stmt_vec_info (vect_phi,
loop_vec_info_for_loop (outer_loop), NULL);
set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
/* Create vs0 - initial def of the double reduction phi. */
preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
loop_preheader_edge (outer_loop));
init_def = get_initial_def_for_reduction (stmt, preheader_arg,
NULL);
vect_phi_init = vect_init_vector (use_stmt, init_def, vectype,
NULL);
/* Update phi node arguments with vs0 and vs2. */
add_phi_arg (vect_phi, vect_phi_init,
loop_preheader_edge (outer_loop));
add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
loop_latch_edge (outer_loop));
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "created double reduction phi node: ");
print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
}
vect_phi_res = PHI_RESULT (vect_phi);
/* Replace the use, i.e., set the correct vs1 in the regular
reduction phi node. FORNOW, NCOPIES is always 1, so the loop
is redundant. */
use = reduction_phi;
for (j = 0; j < ncopies; j++)
{
edge pr_edge = loop_preheader_edge (loop);
SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
}
}
}
/* Replace the uses: */
......@@ -3087,6 +3331,7 @@ vect_finalize_reduction:
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_temp);
}
VEC_free (gimple, heap, phis);
}
......@@ -3171,6 +3416,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
gimple reduc_def_stmt = NULL;
/* The default is that the reduction variable is the last in statement. */
int reduc_index = 2;
bool double_reduc = false, dummy;
basic_block def_bb;
struct loop * def_stmt_loop;
tree def_arg;
if (nested_in_vect_loop_p (loop, stmt))
{
......@@ -3185,7 +3434,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
return false;
/* 1. Is vectorizable reduction? */
/* Not supportable if the reduction variable is used in the loop. */
if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
return false;
......@@ -3300,10 +3548,11 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
if (orig_stmt)
gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
reduc_def_stmt,
!nested_cycle));
!nested_cycle,
&dummy));
else
gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
!nested_cycle));
!nested_cycle, &dummy));
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
......@@ -3400,26 +3649,44 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
orig_code = code;
}
if (nested_cycle)
epilog_reduc_code = orig_code;
else
if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
return false;
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
optab_default);
if (!reduc_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab for reduction.");
epilog_reduc_code = ERROR_MARK;
}
if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
if (reduc_optab
&& optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "reduc op not supported by target.");
epilog_reduc_code = ERROR_MARK;
}
def_bb = gimple_bb (reduc_def_stmt);
def_stmt_loop = def_bb->loop_father;
def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
loop_preheader_edge (def_stmt_loop));
if (TREE_CODE (def_arg) == SSA_NAME
&& vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg))
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg)))
== vect_double_reduction_def)
double_reduc = true;
if (double_reduc && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in double reduction");
return false;
}
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
......@@ -3560,8 +3827,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
epilog reduction code. */
if (!single_defuse_cycle)
new_temp = gimple_assign_lhs (*vec_stmt);
vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
epilog_reduc_code, first_phi, reduc_index);
epilog_reduc_code, first_phi, reduc_index,
double_reduc);
return true;
}
......
......@@ -331,7 +331,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
...
inner-loop:
d = def_stmt
outer-loop-tail-bb:
outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
stmt # use (d) */
else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
{
......@@ -341,7 +341,8 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
switch (relevant)
{
case vect_unused_in_scope:
relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
vect_used_in_outer_by_reduction : vect_unused_in_scope;
break;
......@@ -393,7 +394,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
basic_block bb;
gimple phi;
bool live_p;
enum vect_relevant relevant;
enum vect_relevant relevant, tmp_relevant;
enum vect_def_type def_type;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
......@@ -465,13 +467,14 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
identify stmts that are used solely by a reduction, and therefore the
order of the results that they produce does not have to be kept. */
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
tmp_relevant = relevant;
switch (def_type)
{
enum vect_relevant tmp_relevant = relevant;
case vect_reduction_def:
switch (tmp_relevant)
{
case vect_unused_in_scope:
gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
relevant = vect_used_by_reduction;
break;
......@@ -483,23 +486,19 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
default:
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported use of reduction.");
VEC_free (gimple, heap, worklist);
return false;
}
live_p = false;
}
else if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle)
{
enum vect_relevant tmp_relevant = relevant;
switch (tmp_relevant)
{
case vect_unused_in_scope:
case vect_used_in_outer_by_reduction:
case vect_used_in_outer:
break;
default:
case vect_nested_cycle:
if (tmp_relevant != vect_unused_in_scope
&& tmp_relevant != vect_used_in_outer_by_reduction
&& tmp_relevant != vect_used_in_outer)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported use of nested cycle.");
......@@ -508,6 +507,24 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
}
live_p = false;
break;
case vect_double_reduction_def:
if (tmp_relevant != vect_unused_in_scope
&& tmp_relevant != vect_used_by_reduction)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported use of double reduction.");
VEC_free (gimple, heap, worklist);
return false;
}
live_p = false;
break;
default:
break;
}
FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
......@@ -974,6 +991,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
/* Case 4: operand is defined by a loop header phi - reduction */
case vect_reduction_def:
case vect_double_reduction_def:
case vect_nested_cycle:
{
struct loop *loop;
......
......@@ -61,6 +61,7 @@ enum vect_def_type {
vect_internal_def,
vect_induction_def,
vect_reduction_def,
vect_double_reduction_def,
vect_nested_cycle,
vect_unknown_def_type
};
......@@ -822,7 +823,7 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
/* In tree-vect-loop.c. */
/* FORNOW: Used in tree-parloops.c. */
extern void destroy_loop_vec_info (loop_vec_info, bool);
extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool);
extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool, bool *);
/* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment