Commit 7c5222ff by Ira Rosen Committed by Ira Rosen

tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction with additional parameter.


	* tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction
	with additional parameter.
	* tree-vectorizer.h (enum vect_def_type): Add new value 
	vect_nested_cycle.
	(enum vect_relevant): Add comments.
	(vect_is_simple_reduction): Add new argument.
	* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Add comments.
	Detect nested cycles.
	(vect_is_simple_reduction): Update documentation, add an argument to
	distinguish inner-loop reduction from nested cycle, detect nested
	cycles, fix printings and indentation, don't swap operands in case
	of nested cycle.
	(get_initial_def_for_reduction): Handle subtraction.
	(vect_create_epilog_for_reduction): Add new argument to specify
	reduction variable.
	(vect_finalize_reduction): Handle subtraction, fix comments.
	(vectorizable_reduction): Handle nested cycles. In case of nested cycle
	keep track of the reduction variable position. Call 
	vect_is_simple_reduction with additional parameter. Use original 
	statement code in reduction epilogue for nested cycle. Call
	vect_create_epilog_for_reduction with additional parameter.
	* tree-vect-patterns.c (vect_recog_dot_prod_pattern): Assert inner-loop
	vectorization.
	(vect_recog_widen_sum_pattern): Likewise.
	* tree-vect-stmts.c (process_use): Distinguish between nested cycles
	and reductions.
	(vect_mark_stmts_to_be_vectorized): Likewise.
	(vect_get_vec_def_for_operand): Handle nested cycles.

From-SVN: r148518
parent 3ba558db
2009-06-16 Ira Rosen <irar@il.ibm.com>
* tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction
with additional parameter.
* tree-vectorizer.h (enum vect_def_type): Add new value
vect_nested_cycle.
(enum vect_relevant): Add comments.
(vect_is_simple_reduction): Add new argument.
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Add comments.
Detect nested cycles.
(vect_is_simple_reduction): Update documentation, add an argument to
distinguish inner-loop reduction from nested cycle, detect nested
cycles, fix printings and indentation, don't swap operands in case
of nested cycle.
(get_initial_def_for_reduction): Handle subtraction.
(vect_create_epilog_for_reduction): Add new argument to specify
reduction variable.
(vect_finalize_reduction): Handle subtraction, fix comments.
(vectorizable_reduction): Handle nested cycles. In case of nested cycle
keep track of the reduction variable position. Call
vect_is_simple_reduction with additional parameter. Use original
statement code in reduction epilogue for nested cycle. Call
vect_create_epilog_for_reduction with additional parameter.
* tree-vect-patterns.c (vect_recog_dot_prod_pattern): Assert inner-loop
vectorization.
(vect_recog_widen_sum_pattern): Likewise.
* tree-vect-stmts.c (process_use): Distinguish between nested cycles
and reductions.
(vect_mark_stmts_to_be_vectorized): Likewise.
(vect_get_vec_def_for_operand): Handle nested cycles.
2009-06-16 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
* doc/invoke.texi (Debugging Options): Fix option index entries
......
2009-06-16 Ira Rosen <irar@il.ibm.com>
* gcc.dg/vect/vect-outer-4g.c: Don't look for pattern not allowed
printing.
* gcc.dg/vect/vect-outer-4k.c, gcc.dg/vect/vect-outer-4l.c,
gcc.dg/vect/vect-outer-4f.c: Likewise.
* gcc.dg/vect/vect-nest-cycle-1.c: New test.
* gcc.dg/vect/vect-nest-cycle-2.c, gcc.dg/vect/vect-nest-cycle-3.c:
Likewise.
* gcc.dg/vect/vect-outer-1a.c: Fail because of strided access in outer
loop.
2009-06-16 Tobias Burnus <burnus@net-b.de>
PR fortran/40383
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
/* Trip count of both the outer (i) and the inner (j) loop in foo.  */
#define N 16
/* Starting value of the accumulator for each outer iteration i.  */
float in[N] = {232,132,32,432,532,321,327,323,321,324,322,329,432,832,932,232};
/* Results produced by foo, one element per outer iteration.  */
float out[N];
/* Expected contents of out[]: check_res[i] is in[i] with a[i] .. a[i+N-1]
   subtracted from it (e.g. 232 - (0 + 1 + ... + 15) == 112).  */
float check_res[N] = {112,-4,-120,264,348,121,111,91,73,60,42,33,120,504,588,-128};
/* Subtrahends.  The array has 2*N entries because foo reads a[i+j] with
   both i and j ranging up to N-1.  */
float a[2*N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
/* Outer-loop vectorization.
   The inner j-loop carries `res' from one iteration to the next through a
   subtraction, and the final value is stored once per outer iteration.
   The dg-final directive of this test expects the dump to report
   "OUTER LOOP VECTORIZED" exactly once.
   After computing out[], the function compares it with check_res[] and
   aborts on the first mismatch.  Marked noinline so the loops are not
   inlined into main.  */
__attribute__ ((noinline)) void
foo ()
{
int i, j;
float res;
/* For each i: start from in[i] and subtract a[i+j] for j = 0 .. N-1.  */
for (i = 0; i < N; i++)
{
res = in[i];
for (j = 0; j < N; j++)
res = res - a[i+j];
out[i] = res;
}
/* Check results; exact float comparison is intended, all values involved
   are small integers.  */
for (i = 0; i < N; i++)
if (out[i] != check_res[i])
abort ();
}
/* Test driver: calls check_vect, then runs foo, which aborts if any result
   is wrong.  Returns 0 when foo returns normally.
   NOTE(review): check_vect is not defined in this file; presumably it comes
   from tree-vect.h -- its behavior is not visible here.  */
int main ()
{
check_vect ();
foo();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
/* Trip count of both the outer (i) and the inner (j) loop in foo.  */
#define N 16
/* Results produced by foo, one element per outer iteration.  */
float out[N];
/* Expected contents of out[]: check_res[i] is 1000 with a[i] .. a[i+N-1]
   subtracted from it (e.g. 1000 - (0 + 1 + ... + 15) == 880).  */
float check_res[N] = {880,864,848,832,816,800,784,768,752,736,720,704,688,672,656,640};
/* Subtrahends.  The array has 2*N entries because foo reads a[i+j] with
   both i and j ranging up to N-1.  */
float a[2*N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
/* Outer-loop vectorization.
   Same shape as a subtraction chain carried by the inner j-loop, but the
   accumulator starts from the constant 1000 in every outer iteration
   instead of a value loaded from memory.
   The dg-final directive of this test expects the dump to report
   "OUTER LOOP VECTORIZED" exactly once.
   After computing out[], the function compares it with check_res[] and
   aborts on the first mismatch.  Marked noinline so the loops are not
   inlined into main.  */
__attribute__ ((noinline)) void
foo ()
{
int i, j;
float res;
/* For each i: start from 1000 and subtract a[i+j] for j = 0 .. N-1.  */
for (i = 0; i < N; i++)
{
res = 1000;
for (j = 0; j < N; j++)
res = res - a[i+j];
out[i] = res;
}
/* Check results; exact float comparison is intended, all values involved
   are small integers.  */
for (i = 0; i < N; i++)
if (out[i] != check_res[i])
abort ();
}
/* Test driver: calls check_vect, then runs foo, which aborts if any result
   is wrong.  Returns 0 when foo returns normally.
   NOTE(review): check_vect is not defined in this file; presumably it comes
   from tree-vect.h -- its behavior is not visible here.  */
int main ()
{
check_vect ();
foo();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
/* Dimension of the square matrices b and c, and length of a.  */
#define N 16
/* Expected value of every a[i] after main1: the accumulator starts at 2 and
   adds N differences b[j][i] - c[j][i], each of which is 5 with the
   initialization done in main (2 + 16 * 5 == 82).  */
#define DIFF 82
/* c and b are the inputs filled in by main; a receives one result per
   outer iteration of main1.  */
float c[N][N], b[N][N], a[N];
/* Computes a[i] = 2 + sum over j of (b[j][i] - c[j][i]) for every i, then
   verifies that each a[i] equals DIFF and aborts otherwise.  Returns 0 when
   all results match.  The dg-final directive of this test expects the dump
   to report "OUTER LOOP VECTORIZED" exactly once.  Marked noinline so the
   loops are not inlined into main.  */
__attribute__ ((noinline)) int
main1 ()
{
int i, j;
float diff;
/* In inner-loop vectorization, -funsafe-math-optimizations is needed to
   vectorize the summation.  In outer-loop vectorization the order of
   calculation doesn't change; therefore, there is no need for that flag.  */
for (i = 0; i < N; i++)
{
diff = 2;
/* The inner loop walks down column i of b and c (first index j).  */
for (j = 0; j < N; j++)
diff += (b[j][i] - c[j][i]);
a[i] = diff;
}
/* Check results: */
for (i = 0; i < N; i++)
if (a[i] != DIFF)
abort ();
return 0;
}
/* Test driver: initializes the inputs, calls check_vect, then runs main1,
   which aborts if any result is wrong.  Returns 0 when main1 returns
   normally (main1's own return value is ignored).
   NOTE(review): check_vect is not defined in this file; presumably it comes
   from tree-vect.h -- its behavior is not visible here.  */
int main (void)
{
int i, j;
/* Fill b and c so that b[i][j] - c[i][j] == 5 for every element.  */
for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
{
b[i][j] = i+j+5;
c[i][j] = i+j;
}
check_vect ();
main1 ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -4,9 +4,7 @@ ...@@ -4,9 +4,7 @@
signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
/* Can't do outer-loop vectorization because of non-consecutive access. /* Can't do outer-loop vectorization because of non-consecutive access. */
Currently fails to vectorize because the reduction pattern is not
recognized. */
int int
foo (){ foo (){
...@@ -22,7 +20,5 @@ foo (){ ...@@ -22,7 +20,5 @@ foo (){
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* FORNOW */ /* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "unexpected pattern" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -66,5 +66,4 @@ int main (void) ...@@ -66,5 +66,4 @@ int main (void)
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: not allowed" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -66,5 +66,4 @@ int main (void) ...@@ -66,5 +66,4 @@ int main (void)
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: not allowed" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -66,5 +66,4 @@ int main (void) ...@@ -66,5 +66,4 @@ int main (void)
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: not allowed" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -66,5 +66,4 @@ int main (void) ...@@ -66,5 +66,4 @@ int main (void)
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: not allowed" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -290,7 +290,7 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list, ...@@ -290,7 +290,7 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
if (!is_gimple_reg (PHI_RESULT (phi))) if (!is_gimple_reg (PHI_RESULT (phi)))
continue; continue;
if (simple_loop_info) if (simple_loop_info)
reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi); reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true);
/* Create a reduction_info struct, initialize it and insert it to /* Create a reduction_info struct, initialize it and insert it to
the reduction list. */ the reduction list. */
......
...@@ -319,12 +319,7 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) ...@@ -319,12 +319,7 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
/* We don't allow changing the order of the computation in the inner-loop /* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */ when doing outer-loop vectorization. */
if (nested_in_vect_loop_p (loop, last_stmt)) gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_dot_prod_pattern: not allowed.");
return NULL;
}
return pattern_stmt; return pattern_stmt;
} }
...@@ -638,12 +633,7 @@ vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) ...@@ -638,12 +633,7 @@ vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
/* We don't allow changing the order of the computation in the inner-loop /* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */ when doing outer-loop vectorization. */
if (nested_in_vect_loop_p (loop, last_stmt)) gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_widen_sum_pattern: not allowed.");
return NULL;
}
return pattern_stmt; return pattern_stmt;
} }
......
...@@ -300,19 +300,24 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ...@@ -300,19 +300,24 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt."); fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
switch (relevant) switch (relevant)
{ {
case vect_unused_in_scope: case vect_unused_in_scope:
relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ? relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
vect_used_by_reduction : vect_unused_in_scope; vect_used_in_scope : vect_unused_in_scope;
break; break;
case vect_used_in_outer_by_reduction: case vect_used_in_outer_by_reduction:
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
relevant = vect_used_by_reduction; relevant = vect_used_by_reduction;
break; break;
case vect_used_in_outer: case vect_used_in_outer:
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
relevant = vect_used_in_scope; relevant = vect_used_in_scope;
break; break;
case vect_used_by_reduction:
case vect_used_in_scope: case vect_used_in_scope:
break; break;
...@@ -332,6 +337,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ...@@ -332,6 +337,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt."); fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
switch (relevant) switch (relevant)
{ {
case vect_unused_in_scope: case vect_unused_in_scope:
...@@ -339,10 +345,6 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ...@@ -339,10 +345,6 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
vect_used_in_outer_by_reduction : vect_unused_in_scope; vect_used_in_outer_by_reduction : vect_unused_in_scope;
break; break;
case vect_used_in_outer_by_reduction:
case vect_used_in_outer:
break;
case vect_used_by_reduction: case vect_used_by_reduction:
relevant = vect_used_in_outer_by_reduction; relevant = vect_used_in_outer_by_reduction;
break; break;
...@@ -461,19 +463,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) ...@@ -461,19 +463,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
those that are used by a reduction computation, and those that are those that are used by a reduction computation, and those that are
(also) used by a regular computation. This allows us later on to (also) used by a regular computation. This allows us later on to
identify stmts that are used solely by a reduction, and therefore the identify stmts that are used solely by a reduction, and therefore the
order of the results that they produce does not have to be kept. order of the results that they produce does not have to be kept. */
Reduction phis are expected to be used by a reduction stmt, or by
in an outer loop; Other reduction stmts are expected to be
in the loop, and possibly used by a stmt in an outer loop.
Here are the expected values of "relevant" for reduction phis/stmts:
relevance: phi stmt
vect_unused_in_scope ok
vect_used_in_outer_by_reduction ok ok
vect_used_in_outer ok ok
vect_used_by_reduction ok
vect_used_in_scope */
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
{ {
...@@ -485,28 +475,41 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) ...@@ -485,28 +475,41 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
relevant = vect_used_by_reduction; relevant = vect_used_by_reduction;
break; break;
case vect_used_in_outer_by_reduction:
case vect_used_in_outer:
gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
|| (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
&& (gimple_assign_rhs_code (stmt)
!= DOT_PROD_EXPR)));
break;
case vect_used_by_reduction: case vect_used_by_reduction:
if (gimple_code (stmt) == GIMPLE_PHI) if (gimple_code (stmt) == GIMPLE_PHI)
break; break;
/* fall through */ /* fall through */
case vect_used_in_scope:
default: default:
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported use of reduction."); fprintf (vect_dump, "unsupported use of reduction.");
VEC_free (gimple, heap, worklist); VEC_free (gimple, heap, worklist);
return false; return false;
} }
live_p = false; live_p = false;
} }
else if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle)
{
enum vect_relevant tmp_relevant = relevant;
switch (tmp_relevant)
{
case vect_unused_in_scope:
case vect_used_in_outer_by_reduction:
case vect_used_in_outer:
break;
default:
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported use of nested cycle.");
VEC_free (gimple, heap, worklist);
return false;
}
live_p = false;
}
FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
{ {
tree op = USE_FROM_PTR (use_p); tree op = USE_FROM_PTR (use_p);
...@@ -971,6 +974,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def) ...@@ -971,6 +974,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
/* Case 4: operand is defined by a loop header phi - reduction */ /* Case 4: operand is defined by a loop header phi - reduction */
case vect_reduction_def: case vect_reduction_def:
case vect_nested_cycle:
{ {
struct loop *loop; struct loop *loop;
...@@ -3929,6 +3933,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -3929,6 +3933,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
break; break;
case vect_reduction_def: case vect_reduction_def:
case vect_nested_cycle:
gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
|| relevance == vect_used_in_outer_by_reduction || relevance == vect_used_in_outer_by_reduction
|| relevance == vect_unused_in_scope)); || relevance == vect_unused_in_scope));
......
...@@ -61,6 +61,7 @@ enum vect_def_type { ...@@ -61,6 +61,7 @@ enum vect_def_type {
vect_internal_def, vect_internal_def,
vect_induction_def, vect_induction_def,
vect_reduction_def, vect_reduction_def,
vect_nested_cycle,
vect_unknown_def_type vect_unknown_def_type
}; };
...@@ -339,7 +340,11 @@ enum stmt_vec_info_type { ...@@ -339,7 +340,11 @@ enum stmt_vec_info_type {
block. */ block. */
enum vect_relevant { enum vect_relevant {
vect_unused_in_scope = 0, vect_unused_in_scope = 0,
/* The def is in the inner loop, and the use is in the outer loop, and the
use is a reduction stmt. */
vect_used_in_outer_by_reduction, vect_used_in_outer_by_reduction,
/* The def is in the inner loop, and the use is in the outer loop (and is
not part of reduction). */
vect_used_in_outer, vect_used_in_outer,
/* defs that feed computations that end up (only) in a reduction. These /* defs that feed computations that end up (only) in a reduction. These
...@@ -817,7 +822,7 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, ...@@ -817,7 +822,7 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
/* In tree-vect-loop.c. */ /* In tree-vect-loop.c. */
/* FORNOW: Used in tree-parloops.c. */ /* FORNOW: Used in tree-parloops.c. */
extern void destroy_loop_vec_info (loop_vec_info, bool); extern void destroy_loop_vec_info (loop_vec_info, bool);
extern gimple vect_is_simple_reduction (loop_vec_info, gimple); extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool);
/* Drive for loop analysis stage. */ /* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *); extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */ /* Drive for loop transformation stage. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment