Commit 599eabdb by Daniel Berlin Committed by Daniel Berlin

Makefile.in (tree-loop-linear.o): Added.

2004-09-08  Daniel Berlin  <dberlin@dberlin.org>

	* Makefile.in (tree-loop-linear.o): Added.
	(OBJS-common): Add tree-loop-linear.o
	* common.opt: New flag, ftree-loop-linear.
	* timevar.def: New timevar, TV_TREE_LOOP_LINEAR.
	* tree-flow.h: Add prototype for linear_transform_loops.
	* tree-optimize.c: Add linear transform after vectorization.
	* tree-pass.h: Add struct pass_linear_transform.
	* tree-ssa-loop.c: Add pass_linear_transform.
	* tree-loop-linear.c: New file.
	* lambda-code.c: gcc_assertify.
	(gcc_loop_to_lambda_loop): Handle all exit tests.
	Handle case where we have (invariant >= induction var).
	(find_induction_var_from_exit_cond): Ditto.

From-SVN: r87190
parent 6e45f57b
2004-09-08 Daniel Berlin <dberlin@dberlin.org>
* Makefile.in (tree-loop-linear.o): Added.
(OBJS-common): Add tree-loop-linear.o
* common.opt: New flag, ftree-loop-linear.
* timevar.def: New timevar, TV_TREE_LOOP_LINEAR.
* tree-flow.h: Add prototype for linear_transform_loops.
* tree-optimize.c: Add linear transform after vectorization.
* tree-pass.h: Add struct pass_linear_transform.
* tree-ssa-loop.c: Add pass_linear_transform.
* tree-loop-linear.c: New file.
* lambda-code.c: gcc_assertify.
(gcc_loop_to_lambda_loop): Handle all exit tests.
Handle case where we have (invariant >= induction var).
(find_induction_var_from_exit_cond): Ditto.
2004-09-08 Jie Zhang <zhangjie@magima.com.cn> 2004-09-08 Jie Zhang <zhangjie@magima.com.cn>
* tree-ssa-alias.c (compute_flow_insensitive_aliasing): If type * tree-ssa-alias.c (compute_flow_insensitive_aliasing): If type
......
...@@ -927,7 +927,7 @@ OBJS-common = \ ...@@ -927,7 +927,7 @@ OBJS-common = \
varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \ varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \
et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \ et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \
rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \ rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \
lambda-trans.o lambda-code.o lambda-trans.o lambda-code.o tree-loop-linear.o
OBJS-md = $(out_object_file) OBJS-md = $(out_object_file)
OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o \ OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o \
...@@ -1774,6 +1774,10 @@ tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) ...@@ -1774,6 +1774,10 @@ tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \ errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
tree-vectorizer.h tree-data-ref.h $(SCEV_H) tree-vectorizer.h tree-data-ref.h $(SCEV_H)
tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
$(TREE_DATA_REF_H) $(SCEV_H)
tree-gimple.o : tree-gimple.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(EXPR_H) \ tree-gimple.o : tree-gimple.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(EXPR_H) \
$(RTL_H) $(TREE_GIMPLE_H) $(TM_H) coretypes.h bitmap.h $(GGC_H) $(RTL_H) $(TREE_GIMPLE_H) $(TM_H) coretypes.h bitmap.h $(GGC_H)
tree-mudflap.o : $(CONFIG_H) errors.h $(SYSTEM_H) $(TREE_H) tree-inline.h \ tree-mudflap.o : $(CONFIG_H) errors.h $(SYSTEM_H) $(TREE_H) tree-inline.h \
......
...@@ -864,6 +864,10 @@ ftree-loop-im ...@@ -864,6 +864,10 @@ ftree-loop-im
Common Report Var(flag_tree_loop_im) Init(1) Common Report Var(flag_tree_loop_im) Init(1)
Enable loop invariant motion on trees Enable loop invariant motion on trees
ftree-loop-linear
Common Report Var(flag_tree_loop_linear)
Enable linear loop transforms on trees
ftree-loop-ivcanon ftree-loop-ivcanon
Common Report Var(flag_tree_loop_ivcanon) Common Report Var(flag_tree_loop_ivcanon)
Create canonical induction variables in loops Create canonical induction variables in loops
......
...@@ -316,7 +316,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -316,7 +316,7 @@ Objective-C and Objective-C++ Dialects}.
-funroll-all-loops -funroll-loops -fpeel-loops @gol -funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol -funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol -ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
-ftree-loop-im -ftree-loop-ivcanon -fivopts @gol -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
-ftree-dominator-opts -ftree-dse -ftree-copyrename @gol -ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
-ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol -ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
--param @var{name}=@var{value} --param @var{name}=@var{value}
...@@ -4610,6 +4610,10 @@ usually increases code size. ...@@ -4610,6 +4610,10 @@ usually increases code size.
Perform loop optimizations on trees. This flag is enabled by default at -O Perform loop optimizations on trees. This flag is enabled by default at -O
and higher. and higher.
@item -ftree-loop-linear
Perform linear loop transformations on tree. This flag can improve cache
performance and allow further loop optimizations to take place.
@item -ftree-lim @item -ftree-lim
Perform loop invariant motion on trees. This pass moves only invartiants that Perform loop invariant motion on trees. This pass moves only invartiants that
would be hard to handle on rtl level (function calls, operations that expand to would be hard to handle on rtl level (function calls, operations that expand to
......
...@@ -169,8 +169,7 @@ lambda_body_vector_compute_new (lambda_trans_matrix transform, ...@@ -169,8 +169,7 @@ lambda_body_vector_compute_new (lambda_trans_matrix transform,
int depth; int depth;
/* Make sure the matrix is square. */ /* Make sure the matrix is square. */
if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform)) gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
abort ();
depth = LTM_ROWSIZE (transform); depth = LTM_ROWSIZE (transform);
...@@ -297,8 +296,7 @@ print_lambda_loop (FILE * outfile, lambda_loop loop, int depth, ...@@ -297,8 +296,7 @@ print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
int step; int step;
lambda_linear_expression expr; lambda_linear_expression expr;
if (!loop) gcc_assert (loop);
abort ();
expr = LL_LINEAR_OFFSET (loop); expr = LL_LINEAR_OFFSET (loop);
step = LL_STEP (loop); step = LL_STEP (loop);
...@@ -393,8 +391,7 @@ lambda_lattice_compute_base (lambda_loopnest nest) ...@@ -393,8 +391,7 @@ lambda_lattice_compute_base (lambda_loopnest nest)
for (i = 0; i < depth; i++) for (i = 0; i < depth; i++)
{ {
loop = LN_LOOPS (nest)[i]; loop = LN_LOOPS (nest)[i];
if (!loop) gcc_assert (loop);
abort ();
step = LL_STEP (loop); step = LL_STEP (loop);
/* If we have a step of 1, then the base is one, and the /* If we have a step of 1, then the base is one, and the
origin and invariant coefficients are 0. */ origin and invariant coefficients are 0. */
...@@ -412,9 +409,8 @@ lambda_lattice_compute_base (lambda_loopnest nest) ...@@ -412,9 +409,8 @@ lambda_lattice_compute_base (lambda_loopnest nest)
/* Otherwise, we need the lower bound expression (which must /* Otherwise, we need the lower bound expression (which must
be an affine function) to determine the base. */ be an affine function) to determine the base. */
expression = LL_LOWER_BOUND (loop); expression = LL_LOWER_BOUND (loop);
if (!expression gcc_assert (expression && LLE_NEXT (expression)
|| LLE_NEXT (expression) || LLE_DENOMINATOR (expression) != 1) && LLE_DENOMINATOR (expression) == 1);
abort ();
/* The lower triangular portion of the base is going to be the /* The lower triangular portion of the base is going to be the
coefficient times the step */ coefficient times the step */
...@@ -556,8 +552,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest, ...@@ -556,8 +552,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest,
size++; size++;
/* Need to increase matrix sizes above. */ /* Need to increase matrix sizes above. */
if (size > 127) gcc_assert (size <= 127);
abort ();
} }
/* Then do the exact same thing for the upper bounds. */ /* Then do the exact same thing for the upper bounds. */
...@@ -585,8 +581,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest, ...@@ -585,8 +581,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest,
A[size][i] = LLE_DENOMINATOR (expression); A[size][i] = LLE_DENOMINATOR (expression);
size++; size++;
/* Need to increase matrix sizes above. */ /* Need to increase matrix sizes above. */
if (size > 127) gcc_assert (size <= 127);
abort ();
} }
} }
...@@ -1205,7 +1201,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, ...@@ -1205,7 +1201,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
tree test; tree test;
int stepint; int stepint;
int extra = 0; int extra = 0;
tree uboundvar;
use_optype uses; use_optype uses;
/* Find out induction var and set the pointer so that the caller can /* Find out induction var and set the pointer so that the caller can
...@@ -1225,19 +1221,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, ...@@ -1225,19 +1221,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
} }
test = TREE_OPERAND (exit_cond, 0); test = TREE_OPERAND (exit_cond, 0);
if (TREE_CODE (test) != LE_EXPR
&& TREE_CODE (test) != LT_EXPR && TREE_CODE (test) != NE_EXPR)
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file,
"Unable to convert loop: Loop exit test uses unhandled test condition:");
print_generic_stmt (dump_file, test, 0);
fprintf (dump_file, "\n");
}
return NULL;
}
if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE) if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
{ {
...@@ -1349,25 +1333,39 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, ...@@ -1349,25 +1333,39 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
return NULL; return NULL;
} }
if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME) /* One part of the test may be a loop invariant tree. */
if (invariant_in_loop (loop, TREE_OPERAND (test, 1))) if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1)); && invariant_in_loop (loop, TREE_OPERAND (test, 1)))
VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
&& invariant_in_loop (loop, TREE_OPERAND (test, 0)))
VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
/* The non-induction variable part of the test is the upper bound variable.
*/
if (TREE_OPERAND (test, 0) == inductionvar)
uboundvar = TREE_OPERAND (test, 1);
else
uboundvar = TREE_OPERAND (test, 0);
/* We only size the vectors assuming we have, at max, 2 times as many /* We only size the vectors assuming we have, at max, 2 times as many
invariants as we do loops (one for each bound). invariants as we do loops (one for each bound).
This is just an arbitrary number, but it has to be matched against the This is just an arbitrary number, but it has to be matched against the
code below. */ code below. */
if (VEC_length (tree, *invariants) > (unsigned int) (2 * depth)) gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
abort ();
/* We might have some leftover. */ /* We might have some leftover. */
if (TREE_CODE (test) == LT_EXPR) if (TREE_CODE (test) == LT_EXPR)
extra = -1 * stepint; extra = -1 * stepint;
else if (TREE_CODE (test) == NE_EXPR) else if (TREE_CODE (test) == NE_EXPR)
extra = -1 * stepint; extra = -1 * stepint;
else if (TREE_CODE (test) == GT_EXPR)
extra = -1 * stepint;
ubound = gcc_tree_to_linear_expression (depth, ubound = gcc_tree_to_linear_expression (depth,
TREE_OPERAND (test, 1), uboundvar,
outerinductionvars, outerinductionvars,
*invariants, extra); *invariants, extra);
if (!ubound) if (!ubound)
...@@ -1393,6 +1391,7 @@ static tree ...@@ -1393,6 +1391,7 @@ static tree
find_induction_var_from_exit_cond (struct loop *loop) find_induction_var_from_exit_cond (struct loop *loop)
{ {
tree expr = get_loop_exit_condition (loop); tree expr = get_loop_exit_condition (loop);
tree ivarop;
tree test; tree test;
if (expr == NULL_TREE) if (expr == NULL_TREE)
return NULL_TREE; return NULL_TREE;
...@@ -1401,9 +1400,28 @@ find_induction_var_from_exit_cond (struct loop *loop) ...@@ -1401,9 +1400,28 @@ find_induction_var_from_exit_cond (struct loop *loop)
test = TREE_OPERAND (expr, 0); test = TREE_OPERAND (expr, 0);
if (TREE_CODE_CLASS (TREE_CODE (test)) != '<') if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
return NULL_TREE; return NULL_TREE;
if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME) /* This is a guess. We say that for a <,!=,<= b, a is the induction
variable.
For >, >=, we guess b is the induction variable.
If we are wrong, it'll fail the rest of the induction variable tests, and
everything will be fine anyway. */
switch (TREE_CODE (test))
{
case LT_EXPR:
case LE_EXPR:
case NE_EXPR:
ivarop = TREE_OPERAND (test, 0);
break;
case GT_EXPR:
case GE_EXPR:
ivarop = TREE_OPERAND (test, 1);
break;
default:
gcc_unreachable();
}
if (TREE_CODE (ivarop) != SSA_NAME)
return NULL_TREE; return NULL_TREE;
return TREE_OPERAND (test, 0); return ivarop;
} }
DEF_VEC_GC_P(lambda_loop); DEF_VEC_GC_P(lambda_loop);
...@@ -1693,7 +1711,7 @@ lle_to_gcc_expression (lambda_linear_expression lle, ...@@ -1693,7 +1711,7 @@ lle_to_gcc_expression (lambda_linear_expression lle,
name, build_int_cst (integer_type_node, name, build_int_cst (integer_type_node,
LLE_DENOMINATOR (lle)))); LLE_DENOMINATOR (lle))));
else else
abort (); gcc_unreachable();
/* name = {ceil, floor}(name/denominator) */ /* name = {ceil, floor}(name/denominator) */
name = make_ssa_name (resvar, stmt); name = make_ssa_name (resvar, stmt);
...@@ -1706,9 +1724,8 @@ lle_to_gcc_expression (lambda_linear_expression lle, ...@@ -1706,9 +1724,8 @@ lle_to_gcc_expression (lambda_linear_expression lle,
/* Again, out of laziness, we don't handle this case yet. It's not /* Again, out of laziness, we don't handle this case yet. It's not
hard, it just hasn't occurred. */ hard, it just hasn't occurred. */
if (VEC_length (tree, results) > 2) gcc_assert (VEC_length (tree, results) <= 2);
abort ();
/* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */ /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
if (VEC_length (tree, results) > 1) if (VEC_length (tree, results) > 1)
{ {
...@@ -1788,10 +1805,9 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, ...@@ -1788,10 +1805,9 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
cases for now. */ cases for now. */
offset = LL_LINEAR_OFFSET (newloop); offset = LL_LINEAR_OFFSET (newloop);
if (LLE_DENOMINATOR (offset) != 1 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
|| !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth)) lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
abort ();
/* Now build the new lower bounds, and insert the statements /* Now build the new lower bounds, and insert the statements
necessary to generate it on the loop preheader. */ necessary to generate it on the loop preheader. */
newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop), newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
...@@ -1929,8 +1945,8 @@ lambda_transform_legal_p (lambda_trans_matrix trans, ...@@ -1929,8 +1945,8 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
struct data_dependence_relation *ddr; struct data_dependence_relation *ddr;
#if defined ENABLE_CHECKING #if defined ENABLE_CHECKING
if (LTM_COLSIZE (trans) != nb_loops || LTM_ROWSIZE (trans) != nb_loops) gcc_assert (LTM_COLSIZE (trans) == nb_loops
abort (); && LTM_ROWSIZE (trans) == nb_loops);
#endif #endif
/* When there is an unknown relation in the dependence_relations, we /* When there is an unknown relation in the dependence_relations, we
......
...@@ -86,6 +86,7 @@ DEFTIMEVAR (TV_LIM , "loop invariant motion") ...@@ -86,6 +86,7 @@ DEFTIMEVAR (TV_LIM , "loop invariant motion")
DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv creation") DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv creation")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree loop vectorization") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree loop vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear transforms")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_CH , "tree copy headers") DEFTIMEVAR (TV_TREE_CH , "tree copy headers")
DEFTIMEVAR (TV_TREE_SSA_TO_NORMAL , "tree SSA to normal") DEFTIMEVAR (TV_TREE_SSA_TO_NORMAL , "tree SSA to normal")
......
...@@ -719,6 +719,9 @@ void insert_edge_copies (tree stmt, basic_block bb); ...@@ -719,6 +719,9 @@ void insert_edge_copies (tree stmt, basic_block bb);
extern void build_ssa_operands (tree, stmt_ann_t, stmt_operands_p, extern void build_ssa_operands (tree, stmt_ann_t, stmt_operands_p,
stmt_operands_p); stmt_operands_p);
/* In tree-loop-linear.c */
extern void linear_transform_loops (struct loops *);
/* In gimplify.c */ /* In gimplify.c */
tree force_gimple_operand (tree, tree *, bool, tree); tree force_gimple_operand (tree, tree *, bool, tree);
......
/* Linear Loop transforms
Copyright (C) 2003, 2004 Free Software Foundation, Inc.
Contributed by Daniel Berlin <dberlin@dberlin.org>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "varray.h"
#include "lambda.h"
/* Linear loop transforms include any composition of interchange,
scaling, skewing, and reversal. They are used to change the
iteration order of loop nests in order to optimize data locality of
traversals, or remove dependences that prevent
parallelization/vectorization/etc.
TODO: Determine reuse vectors/matrix and use it to determine optimal
transform matrix for locality purposes.
TODO: Completion of partial transforms. */
/* Gather statistics for loop interchange. Initializes SUM the sum of
all the data dependence distances carried by loop LOOP_NUMBER.
NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of
dependence relations for which the loop LOOP_NUMBER is not carrying
any dependence. */
static void
gather_interchange_stats (varray_type dependence_relations,
unsigned int loop_number,
unsigned int *sum,
unsigned int *nb_deps_not_carried_by_loop)
{
unsigned int i;
*sum = 0;
*nb_deps_not_carried_by_loop = 0;
for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
{
int dist;
struct data_dependence_relation *ddr =
(struct data_dependence_relation *)
VARRAY_GENERIC_PTR (dependence_relations, i);
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
/* Some constants will need tweaking, but not something that should
be user-accessible. Thus, no --param. */
*sum += 100;
continue;
}
/* When we know that there is no dependence, we know that there
is no reuse of the data. */
if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
{
/* Ditto on the no --param here */
*sum += 1000;
continue;
}
dist = DDR_DIST_VECT (ddr)[loop_number];
if (dist == 0)
*nb_deps_not_carried_by_loop++;
else if (dist < 0)
*sum += -dist;
else
*sum += dist;
}
}
/* Apply to TRANS any loop interchange that minimize inner loop steps.
DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array
of dependence relations.
Returns the new transform matrix. The smaller the reuse vector
distances in the inner loops, the fewer the cache misses. */
static lambda_trans_matrix
try_interchange_loops (lambda_trans_matrix trans,
unsigned int depth,
varray_type dependence_relations)
{
unsigned int loop_i, loop_j;
unsigned int steps_i, steps_j;
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
struct data_dependence_relation *ddr;
/* When there is an unknown relation in the dependence_relations, we
know that it is no worth looking at this loop nest: give up. */
ddr = (struct data_dependence_relation *)
VARRAY_GENERIC_PTR (dependence_relations, 0);
if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
return trans;
/* LOOP_I is always the outer loop. */
for (loop_j = 1; loop_j < depth; loop_j++)
for (loop_i = 0; loop_i < loop_j; loop_i++)
{
gather_interchange_stats (dependence_relations, loop_i, &steps_i,
&nb_deps_not_carried_by_i);
gather_interchange_stats (dependence_relations, loop_j, &steps_j,
&nb_deps_not_carried_by_j);
/* Heuristics for loop interchange profitability:
1. Inner loops should have smallest steps.
2. Inner loops should contain more dependence relations not
carried by the loop.
*/
if (steps_i < steps_j
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j)
{
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
/* Validate the resulting matrix. When the transformation
is not valid, reverse to the previous matrix.
FIXME: In this case of transformation it could be
faster to verify the validity of the interchange
without applying the transform to the matrix. But for
the moment do it cleanly: this is just a prototype. */
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
}
}
return trans;
}
/* Perform a set of linear transforms on LOOPS. */
void
linear_transform_loops (struct loops *loops)
{
unsigned int i;
for (i = 1; i < loops->num; i++)
{
unsigned int depth = 0;
varray_type datarefs;
varray_type dependence_relations;
struct loop *loop_nest = loops->parray[i];
struct loop *temp;
VEC (tree) *oldivs;
VEC (tree) *invariants;
lambda_loopnest before, after;
lambda_trans_matrix trans;
bool problem = false;
/* If it's not a loop nest, we don't want it.
We also don't handle sibling loops properly,
which are loops of the following form:
for (i = 0; i < 50; i++)
{
for (j = 0; j < 50; j++)
{
...
}
for (j = 0; j < 50; j++)
{
...
}
} */
if (!loop_nest->inner)
continue;
for (temp = loop_nest; temp; temp = temp->inner)
{
flow_loop_scan (temp, LOOP_ALL);
/* If we have a sibling loop or multiple exit edges, jump ship. */
if (temp->next || temp->num_exits != 1)
{
problem = true;
break;
}
depth ++;
}
if (problem)
continue;
/* Analyze data references and dependence relations using scev. */
VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
"dependence_relations");
compute_data_dependences_for_loop (depth, loop_nest,
&datarefs, &dependence_relations);
if (dump_file && (dump_flags & TDF_DETAILS))
{
unsigned int j;
for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
{
struct data_dependence_relation *ddr =
(struct data_dependence_relation *)
VARRAY_GENERIC_PTR (dependence_relations, j);
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
fprintf (dump_file, "DISTANCE_V (");
print_lambda_vector (dump_file, DDR_DIST_VECT (ddr),
loops->num);
fprintf (dump_file, ")\n");
fprintf (dump_file, "DIRECTION_V (");
print_lambda_vector (dump_file, DDR_DIR_VECT (ddr),
loops->num);
fprintf (dump_file, ")\n");
}
}
fprintf (dump_file, "\n\n");
}
/* Build the transformation matrix. */
trans = lambda_trans_matrix_new (depth, depth);
lambda_matrix_id (LTM_MATRIX (trans), depth);
trans = try_interchange_loops (trans, depth, dependence_relations);
/* Check whether the transformation is legal. */
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
{
if (dump_file)
fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
continue;
}
before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
&invariants);
if (!before)
continue;
if (dump_file)
{
fprintf (dump_file, "Before:\n");
print_lambda_loopnest (dump_file, before, 'i');
}
after = lambda_loopnest_transform (before, trans);
if (dump_file)
{
fprintf (dump_file, "After:\n");
print_lambda_loopnest (dump_file, after, 'u');
}
lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
after, trans);
oldivs = NULL;
invariants = NULL;
free_dependence_relations (dependence_relations);
free_data_refs (datarefs);
}
}
...@@ -397,6 +397,7 @@ init_tree_optimization_passes (void) ...@@ -397,6 +397,7 @@ init_tree_optimization_passes (void)
NEXT_PASS (pass_iv_canon); NEXT_PASS (pass_iv_canon);
NEXT_PASS (pass_if_conversion); NEXT_PASS (pass_if_conversion);
NEXT_PASS (pass_vectorize); NEXT_PASS (pass_vectorize);
NEXT_PASS (pass_linear_transform);
NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_loop_done); NEXT_PASS (pass_loop_done);
......
...@@ -160,6 +160,6 @@ extern struct tree_opt_pass pass_rename_ssa_copies; ...@@ -160,6 +160,6 @@ extern struct tree_opt_pass pass_rename_ssa_copies;
extern struct tree_opt_pass pass_expand; extern struct tree_opt_pass pass_expand;
extern struct tree_opt_pass pass_rest_of_compilation; extern struct tree_opt_pass pass_rest_of_compilation;
extern struct tree_opt_pass pass_fre; extern struct tree_opt_pass pass_fre;
extern struct tree_opt_pass pass_linear_transform;
#endif /* GCC_TREE_PASS_H */ #endif /* GCC_TREE_PASS_H */
...@@ -194,6 +194,41 @@ struct tree_opt_pass pass_vectorize = ...@@ -194,6 +194,41 @@ struct tree_opt_pass pass_vectorize =
0 /* letter */ 0 /* letter */
}; };
/* Loop nest optimizations. */
static void
tree_linear_transform (void)
{
if (!current_loops)
return;
linear_transform_loops (current_loops);
}
static bool
gate_tree_linear_transform (void)
{
return flag_tree_loop_linear != 0;
}
struct tree_opt_pass pass_linear_transform =
{
"ltrans", /* name */
gate_tree_linear_transform, /* gate */
tree_linear_transform, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_LINEAR_TRANSFORM, /* tv_id */
PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func, /* todo_flags_finish */
0 /* letter */
};
/* Canonical induction variable creation pass. */ /* Canonical induction variable creation pass. */
static void static void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment