Commit 17684618 by Zdenek Dvorak Committed by Zdenek Dvorak

invoke.texi (-fprefetch-loop-arrays, [...]): Document.

	* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
	Document.
	* tree-ssa-loop-niter.c (number_of_iterations_ne,
	number_of_iterations_lt, number_of_iterations_cond): Remember the shape
	of the ending condition.
	* tree-ssa-loop-manip.c: Include params.h.
	(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
	tree_unroll_loop): New functions.
	* tree-pass.h (pass_loop_prefetch): Declare.
	* loop.c (rest_of_handle_loop_optimize): Test for
	-fprefetch-loop-arrays-rtl.
	* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
	* timevar.def (TV_TREE_PREFETCH): New timevar.
	* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
	pass_loop_prefetch): New.
	* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
	(cleanup_tree_cfg_loop): Call scev_reset.
	* common.opt (fprefetch-loop-arrays-rtl): Add.
	* tree-ssa-loop-prefetch.c: New file.
	* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
	(new_temp_expr_table): Initialize expr_vars.
	(free_temp_expr_table): Cleanup expr_vars.
	(check_replaceable, find_replaceable_in_bb): Prevent accumulating
	expressions from being merged into one.
	* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
	(struct tree_niter_desc): Add control, bound and cmp fields.
	(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
	Declare.
	* Makefile.in (tree-ssa-loop-prefetch.o): Add.
	(tree-cfgcleanup.o): Add SCEV_H dependency.
	(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
	* passes.c (init_optimization_passes): Add pass_loop_prefetch.

From-SVN: r110964
parent 0a4288d9
2006-02-14 Zdenek Dvorak <dvorakz@suse.cz>
* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
Document.
* tree-ssa-loop-niter.c (number_of_iterations_ne,
number_of_iterations_lt, number_of_iterations_cond): Remember the shape
of the ending condition.
* tree-ssa-loop-manip.c: Include params.h.
(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
tree_unroll_loop): New functions.
* tree-pass.h (pass_loop_prefetch): Declare.
* loop.c (rest_of_handle_loop_optimize): Test for
-fprefetch-loop-arrays-rtl.
* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
* timevar.def (TV_TREE_PREFETCH): New timevar.
* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
pass_loop_prefetch): New.
* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
(cleanup_tree_cfg_loop): Call scev_reset.
* common.opt (fprefetch-loop-arrays-rtl): Add.
* tree-ssa-loop-prefetch.c: New file.
* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
(new_temp_expr_table): Initialize expr_vars.
(free_temp_expr_table): Cleanup expr_vars.
(check_replaceable, find_replaceable_in_bb): Prevent accumulating
expressions from being merged into one.
* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
(struct tree_niter_desc): Add control, bound and cmp fields.
(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
Declare.
* Makefile.in (tree-ssa-loop-prefetch.o): Add.
(tree-cfgcleanup.o): Add SCEV_H dependency.
(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
* passes.c (init_optimization_passes): Add pass_loop_prefetch.
2006-02-14 Richard Guenther <rguenther@suse.de> 2006-02-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/26258 PR tree-optimization/26258
......
...@@ -963,7 +963,7 @@ OBJS-common = \ ...@@ -963,7 +963,7 @@ OBJS-common = \
tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \ tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \
tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \ tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
tree-vect-patterns.o \ tree-vect-patterns.o tree-ssa-loop-prefetch.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \ tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
...@@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \ ...@@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
tree-chrec.h toplev.h langhooks.h
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
...@@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ ...@@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
$(PARAMS_H)
tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
......
...@@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists ...@@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists
Generate position-independent code for executables if possible (small mode) Generate position-independent code for executables if possible (small mode)
fprefetch-loop-arrays fprefetch-loop-arrays
Common Report Var(flag_prefetch_loop_arrays) Common Report Var(flag_prefetch_loop_arrays,1)
Generate prefetch instructions, if available, for arrays in loops
fprefetch-loop-arrays-rtl
Common Report Var(flag_prefetch_loop_arrays,2)
Generate prefetch instructions, if available, for arrays in loops Generate prefetch instructions, if available, for arrays in loops
fprofile fprofile
......
...@@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}.
-funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol -funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol -fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -fprefetch-loop-arrays @gol -foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
-fprofile-generate -fprofile-use @gol -fprofile-generate -fprofile-use @gol
-fregmove -frename-registers @gol -fregmove -frename-registers @gol
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol -freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
...@@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some ...@@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some
local variables when unrolling a loop which can result in superior code. local variables when unrolling a loop which can result in superior code.
@item -fprefetch-loop-arrays @item -fprefetch-loop-arrays
@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays @opindex fprefetch-loop-arrays
@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays. memory to improve the performance of loops that access large arrays.
...@@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates ...@@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates
of the loop on both branches (modified according to result of the condition). of the loop on both branches (modified according to result of the condition).
@item -fprefetch-loop-arrays @item -fprefetch-loop-arrays
@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays @opindex fprefetch-loop-arrays
@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays. memory to improve the performance of loops that access large arrays.
......
...@@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void) ...@@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void)
free_bb_for_insn (); free_bb_for_insn ();
profile_status = PROFILE_ABSENT; profile_status = PROFILE_ABSENT;
do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0; do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
if (flag_rerun_loop_opt) if (flag_rerun_loop_opt)
{ {
......
...@@ -601,6 +601,7 @@ init_optimization_passes (void) ...@@ -601,6 +601,7 @@ init_optimization_passes (void)
vectorizer creates alias relations that are not supported by vectorizer creates alias relations that are not supported by
pass_may_alias. */ pass_may_alias. */
NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_loop_prefetch);
NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done); NEXT_PASS (pass_tree_loop_done);
*p = NULL; *p = NULL;
......
...@@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") ...@@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init") DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini") DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
......
...@@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */ ...@@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */
#include "cfglayout.h" #include "cfglayout.h"
#include "hashtab.h" #include "hashtab.h"
#include "tree-ssa-propagate.h" #include "tree-ssa-propagate.h"
#include "tree-scalar-evolution.h"
/* Remove any fallthru edge from EV. Return true if an edge was removed. */ /* Remove any fallthru edge from EV. Return true if an edge was removed. */
...@@ -559,23 +560,26 @@ cleanup_tree_cfg (void) ...@@ -559,23 +560,26 @@ cleanup_tree_cfg (void)
void void
cleanup_tree_cfg_loop (void) cleanup_tree_cfg_loop (void)
{ {
bitmap changed_bbs = BITMAP_ALLOC (NULL); bool changed = cleanup_tree_cfg ();
cleanup_tree_cfg (); if (changed)
{
fix_loop_structure (current_loops, changed_bbs); bitmap changed_bbs = BITMAP_ALLOC (NULL);
calculate_dominance_info (CDI_DOMINATORS); fix_loop_structure (current_loops, changed_bbs);
calculate_dominance_info (CDI_DOMINATORS);
/* This usually does nothing. But sometimes parts of cfg that originally /* This usually does nothing. But sometimes parts of cfg that originally
were inside a loop get out of it due to edge removal (since they were inside a loop get out of it due to edge removal (since they
become unreachable by back edges from latch). */ become unreachable by back edges from latch). */
rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa); rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
BITMAP_FREE (changed_bbs); BITMAP_FREE (changed_bbs);
#ifdef ENABLE_CHECKING #ifdef ENABLE_CHECKING
verify_loop_structure (current_loops); verify_loop_structure (current_loops);
#endif #endif
scev_reset ();
}
} }
/* Merge the PHI nodes at BB into those at BB's sole successor. */ /* Merge the PHI nodes at BB into those at BB's sole successor. */
......
...@@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree); ...@@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree);
extern bool may_propagate_copy (tree, tree); extern bool may_propagate_copy (tree, tree);
extern bool may_propagate_copy_into_asm (tree); extern bool may_propagate_copy_into_asm (tree);
/* Affine induction variable: its value in iteration i is BASE + STEP * i. */
typedef struct
{
/* Iv = BASE + STEP * i. */
tree base, step;
/* True if this iv does not overflow. */
bool no_overflow;
} affine_iv;
/* Description of number of iterations of a loop. All the expressions inside /* Description of number of iterations of a loop. All the expressions inside
the structure can be evaluated at the end of the loop's preheader the structure can be evaluated at the end of the loop's preheader
(and due to ssa form, also anywhere inside the body of the loop). */ (and due to ssa form, also anywhere inside the body of the loop). */
...@@ -697,6 +708,15 @@ struct tree_niter_desc ...@@ -697,6 +708,15 @@ struct tree_niter_desc
MAX_SIGNED_INT. However if the (n <= 0) assumption MAX_SIGNED_INT. However if the (n <= 0) assumption
is eliminated (by looking at the guard on entry of is eliminated (by looking at the guard on entry of
the loop), then the information would be lost. */ the loop), then the information would be lost. */
/* The simplified shape of the exit condition. The loop exits if
CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
LT_EXPR and negative if CMP is GT_EXPR. This information is used
by loop unrolling. */
affine_iv control;
tree bound;
enum tree_code cmp;
}; };
/* In tree-vectorizer.c */ /* In tree-vectorizer.c */
...@@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *); ...@@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *);
void tree_ssa_unswitch_loops (struct loops *); void tree_ssa_unswitch_loops (struct loops *);
void canonicalize_induction_variables (struct loops *); void canonicalize_induction_variables (struct loops *);
void tree_unroll_loops_completely (struct loops *, bool); void tree_unroll_loops_completely (struct loops *, bool);
void tree_ssa_prefetch_arrays (struct loops *);
void remove_empty_loops (struct loops *); void remove_empty_loops (struct loops *);
void tree_ssa_iv_optimize (struct loops *); void tree_ssa_iv_optimize (struct loops *);
...@@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree, ...@@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree,
tree expand_simple_operations (tree); tree expand_simple_operations (tree);
void substitute_in_loop_info (struct loop *, tree, tree); void substitute_in_loop_info (struct loop *, tree, tree);
edge single_dom_exit (struct loop *); edge single_dom_exit (struct loop *);
bool can_unroll_loop_p (struct loop *loop, unsigned factor,
struct tree_niter_desc *niter);
void tree_unroll_loop (struct loops *, struct loop *, unsigned,
edge, struct tree_niter_desc *);
/* In tree-ssa-threadedge.c */ /* In tree-ssa-threadedge.c */
extern bool potentially_threadable_block (basic_block); extern bool potentially_threadable_block (basic_block);
......
...@@ -1299,7 +1299,8 @@ typedef struct value_expr_d ...@@ -1299,7 +1299,8 @@ typedef struct value_expr_d
typedef struct temp_expr_table_d typedef struct temp_expr_table_d
{ {
var_map map; var_map map;
void **version_info; void **version_info;
bitmap *expr_vars;
value_expr_p *partition_dep_list; value_expr_p *partition_dep_list;
bitmap replaceable; bitmap replaceable;
bool saw_replaceable; bool saw_replaceable;
...@@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map) ...@@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map)
t->map = map; t->map = map;
t->version_info = XCNEWVEC (void *, num_ssa_names + 1); t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
t->partition_dep_list = XCNEWVEC (value_expr_p, t->partition_dep_list = XCNEWVEC (value_expr_p,
num_var_partitions (map) + 1); num_var_partitions (map) + 1);
...@@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t) ...@@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t)
{ {
value_expr_p p; value_expr_p p;
tree *ret = NULL; tree *ret = NULL;
unsigned i;
#ifdef ENABLE_CHECKING #ifdef ENABLE_CHECKING
unsigned x; unsigned x;
...@@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t) ...@@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t)
BITMAP_FREE (t->partition_in_use); BITMAP_FREE (t->partition_in_use);
BITMAP_FREE (t->replaceable); BITMAP_FREE (t->replaceable);
for (i = 0; i <= num_ssa_names; i++)
if (t->expr_vars[i])
BITMAP_FREE (t->expr_vars[i]);
free (t->expr_vars);
free (t->partition_dep_list); free (t->partition_dep_list);
if (t->saw_replaceable) if (t->saw_replaceable)
ret = (tree *)t->version_info; ret = (tree *)t->version_info;
...@@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var) ...@@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var)
static bool static bool
check_replaceable (temp_expr_table_p tab, tree stmt) check_replaceable (temp_expr_table_p tab, tree stmt)
{ {
tree var, def; tree var, def, basevar;
int version; int version;
var_map map = tab->map; var_map map = tab->map;
ssa_op_iter iter; ssa_op_iter iter;
tree call_expr; tree call_expr;
bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
if (TREE_CODE (stmt) != MODIFY_EXPR) if (TREE_CODE (stmt) != MODIFY_EXPR)
return false; return false;
...@@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt) ...@@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt)
} }
version = SSA_NAME_VERSION (def); version = SSA_NAME_VERSION (def);
basevar = SSA_NAME_VAR (def);
bitmap_set_bit (def_vars, DECL_UID (basevar));
/* Add this expression to the dependency list for each use partition. */ /* Add this expression to the dependency list for each use partition. */
FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
{ {
add_dependance (tab, version, var); add_dependance (tab, version, var);
use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
if (use_vars)
bitmap_ior_into (def_vars, use_vars);
} }
tab->expr_vars[version] = def_vars;
/* If there are VUSES, add a dependence on virtual defs. */ /* If there are VUSES, add a dependence on virtual defs. */
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE)) if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
...@@ -1704,7 +1720,7 @@ static void ...@@ -1704,7 +1720,7 @@ static void
find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb) find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
{ {
block_stmt_iterator bsi; block_stmt_iterator bsi;
tree stmt, def; tree stmt, def, use;
stmt_ann_t ann; stmt_ann_t ann;
int partition; int partition;
var_map map = tab->map; var_map map = tab->map;
...@@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb) ...@@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
ann = stmt_ann (stmt); ann = stmt_ann (stmt);
/* Determine if this stmt finishes an existing expression. */ /* Determine if this stmt finishes an existing expression. */
FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE) FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{ {
if (tab->version_info[SSA_NAME_VERSION (def)]) unsigned ver = SSA_NAME_VERSION (use);
if (tab->version_info[ver])
{ {
bool same_root_var = false; bool same_root_var = false;
tree def2;
ssa_op_iter iter2; ssa_op_iter iter2;
bitmap vars = tab->expr_vars[ver];
/* See if the root variables are the same. If they are, we /* See if the root variables are the same. If they are, we
do not want to do the replacement to avoid problems with do not want to do the replacement to avoid problems with
code size, see PR tree-optimization/17549. */ code size, see PR tree-optimization/17549. */
FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF) FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2)) {
{ if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
same_root_var = true; {
break; same_root_var = true;
} break;
}
}
/* Mark expression as replaceable unless stmt is volatile /* Mark expression as replaceable unless stmt is volatile
or DEF sets the same root variable as STMT. */ or DEF sets the same root variable as STMT. */
if (!ann->has_volatile_ops && !same_root_var) if (!ann->has_volatile_ops && !same_root_var)
mark_replaceable (tab, def); mark_replaceable (tab, use);
else else
finish_expr (tab, SSA_NAME_VERSION (def), false); finish_expr (tab, ver, false);
} }
} }
......
...@@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds; ...@@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds;
extern struct tree_opt_pass pass_if_conversion; extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize; extern struct tree_opt_pass pass_vectorize;
extern struct tree_opt_pass pass_complete_unroll; extern struct tree_opt_pass pass_complete_unroll;
extern struct tree_opt_pass pass_loop_prefetch;
extern struct tree_opt_pass pass_iv_optimize; extern struct tree_opt_pass pass_iv_optimize;
extern struct tree_opt_pass pass_tree_loop_done; extern struct tree_opt_pass pass_tree_loop_done;
extern struct tree_opt_pass pass_ch; extern struct tree_opt_pass pass_ch;
......
...@@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void); ...@@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void);
extern void scev_analysis (void); extern void scev_analysis (void);
void scev_const_prop (void); void scev_const_prop (void);
/* Affine iv. */
typedef struct
{
/* Iv = BASE + STEP * i. */
tree base, step;
/* True if this iv does not overflow. */
bool no_overflow;
} affine_iv;
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool); extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
#endif /* GCC_TREE_SCALAR_EVOLUTION_H */ #endif /* GCC_TREE_SCALAR_EVOLUTION_H */
...@@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final, ...@@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final,
tree niter_type = unsigned_type_for (type); tree niter_type = unsigned_type_for (type);
tree s, c, d, bits, assumption, tmp, bound; tree s, c, d, bits, assumption, tmp, bound;
niter->control = *iv;
niter->bound = final;
niter->cmp = NE_EXPR;
/* Rearrange the terms so that we get inequality s * i <> c, with s /* Rearrange the terms so that we get inequality s * i <> c, with s
positive. Also cast everything to the unsigned type. */ positive. Also cast everything to the unsigned type. */
if (tree_int_cst_sign_bit (iv->step)) if (tree_int_cst_sign_bit (iv->step))
...@@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1, ...@@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
tree niter_type = unsigned_type_for (type); tree niter_type = unsigned_type_for (type);
tree delta, step, s; tree delta, step, s;
if (nonzero_p (iv0->step))
{
niter->control = *iv0;
niter->cmp = LT_EXPR;
niter->bound = iv1->base;
}
else
{
niter->control = *iv1;
niter->cmp = GT_EXPR;
niter->bound = iv0->base;
}
delta = fold_build2 (MINUS_EXPR, niter_type, delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base), fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base)); fold_convert (niter_type, iv0->base));
...@@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code, ...@@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
niter->niter = NULL_TREE; niter->niter = NULL_TREE;
niter->additional_info = boolean_true_node; niter->additional_info = boolean_true_node;
niter->bound = NULL_TREE;
niter->cmp = ERROR_MARK;
/* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
the control variable is on lhs. */ the control variable is on lhs. */
if (code == GE_EXPR || code == GT_EXPR if (code == GE_EXPR || code == GT_EXPR
......
...@@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll = ...@@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll =
0 /* letter */ 0 /* letter */
}; };
/* Prefetching. */
/* Execute function of the tree-level prefetching pass.  Returns without
   doing anything when current_loops is not set; otherwise passes the
   current loop structures to tree_ssa_prefetch_arrays.  */
static void
tree_ssa_loop_prefetch (void)
{
if (!current_loops)
return;
tree_ssa_prefetch_arrays (current_loops);
}
/* Gate of the tree-level prefetching pass.  The pass is enabled only when
   flag_prefetch_loop_arrays is exactly 1, the value stored by
   -fprefetch-loop-arrays.  The value 2, stored by
   -fprefetch-loop-arrays-rtl, does not enable it; that value is tested by
   rest_of_handle_loop_optimize instead.  */
static bool
gate_tree_ssa_loop_prefetch (void)
{
return flag_prefetch_loop_arrays == 1;
}
/* Pass descriptor for tree-level prefetching.  The pass is named
   "prefetch", is gated by gate_tree_ssa_loop_prefetch, is timed under
   TV_TREE_PREFETCH, and requires the CFG and SSA form.  On finish it
   requests TODO_dump_func and TODO_verify_loops.  */
struct tree_opt_pass pass_loop_prefetch =
{
"prefetch", /* name */
gate_tree_ssa_loop_prefetch, /* gate */
tree_ssa_loop_prefetch, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_PREFETCH, /* tv_id */
PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
0 /* letter */
};
/* Induction variable optimizations. */ /* Induction variable optimizations. */
static void static void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment