Commit 17684618 by Zdenek Dvorak Committed by Zdenek Dvorak

invoke.texi (-fprefetch-loop-arrays, [...]): Document.

	* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
	Document.
	* tree-ssa-loop-niter.c (number_of_iterations_ne,
	number_of_iterations_lt, number_of_iterations_cond): Remember the shape
	of the ending condition.
	* tree-ssa-loop-manip.c: Include params.h.
	(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
	tree_unroll_loop): New functions.
	* tree-pass.h (pass_loop_prefetch): Declare.
	* loop.c (rest_of_handle_loop_optimize): Test for
	-fprefetch-loop-arrays-rtl.
	* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
	* timevar.def (TV_TREE_PREFETCH): New timevar.
	* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
	pass_loop_prefetch): New.
	* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
	(cleanup_tree_cfg_loop): Call scev_reset.
	* common.opt (fprefetch-loop-arrays-rtl): Add.
	* tree-ssa-loop-prefetch.c: New file.
	* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
	(new_temp_expr_table): Initialize expr_vars.
	(free_temp_expr_table): Cleanup expr_vars.
	(check_replaceable, find_replaceable_in_bb): Prevent accumulating
	expressions from being merged into one.
	* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
	(struct tree_niter_desc): Add control, bound and cmp fields.
	(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
	Declare.
	* Makefile.in (tree-ssa-loop-prefetch.o): Add.
	(tree-cfgcleanup.o): Add SCEV_H dependency.
	(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
	* passes.c (init_optimization_passes): Add pass_loop_prefetch.

From-SVN: r110964
parent 0a4288d9
2006-02-14 Zdenek Dvorak <dvorakz@suse.cz>
* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
Document.
* tree-ssa-loop-niter.c (number_of_iterations_ne,
number_of_iterations_lt, number_of_iterations_cond): Remember the shape
of the ending condition.
* tree-ssa-loop-manip.c: Include params.h.
(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
tree_unroll_loop): New functions.
* tree-pass.h (pass_loop_prefetch): Declare.
* loop.c (rest_of_handle_loop_optimize): Test for
-fprefetch-loop-arrays-rtl.
* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
* timevar.def (TV_TREE_PREFETCH): New timevar.
* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
pass_loop_prefetch): New.
* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
(cleanup_tree_cfg_loop): Call scev_reset.
* common.opt (fprefetch-loop-arrays-rtl): Add.
* tree-ssa-loop-prefetch.c: New file.
* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
(new_temp_expr_table): Initialize expr_vars.
(free_temp_expr_table): Cleanup expr_vars.
(check_replaceable, find_replaceable_in_bb): Prevent accumulating
expressions from being merged into one.
* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
(struct tree_niter_desc): Add control, bound and cmp fields.
(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
Declare.
* Makefile.in (tree-ssa-loop-prefetch.o): Add.
(tree-cfgcleanup.o): Add SCEV_H dependency.
(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
* passes.c (init_optimization_passes): Add pass_loop_prefetch.
2006-02-14 Richard Guenther <rguenther@suse.de> 2006-02-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/26258 PR tree-optimization/26258
......
...@@ -963,7 +963,7 @@ OBJS-common = \ ...@@ -963,7 +963,7 @@ OBJS-common = \
tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \ tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \
tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \ tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
tree-vect-patterns.o \ tree-vect-patterns.o tree-ssa-loop-prefetch.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \ tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
...@@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \ ...@@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
tree-chrec.h toplev.h langhooks.h
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
...@@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ ...@@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
$(PARAMS_H)
tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
......
...@@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists ...@@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists
Generate position-independent code for executables if possible (small mode) Generate position-independent code for executables if possible (small mode)
fprefetch-loop-arrays fprefetch-loop-arrays
Common Report Var(flag_prefetch_loop_arrays) Common Report Var(flag_prefetch_loop_arrays,1)
Generate prefetch instructions, if available, for arrays in loops
fprefetch-loop-arrays-rtl
Common Report Var(flag_prefetch_loop_arrays,2)
Generate prefetch instructions, if available, for arrays in loops Generate prefetch instructions, if available, for arrays in loops
fprofile fprofile
......
...@@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}.
-funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol -funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol -fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -fprefetch-loop-arrays @gol -foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
-fprofile-generate -fprofile-use @gol -fprofile-generate -fprofile-use @gol
-fregmove -frename-registers @gol -fregmove -frename-registers @gol
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol -freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
...@@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some ...@@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some
local variables when unrolling a loop which can result in superior code. local variables when unrolling a loop which can result in superior code.
@item -fprefetch-loop-arrays @item -fprefetch-loop-arrays
@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays @opindex fprefetch-loop-arrays
@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays. memory to improve the performance of loops that access large arrays.
...@@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates ...@@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates
of the loop on both branches (modified according to result of the condition). of the loop on both branches (modified according to result of the condition).
@item -fprefetch-loop-arrays @item -fprefetch-loop-arrays
@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays @opindex fprefetch-loop-arrays
@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays. memory to improve the performance of loops that access large arrays.
......
...@@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void) ...@@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void)
free_bb_for_insn (); free_bb_for_insn ();
profile_status = PROFILE_ABSENT; profile_status = PROFILE_ABSENT;
do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0; do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
if (flag_rerun_loop_opt) if (flag_rerun_loop_opt)
{ {
......
...@@ -601,6 +601,7 @@ init_optimization_passes (void) ...@@ -601,6 +601,7 @@ init_optimization_passes (void)
vectorizer creates alias relations that are not supported by vectorizer creates alias relations that are not supported by
pass_may_alias. */ pass_may_alias. */
NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_loop_prefetch);
NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done); NEXT_PASS (pass_tree_loop_done);
*p = NULL; *p = NULL;
......
...@@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") ...@@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init") DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini") DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
......
...@@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */ ...@@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */
#include "cfglayout.h" #include "cfglayout.h"
#include "hashtab.h" #include "hashtab.h"
#include "tree-ssa-propagate.h" #include "tree-ssa-propagate.h"
#include "tree-scalar-evolution.h"
/* Remove any fallthru edge from EV. Return true if an edge was removed. */ /* Remove any fallthru edge from EV. Return true if an edge was removed. */
...@@ -559,10 +560,11 @@ cleanup_tree_cfg (void) ...@@ -559,10 +560,11 @@ cleanup_tree_cfg (void)
void void
cleanup_tree_cfg_loop (void) cleanup_tree_cfg_loop (void)
{ {
bitmap changed_bbs = BITMAP_ALLOC (NULL); bool changed = cleanup_tree_cfg ();
cleanup_tree_cfg ();
if (changed)
{
bitmap changed_bbs = BITMAP_ALLOC (NULL);
fix_loop_structure (current_loops, changed_bbs); fix_loop_structure (current_loops, changed_bbs);
calculate_dominance_info (CDI_DOMINATORS); calculate_dominance_info (CDI_DOMINATORS);
...@@ -576,6 +578,8 @@ cleanup_tree_cfg_loop (void) ...@@ -576,6 +578,8 @@ cleanup_tree_cfg_loop (void)
#ifdef ENABLE_CHECKING #ifdef ENABLE_CHECKING
verify_loop_structure (current_loops); verify_loop_structure (current_loops);
#endif #endif
scev_reset ();
}
} }
/* Merge the PHI nodes at BB into those at BB's sole successor. */ /* Merge the PHI nodes at BB into those at BB's sole successor. */
......
...@@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree); ...@@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree);
extern bool may_propagate_copy (tree, tree); extern bool may_propagate_copy (tree, tree);
extern bool may_propagate_copy_into_asm (tree); extern bool may_propagate_copy_into_asm (tree);
/* Description of an affine induction variable: its value in iteration i
is BASE + STEP * i. */
typedef struct
{
/* The two coefficients of the iv, i.e. Iv = BASE + STEP * i. */
tree base, step;
/* True if this iv does not overflow. */
bool no_overflow;
} affine_iv;
/* Description of number of iterations of a loop. All the expressions inside /* Description of number of iterations of a loop. All the expressions inside
the structure can be evaluated at the end of the loop's preheader the structure can be evaluated at the end of the loop's preheader
(and due to ssa form, also anywhere inside the body of the loop). */ (and due to ssa form, also anywhere inside the body of the loop). */
...@@ -697,6 +708,15 @@ struct tree_niter_desc ...@@ -697,6 +708,15 @@ struct tree_niter_desc
MAX_SIGNED_INT. However if the (n <= 0) assumption MAX_SIGNED_INT. However if the (n <= 0) assumption
is eliminated (by looking at the guard on entry of is eliminated (by looking at the guard on entry of
the loop), then the information would be lost. */ the loop), then the information would be lost. */
/* The simplified shape of the exit condition. The loop exits if
CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
LT_EXPR and negative if CMP is GT_EXPR. This information is used
by loop unrolling. */
affine_iv control;
tree bound;
enum tree_code cmp;
}; };
/* In tree-vectorizer.c */ /* In tree-vectorizer.c */
...@@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *); ...@@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *);
void tree_ssa_unswitch_loops (struct loops *); void tree_ssa_unswitch_loops (struct loops *);
void canonicalize_induction_variables (struct loops *); void canonicalize_induction_variables (struct loops *);
void tree_unroll_loops_completely (struct loops *, bool); void tree_unroll_loops_completely (struct loops *, bool);
void tree_ssa_prefetch_arrays (struct loops *);
void remove_empty_loops (struct loops *); void remove_empty_loops (struct loops *);
void tree_ssa_iv_optimize (struct loops *); void tree_ssa_iv_optimize (struct loops *);
...@@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree, ...@@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree,
tree expand_simple_operations (tree); tree expand_simple_operations (tree);
void substitute_in_loop_info (struct loop *, tree, tree); void substitute_in_loop_info (struct loop *, tree, tree);
edge single_dom_exit (struct loop *); edge single_dom_exit (struct loop *);
bool can_unroll_loop_p (struct loop *loop, unsigned factor,
struct tree_niter_desc *niter);
void tree_unroll_loop (struct loops *, struct loop *, unsigned,
edge, struct tree_niter_desc *);
/* In tree-ssa-threadedge.c */ /* In tree-ssa-threadedge.c */
extern bool potentially_threadable_block (basic_block); extern bool potentially_threadable_block (basic_block);
......
...@@ -1300,6 +1300,7 @@ typedef struct temp_expr_table_d ...@@ -1300,6 +1300,7 @@ typedef struct temp_expr_table_d
{ {
var_map map; var_map map;
void **version_info; void **version_info;
bitmap *expr_vars;
value_expr_p *partition_dep_list; value_expr_p *partition_dep_list;
bitmap replaceable; bitmap replaceable;
bool saw_replaceable; bool saw_replaceable;
...@@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map) ...@@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map)
t->map = map; t->map = map;
t->version_info = XCNEWVEC (void *, num_ssa_names + 1); t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
t->partition_dep_list = XCNEWVEC (value_expr_p, t->partition_dep_list = XCNEWVEC (value_expr_p,
num_var_partitions (map) + 1); num_var_partitions (map) + 1);
...@@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t) ...@@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t)
{ {
value_expr_p p; value_expr_p p;
tree *ret = NULL; tree *ret = NULL;
unsigned i;
#ifdef ENABLE_CHECKING #ifdef ENABLE_CHECKING
unsigned x; unsigned x;
...@@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t) ...@@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t)
BITMAP_FREE (t->partition_in_use); BITMAP_FREE (t->partition_in_use);
BITMAP_FREE (t->replaceable); BITMAP_FREE (t->replaceable);
for (i = 0; i <= num_ssa_names; i++)
if (t->expr_vars[i])
BITMAP_FREE (t->expr_vars[i]);
free (t->expr_vars);
free (t->partition_dep_list); free (t->partition_dep_list);
if (t->saw_replaceable) if (t->saw_replaceable)
ret = (tree *)t->version_info; ret = (tree *)t->version_info;
...@@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var) ...@@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var)
static bool static bool
check_replaceable (temp_expr_table_p tab, tree stmt) check_replaceable (temp_expr_table_p tab, tree stmt)
{ {
tree var, def; tree var, def, basevar;
int version; int version;
var_map map = tab->map; var_map map = tab->map;
ssa_op_iter iter; ssa_op_iter iter;
tree call_expr; tree call_expr;
bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
if (TREE_CODE (stmt) != MODIFY_EXPR) if (TREE_CODE (stmt) != MODIFY_EXPR)
return false; return false;
...@@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt) ...@@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt)
} }
version = SSA_NAME_VERSION (def); version = SSA_NAME_VERSION (def);
basevar = SSA_NAME_VAR (def);
bitmap_set_bit (def_vars, DECL_UID (basevar));
/* Add this expression to the dependency list for each use partition. */ /* Add this expression to the dependency list for each use partition. */
FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
{ {
add_dependance (tab, version, var); add_dependance (tab, version, var);
use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
if (use_vars)
bitmap_ior_into (def_vars, use_vars);
} }
tab->expr_vars[version] = def_vars;
/* If there are VUSES, add a dependence on virtual defs. */ /* If there are VUSES, add a dependence on virtual defs. */
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE)) if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
...@@ -1704,7 +1720,7 @@ static void ...@@ -1704,7 +1720,7 @@ static void
find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb) find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
{ {
block_stmt_iterator bsi; block_stmt_iterator bsi;
tree stmt, def; tree stmt, def, use;
stmt_ann_t ann; stmt_ann_t ann;
int partition; int partition;
var_map map = tab->map; var_map map = tab->map;
...@@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb) ...@@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
ann = stmt_ann (stmt); ann = stmt_ann (stmt);
/* Determine if this stmt finishes an existing expression. */ /* Determine if this stmt finishes an existing expression. */
FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE) FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{ {
if (tab->version_info[SSA_NAME_VERSION (def)]) unsigned ver = SSA_NAME_VERSION (use);
if (tab->version_info[ver])
{ {
bool same_root_var = false; bool same_root_var = false;
tree def2;
ssa_op_iter iter2; ssa_op_iter iter2;
bitmap vars = tab->expr_vars[ver];
/* See if the root variables are the same. If they are, we /* See if the root variables are the same. If they are, we
do not want to do the replacement to avoid problems with do not want to do the replacement to avoid problems with
code size, see PR tree-optimization/17549. */ code size, see PR tree-optimization/17549. */
FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF) FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2)) {
if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
{ {
same_root_var = true; same_root_var = true;
break; break;
} }
}
/* Mark expression as replaceable unless stmt is volatile /* Mark expression as replaceable unless stmt is volatile
or DEF sets the same root variable as STMT. */ or DEF sets the same root variable as STMT. */
if (!ann->has_volatile_ops && !same_root_var) if (!ann->has_volatile_ops && !same_root_var)
mark_replaceable (tab, def); mark_replaceable (tab, use);
else else
finish_expr (tab, SSA_NAME_VERSION (def), false); finish_expr (tab, ver, false);
} }
} }
......
...@@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds; ...@@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds;
extern struct tree_opt_pass pass_if_conversion; extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize; extern struct tree_opt_pass pass_vectorize;
extern struct tree_opt_pass pass_complete_unroll; extern struct tree_opt_pass pass_complete_unroll;
extern struct tree_opt_pass pass_loop_prefetch;
extern struct tree_opt_pass pass_iv_optimize; extern struct tree_opt_pass pass_iv_optimize;
extern struct tree_opt_pass pass_tree_loop_done; extern struct tree_opt_pass pass_tree_loop_done;
extern struct tree_opt_pass pass_ch; extern struct tree_opt_pass pass_ch;
......
...@@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void); ...@@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void);
extern void scev_analysis (void); extern void scev_analysis (void);
void scev_const_prop (void); void scev_const_prop (void);
/* Affine iv. */
typedef struct
{
/* Iv = BASE + STEP * i. */
tree base, step;
/* True if this iv does not overflow. */
bool no_overflow;
} affine_iv;
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool); extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
#endif /* GCC_TREE_SCALAR_EVOLUTION_H */ #endif /* GCC_TREE_SCALAR_EVOLUTION_H */
...@@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA ...@@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "tree-pass.h" #include "tree-pass.h"
#include "cfglayout.h" #include "cfglayout.h"
#include "tree-scalar-evolution.h" #include "tree-scalar-evolution.h"
#include "params.h"
/* Creates an induction variable with value BASE + STEP * iteration in LOOP. /* Creates an induction variable with value BASE + STEP * iteration in LOOP.
It is expected that neither BASE nor STEP are shared with other expressions It is expected that neither BASE nor STEP are shared with other expressions
...@@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e, ...@@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
return true; return true;
} }
/* Build and return the statement
     if (COND) goto THEN_LABEL; else goto ELSE_LABEL;  */

static tree
build_if_stmt (tree cond, tree then_label, tree else_label)
{
  /* The two arms of the conditional are plain jumps to the given labels.  */
  tree then_goto = build1 (GOTO_EXPR, void_type_node, then_label);
  tree else_goto = build1 (GOTO_EXPR, void_type_node, else_label);

  return build3 (COND_EXPR, void_type_node, cond, then_goto, else_goto);
}
/* Returns true if we can unroll LOOP FACTOR times. Number
of iterations of the loop is returned in NITER. */
bool
can_unroll_loop_p (struct loop *loop, unsigned factor,
struct tree_niter_desc *niter)
{
edge exit;
/* Check whether unrolling is possible. We only want to unroll loops
for that we are able to determine number of iterations. We also
want to split the extra iterations of the loop from its end,
therefore we require that the loop has precisely one
exit. */
exit = single_dom_exit (loop);
if (!exit)
return false;
if (!number_of_iterations_exit (loop, exit, niter, false)
|| niter->cmp == ERROR_MARK)
return false;
/* And of course, we must be able to duplicate the loop. */
if (!can_duplicate_loop_p (loop))
return false;
/* The final loop should be small enough. */
if (tree_num_loop_insns (loop) * factor
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
return false;
return true;
}
/* Determines the conditions that control execution of LOOP unrolled FACTOR
times. DESC is number of iterations of LOOP. ENTER_COND is set to
condition that must be true if the main loop can be entered.
EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
how the exit from the unrolled loop should be controlled.
Any statements needed to compute ENTER_COND, EXIT_BASE and EXIT_BOUND are
inserted on the preheader edge of LOOP. DESC->cmp must not be
ERROR_MARK. */
static void
determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
unsigned factor, tree *enter_cond,
tree *exit_base, tree *exit_step,
enum tree_code *exit_cmp, tree *exit_bound)
{
tree stmts;
tree base = desc->control.base;
tree step = desc->control.step;
tree bound = desc->bound;
tree type = TREE_TYPE (base);
tree bigstep, delta;
/* MIN and MAX are only used in the guard on BOUND below (presumably the
extreme values of TYPE -- confirm against lower_bound_in_type and
upper_bound_in_type). */
tree min = lower_bound_in_type (type, type);
tree max = upper_bound_in_type (type, type);
enum tree_code cmp = desc->cmp;
tree cond = boolean_true_node, assum;
/* Start with "never enter the main loop" outputs; all of them are
overwritten at the end of the function. */
*enter_cond = boolean_false_node;
*exit_base = NULL_TREE;
*exit_step = NULL_TREE;
*exit_cmp = ERROR_MARK;
*exit_bound = NULL_TREE;
gcc_assert (cmp != ERROR_MARK);
/* We only need to be correct when we answer question
"Do at least FACTOR more iterations remain?" in the unrolled loop.
Thus, transforming BASE + STEP * i <> BOUND to
BASE + STEP * i < BOUND is ok. */
if (cmp == NE_EXPR)
{
/* Pick the direction of the inequality from the sign of STEP. */
if (tree_int_cst_sign_bit (step))
cmp = GT_EXPR;
else
cmp = LT_EXPR;
}
else if (cmp == LT_EXPR)
{
/* An LT_EXPR exit test implies a non-negative step. */
gcc_assert (!tree_int_cst_sign_bit (step));
}
else if (cmp == GT_EXPR)
{
/* A GT_EXPR exit test implies a negative step. */
gcc_assert (tree_int_cst_sign_bit (step));
}
else
gcc_unreachable ();
/* The main body of the loop may be entered iff:
1) desc->may_be_zero is false.
2) it is possible to check that there are at least FACTOR iterations
of the loop, i.e., BOUND - step * FACTOR does not overflow.
NOTE(review): the code below actually guards BOUND - DELTA, where
DELTA = STEP * FACTOR - STEP.
3) # of iterations is at least FACTOR */
if (!zero_p (desc->may_be_zero))
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
invert_truthvalue (desc->may_be_zero),
cond);
/* BIGSTEP = STEP * FACTOR is the step of the control iv of the unrolled
loop; DELTA = BIGSTEP - STEP, i.e. STEP * (FACTOR - 1). */
bigstep = fold_build2 (MULT_EXPR, type, step,
build_int_cst_type (type, factor));
delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
/* Condition 2: BOUND must be far enough from MIN (increasing iv) or MAX
(decreasing iv) for DELTA to be subtracted from it. */
if (cmp == LT_EXPR)
assum = fold_build2 (GE_EXPR, boolean_type_node,
bound,
fold_build2 (PLUS_EXPR, type, min, delta));
else
assum = fold_build2 (LE_EXPR, boolean_type_node,
bound,
fold_build2 (PLUS_EXPR, type, max, delta));
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
/* Condition 3: compare BASE against the adjusted bound BOUND - DELTA. The
adjusted bound is also the one returned in EXIT_BOUND. */
bound = fold_build2 (MINUS_EXPR, type, bound, delta);
assum = fold_build2 (cmp, boolean_type_node, base, bound);
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
/* Gimplify the combined condition; any statements this produces go on the
preheader edge of LOOP. */
cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
/* cond now may be a gimple comparison, which would be OK, but also any
other gimple rhs (say a && b). In this case we need to force it to
operand. */
if (!is_gimple_condexpr (cond))
{
cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
}
*enter_cond = cond;
/* Force BASE and the adjusted BOUND to gimple operands as well, again
emitting the necessary statements on the preheader edge. */
base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
/* Publish the description of the new exit test: an iv starting at BASE
with step BIGSTEP, compared by CMP against the adjusted BOUND. */
*exit_base = base;
*exit_step = bigstep;
*exit_cmp = cmp;
*exit_bound = bound;
}
/* Unroll LOOP FACTOR times.  LOOPS is the loops tree.  DESC describes
   number of iterations of LOOP.  EXIT is the exit of the loop to that
   DESC corresponds.

   If N is number of iterations of the loop and MAY_BE_ZERO is the condition
   under that loop exits in the first iteration even if N != 0,

   while (1)
     {
       x = phi (init, next);

       pre;
       if (st)
         break;
       post;
     }

   becomes (with possibly the exit conditions formulated a bit differently,
   avoiding the need to create a new iv):

   if (MAY_BE_ZERO || N < FACTOR)
     goto rest;

   do
     {
       x = phi (init, next);

       pre;
       post;
       pre;
       post;
       ...
       pre;
       post;
       N -= FACTOR;

     } while (N >= FACTOR);

   rest:
     init' = phi (init, x);

   while (1)
     {
       x = phi (init', next);

       pre;
       if (st)
         break;
       post;
     }

   The consistency checks of the cfg, dominators, loop structure and
   loop-closed ssa form at the end are performed only if ENABLE_CHECKING
   is defined.  */

void
tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor,
                  edge exit, struct tree_niter_desc *desc)
{
  tree dont_exit, exit_if, ctr_before, ctr_after;
  tree enter_main_cond, exit_base, exit_step, exit_bound;
  enum tree_code exit_cmp;
  tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
  struct loop *new_loop;
  basic_block rest, exit_bb;
  edge old_entry, new_entry, old_latch, precond_edge, new_exit;
  edge nonexit, new_nonexit;
  block_stmt_iterator bsi;
  use_operand_p op;
  bool ok;
  unsigned est_niter;
  sbitmap wont_exit;

  /* Remember the expected number of iterations before the loop is
     modified; it is used below to set the probability of the new exit.  */
  est_niter = expected_loop_iterations (loop);
  determine_exit_conditions (loop, desc, factor,
                             &enter_main_cond, &exit_base, &exit_step,
                             &exit_cmp, &exit_bound);

  /* Version the loop on ENTER_MAIN_COND.  LOOP becomes the unrolled main
     loop; NEW_LOOP is the copy in front of which the block REST is
     found below.  */
  new_loop = loop_version (loops, loop, enter_main_cond, NULL, true);
  gcc_assert (new_loop != NULL);
  update_ssa (TODO_update_ssa);

  /* Unroll the loop and remove the old exits.  */
  dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
               ? boolean_false_node
               : boolean_true_node);

  /* NONEXIT is the other successor edge of the block ending with the
     exit test.  */
  if (exit == EDGE_SUCC (exit->src, 0))
    nonexit = EDGE_SUCC (exit->src, 1);
  else
    nonexit = EDGE_SUCC (exit->src, 0);

  /* The old exit is never taken any more; move its probability and count
     to the edge that stays in the loop.  */
  nonexit->probability = REG_BR_PROB_BASE;
  exit->probability = 0;
  nonexit->count += exit->count;
  exit->count = 0;

  /* Replace the exit condition by the constant that keeps control in the
     loop.  */
  exit_if = last_stmt (exit->src);
  COND_EXPR_COND (exit_if) = dont_exit;
  update_stmt (exit_if);

  /* Append FACTOR - 1 copies of the body on the latch edge.  */
  wont_exit = sbitmap_alloc (factor);
  sbitmap_ones (wont_exit);
  ok = tree_duplicate_loop_to_header_edge
         (loop, loop_latch_edge (loop), loops, factor - 1,
          wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
  free (wont_exit);
  gcc_assert (ok);
  update_ssa (TODO_update_ssa);

  /* Prepare the cfg and update the phi nodes.  */
  rest = loop_preheader_edge (new_loop)->src;
  precond_edge = single_pred_edge (rest);
  loop_split_edge_with (loop_latch_edge (loop), NULL);
  exit_bb = single_pred (loop->latch);

  /* Create the new exit from the end of the unrolled body to REST and
     estimate its profile from the expected number of iterations of the
     unrolled loop.  */
  new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE);
  new_exit->count = loop_preheader_edge (loop)->count;
  est_niter = est_niter / factor + 1;
  new_exit->probability = REG_BR_PROB_BASE / est_niter;

  new_nonexit = single_pred_edge (loop->latch);
  new_nonexit->flags = EDGE_TRUE_VALUE;
  new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;

  old_entry = loop_preheader_edge (loop);
  new_entry = loop_preheader_edge (new_loop);
  old_latch = loop_latch_edge (loop);

  /* Walk the header phi nodes of both loops in parallel.  For each pair
     create a phi node in REST merging the initial value (coming over
     PRECOND_EDGE) with the value leaving the unrolled loop (coming over
     NEW_EXIT), and use its result as the initial value in NEW_LOOP.  */
  for (phi_old_loop = phi_nodes (loop->header),
       phi_new_loop = phi_nodes (new_loop->header);
       phi_old_loop;
       phi_old_loop = PHI_CHAIN (phi_old_loop),
       phi_new_loop = PHI_CHAIN (phi_new_loop))
    {
      init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
      next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);

      /* Prefer using original variable as a base for the new ssa name.
         This is necessary for virtual ops, and useful in order to avoid
         losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME)
        var = SSA_NAME_VAR (next);
      else if (TREE_CODE (init) == SSA_NAME)
        var = SSA_NAME_VAR (init);
      else
        {
          var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
          add_referenced_tmp_var (var);
        }

      new_init = make_ssa_name (var, NULL_TREE);
      phi_rest = create_phi_node (new_init, rest);
      SSA_NAME_DEF_STMT (new_init) = phi_rest;

      add_phi_arg (phi_rest, init, precond_edge);
      add_phi_arg (phi_rest, next, new_exit);
      SET_USE (op, new_init);
    }

  /* Finally create the new counter for number of iterations and add the new
     exit instruction.  */
  bsi = bsi_last (exit_bb);
  create_iv (exit_base, exit_step, NULL_TREE, loop,
             &bsi, true, &ctr_before, &ctr_after);
  exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
                                   exit_bound),
                           tree_block_label (loop->latch),
                           tree_block_label (rest));
  bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);

  /* Full verification is costly, so only perform it in checking builds,
     as cleanup_tree_cfg_loop does for verify_loop_structure.  */
#ifdef ENABLE_CHECKING
  verify_flow_info ();
  verify_dominators (CDI_DOMINATORS);
  verify_loop_structure (loops);
  verify_loop_closed_ssa ();
#endif
}
...@@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final, ...@@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final,
tree niter_type = unsigned_type_for (type); tree niter_type = unsigned_type_for (type);
tree s, c, d, bits, assumption, tmp, bound; tree s, c, d, bits, assumption, tmp, bound;
niter->control = *iv;
niter->bound = final;
niter->cmp = NE_EXPR;
/* Rearrange the terms so that we get inequality s * i <> c, with s /* Rearrange the terms so that we get inequality s * i <> c, with s
positive. Also cast everything to the unsigned type. */ positive. Also cast everything to the unsigned type. */
if (tree_int_cst_sign_bit (iv->step)) if (tree_int_cst_sign_bit (iv->step))
...@@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1, ...@@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
tree niter_type = unsigned_type_for (type); tree niter_type = unsigned_type_for (type);
tree delta, step, s; tree delta, step, s;
if (nonzero_p (iv0->step))
{
niter->control = *iv0;
niter->cmp = LT_EXPR;
niter->bound = iv1->base;
}
else
{
niter->control = *iv1;
niter->cmp = GT_EXPR;
niter->bound = iv0->base;
}
delta = fold_build2 (MINUS_EXPR, niter_type, delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base), fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base)); fold_convert (niter_type, iv0->base));
...@@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code, ...@@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
niter->niter = NULL_TREE; niter->niter = NULL_TREE;
niter->additional_info = boolean_true_node; niter->additional_info = boolean_true_node;
niter->bound = NULL_TREE;
niter->cmp = ERROR_MARK;
/* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
the control variable is on lhs. */ the control variable is on lhs. */
if (code == GE_EXPR || code == GT_EXPR if (code == GE_EXPR || code == GT_EXPR
......
...@@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll = ...@@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll =
0 /* letter */ 0 /* letter */
}; };
/* Prefetching. */
/* Execute callback of the prefetch pass.  Passes the current loop tree to
   tree_ssa_prefetch_arrays; when current_loops is not set, nothing is
   done.  */

static void
tree_ssa_loop_prefetch (void)
{
  if (current_loops)
    tree_ssa_prefetch_arrays (current_loops);
}
/* Gate callback of the prefetch pass.  Returns true exactly when
   flag_prefetch_loop_arrays has the value 1.  */

static bool
gate_tree_ssa_loop_prefetch (void)
{
  const bool prefetch_enabled = (flag_prefetch_loop_arrays == 1);

  return prefetch_enabled;
}
/* Descriptor of the "prefetch" pass.  The pass is gated by
   gate_tree_ssa_loop_prefetch and executed by tree_ssa_loop_prefetch, is
   accounted under the TV_TREE_PREFETCH timevar, and requires both
   PROP_cfg and PROP_ssa.  It neither provides nor destroys any
   properties; after it finishes, TODO_dump_func and TODO_verify_loops
   are requested.  The initializer is positional, so the order of the
   entries below must match the field order of struct tree_opt_pass.  */
struct tree_opt_pass pass_loop_prefetch =
{
"prefetch", /* name */
gate_tree_ssa_loop_prefetch, /* gate */
tree_ssa_loop_prefetch, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_PREFETCH, /* tv_id */
PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
0 /* letter */
};
/* Induction variable optimizations. */ /* Induction variable optimizations. */
static void static void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment