Commit c9e93168 by Tom de Vries Committed by Tom de Vries

re PR middle-end/43864 (Same basic blocks should be merged)

2011-09-27  Tom de Vries  <tom@codesourcery.com>

	PR middle-end/43864
	* tree-ssa-tail-merge.c: New file.
	(struct same_succ_def): Define.
	(same_succ, const_same_succ): New typedef.
	(struct bb_cluster_def): Define.
	(bb_cluster, const_bb_cluster): New typedef.
	(struct aux_bb_info): Define.
	(BB_SIZE, BB_SAME_SUCC, BB_CLUSTER, BB_VOP_AT_EXIT): Define.
	(gvn_uses_equal): New function.
	(same_succ_print, same_succ_print_traverse, update_dep_bb)
	(stmt_update_dep_bb, local_def, same_succ_hash)
	(inverse_flags, same_succ_equal, same_succ_alloc, same_succ_delete)
	(same_succ_reset): New function.
	(same_succ_htab, same_succ_edge_flags)
	(deleted_bbs, deleted_bb_preds): New var.
	(debug_same_succ): New function.
	(worklist): New var.
	(print_worklist, add_to_worklist, find_same_succ_bb, find_same_succ)
	(init_worklist, delete_worklist, delete_basic_block_same_succ)
	(same_succ_flush_bbs, purge_bbs, update_worklist): New function.
	(print_cluster, debug_cluster, update_rep_bb)
	(add_bb_to_cluster, new_cluster, delete_cluster): New function.
	(all_clusters): New var.
	(alloc_cluster_vectors, reset_cluster_vectors, delete_cluster_vectors)
	(merge_clusters, set_cluster): New function.
	(gimple_equal_p, gsi_advance_bw_nondebug_nonlocal, find_duplicate)
	(same_phi_alternatives_1, same_phi_alternatives, bb_has_non_vop_phi)
	(deps_ok_for_redirect_from_bb_to_bb, deps_ok_for_redirect)
	(find_clusters_1, find_clusters): New function.
	(update_vuses, vop_phi, vop_at_entry, replace_block_by): New function.
	(update_bbs): New var.
	(apply_clusters): New function.
	(update_debug_stmt, update_debug_stmts): New function.
	(tail_merge_optimize): New function.
	* tree-pass.h (tail_merge_optimize): Declare.
	* tree-ssa-pre.c (execute_pre): Use tail_merge_optimize.
	* Makefile.in (OBJS-common): Add tree-ssa-tail-merge.o.
	(tree-ssa-tail-merge.o): New rule.
	* opts.c (default_options_table): Set OPT_ftree_tail_merge by default at
	OPT_LEVELS_2_PLUS.
	* tree-ssa-sccvn.c (vn_valueize): Move to ...
	* tree-ssa-sccvn.h (vn_valueize): Here.
	* timevar.def (TV_TREE_TAIL_MERGE): New timevar.
	* common.opt (ftree-tail-merge): New switch.
	* params.def (PARAM_MAX_TAIL_MERGE_COMPARISONS)
	(PARAM_MAX_TAIL_MERGE_ITERATIONS): New parameter.
	* doc/invoke.texi (Optimization Options, -O2): Add -ftree-tail-merge.
	(-ftree-tail-merge, max-tail-merge-comparisons)
	(max-tail-merge-iterations): New item.

From-SVN: r179275
parent 99e299a8
2011-09-27 Tom de Vries <tom@codesourcery.com>
PR middle-end/43864
* tree-ssa-tail-merge.c: New file.
(struct same_succ_def): Define.
(same_succ, const_same_succ): New typedef.
(struct bb_cluster_def): Define.
(bb_cluster, const_bb_cluster): New typedef.
(struct aux_bb_info): Define.
(BB_SIZE, BB_SAME_SUCC, BB_CLUSTER, BB_VOP_AT_EXIT): Define.
(gvn_uses_equal): New function.
(same_succ_print, same_succ_print_traverse, update_dep_bb)
(stmt_update_dep_bb, local_def, same_succ_hash)
(inverse_flags, same_succ_equal, same_succ_alloc, same_succ_delete)
(same_succ_reset): New function.
(same_succ_htab, same_succ_edge_flags)
(deleted_bbs, deleted_bb_preds): New var.
(debug_same_succ): New function.
(worklist): New var.
(print_worklist, add_to_worklist, find_same_succ_bb, find_same_succ)
(init_worklist, delete_worklist, delete_basic_block_same_succ)
(same_succ_flush_bbs, purge_bbs, update_worklist): New function.
(print_cluster, debug_cluster, update_rep_bb)
(add_bb_to_cluster, new_cluster, delete_cluster): New function.
(all_clusters): New var.
(alloc_cluster_vectors, reset_cluster_vectors, delete_cluster_vectors)
(merge_clusters, set_cluster): New function.
(gimple_equal_p, gsi_advance_bw_nondebug_nonlocal, find_duplicate)
(same_phi_alternatives_1, same_phi_alternatives, bb_has_non_vop_phi)
(deps_ok_for_redirect_from_bb_to_bb, deps_ok_for_redirect)
(find_clusters_1, find_clusters): New function.
(update_vuses, vop_phi, vop_at_entry, replace_block_by): New function.
(update_bbs): New var.
(apply_clusters): New function.
(update_debug_stmt, update_debug_stmts): New function.
(tail_merge_optimize): New function.
* tree-pass.h (tail_merge_optimize): Declare.
* tree-ssa-pre.c (execute_pre): Use tail_merge_optimize.
* Makefile.in (OBJS-common): Add tree-ssa-tail-merge.o.
(tree-ssa-tail-merge.o): New rule.
* opts.c (default_options_table): Set OPT_ftree_tail_merge by default at
OPT_LEVELS_2_PLUS.
* tree-ssa-sccvn.c (vn_valueize): Move to ...
* tree-ssa-sccvn.h (vn_valueize): Here.
* timevar.def (TV_TREE_TAIL_MERGE): New timevar.
* common.opt (ftree-tail-merge): New switch.
* params.def (PARAM_MAX_TAIL_MERGE_COMPARISONS)
(PARAM_MAX_TAIL_MERGE_ITERATIONS): New parameter.
* doc/invoke.texi (Optimization Options, -O2): Add -ftree-tail-merge.
(-ftree-tail-merge, max-tail-merge-comparisons)
(max-tail-merge-iterations): New item.
2011-09-27 Jan Hubicka <jh@suse.cz> 2011-09-27 Jan Hubicka <jh@suse.cz>
* ipa-inline-analysis.c (MAX_TIME): Reduce to avoid overflows. * ipa-inline-analysis.c (MAX_TIME): Reduce to avoid overflows.
...@@ -1476,6 +1476,7 @@ OBJS = \ ...@@ -1476,6 +1476,7 @@ OBJS = \
tree-ssa-sccvn.o \ tree-ssa-sccvn.o \
tree-ssa-sink.o \ tree-ssa-sink.o \
tree-ssa-structalias.o \ tree-ssa-structalias.o \
tree-ssa-tail-merge.o \
tree-ssa-ter.o \ tree-ssa-ter.o \
tree-ssa-threadedge.o \ tree-ssa-threadedge.o \
tree-ssa-threadupdate.o \ tree-ssa-threadupdate.o \
...@@ -2382,6 +2383,13 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ...@@ -2382,6 +2383,13 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \ $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \
$(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \ $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \
$(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H) $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H)
tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \
$(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \
$(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \
$(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) \
$(GIMPLE_H) $(FUNCTION_H) \
$(TIMEVAR_H) tree-ssa-sccvn.h \
$(CGRAPH_H) gimple-pretty-print.h tree-pretty-print.h $(PARAMS_H)
tree-ssa-structalias.o: tree-ssa-structalias.c \ tree-ssa-structalias.o: tree-ssa-structalias.c \
$(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(GGC_H) $(OBSTACK_H) $(BITMAP_H) \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(GGC_H) $(OBSTACK_H) $(BITMAP_H) \
$(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \
......
...@@ -1945,6 +1945,10 @@ ftree-dominator-opts ...@@ -1945,6 +1945,10 @@ ftree-dominator-opts
Common Report Var(flag_tree_dom) Optimization Common Report Var(flag_tree_dom) Optimization
Enable dominator optimizations Enable dominator optimizations
ftree-tail-merge
Common Report Var(flag_tree_tail_merge) Optimization
Enable tail merging on trees
ftree-dse ftree-dse
Common Report Var(flag_tree_dse) Optimization Common Report Var(flag_tree_dse) Optimization
Enable dead store elimination Enable dead store elimination
......
...@@ -407,7 +407,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -407,7 +407,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol -ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol -ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
-ftree-sink -ftree-sra -ftree-switch-conversion @gol -ftree-sink -ftree-sra -ftree-switch-conversion -ftree-tail-merge @gol
-ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol -ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol -funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol -funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
...@@ -6137,7 +6137,7 @@ also turns on the following optimization flags: ...@@ -6137,7 +6137,7 @@ also turns on the following optimization flags:
-fsched-interblock -fsched-spec @gol -fsched-interblock -fsched-spec @gol
-fschedule-insns -fschedule-insns2 @gol -fschedule-insns -fschedule-insns2 @gol
-fstrict-aliasing -fstrict-overflow @gol -fstrict-aliasing -fstrict-overflow @gol
-ftree-switch-conversion @gol -ftree-switch-conversion -ftree-tail-merge @gol
-ftree-pre @gol -ftree-pre @gol
-ftree-vrp} -ftree-vrp}
...@@ -7020,6 +7020,13 @@ Perform conversion of simple initializations in a switch to ...@@ -7020,6 +7020,13 @@ Perform conversion of simple initializations in a switch to
initializations from a scalar array. This flag is enabled by default initializations from a scalar array. This flag is enabled by default
at @option{-O2} and higher. at @option{-O2} and higher.
@item -ftree-tail-merge
@opindex ftree-tail-merge
Look for identical code sequences. When found, replace one with a jump to the
other. This optimization is known as tail merging or cross jumping. This flag
is enabled by default at @option{-O2} and higher. The run time of this pass can
be limited using the @option{max-tail-merge-comparisons} and
@option{max-tail-merge-iterations} parameters.
@item -ftree-dce @item -ftree-dce
@opindex ftree-dce @opindex ftree-dce
Perform dead code elimination (DCE) on trees. This flag is enabled by Perform dead code elimination (DCE) on trees. This flag is enabled by
...@@ -8603,6 +8610,14 @@ This is used to avoid quadratic behavior in hoisting algorithm. ...@@ -8603,6 +8610,14 @@ This is used to avoid quadratic behavior in hoisting algorithm.
The value of 0 will avoid limiting the search, but may slow down compilation The value of 0 will avoid limiting the search, but may slow down compilation
of huge functions. The default value is 30. of huge functions. The default value is 30.
@item max-tail-merge-comparisons
The maximum number of similar basic blocks to compare a basic block with. This
is used to avoid quadratic behavior in tree tail merging. The default value is 10.
@item max-tail-merge-iterations
The maximum number of iterations of the pass over the function. This is used to
limit run time in tree tail merging. The default value is 2.
@item max-unrolled-insns @item max-unrolled-insns
The maximum number of instructions that a loop should have if that loop The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times is unrolled, and if the loop is unrolled, it determines how many times
......
...@@ -484,6 +484,7 @@ static const struct default_options default_options_table[] = ...@@ -484,6 +484,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_falign_jumps, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_falign_jumps, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
/* -O3 optimizations. */ /* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
......
...@@ -921,6 +921,15 @@ DEFPARAM (PARAM_TREE_REASSOC_WIDTH, ...@@ -921,6 +921,15 @@ DEFPARAM (PARAM_TREE_REASSOC_WIDTH,
"reassociated tree. If 0, use the target dependent heuristic.", "reassociated tree. If 0, use the target dependent heuristic.",
0, 0, 0) 0, 0, 0)
DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
"max-tail-merge-comparisons",
"Maximum amount of similar bbs to compare a bb with",
10, 0, 0)
DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
"max-tail-merge-iterations",
"Maximum amount of iterations of the pass over a function",
2, 0, 0)
/* /*
Local variables: Local variables:
......
...@@ -127,6 +127,7 @@ DEFTIMEVAR (TV_TREE_GIMPLIFY , "tree gimplify") ...@@ -127,6 +127,7 @@ DEFTIMEVAR (TV_TREE_GIMPLIFY , "tree gimplify")
DEFTIMEVAR (TV_TREE_EH , "tree eh") DEFTIMEVAR (TV_TREE_EH , "tree eh")
DEFTIMEVAR (TV_TREE_CFG , "tree CFG construction") DEFTIMEVAR (TV_TREE_CFG , "tree CFG construction")
DEFTIMEVAR (TV_TREE_CLEANUP_CFG , "tree CFG cleanup") DEFTIMEVAR (TV_TREE_CLEANUP_CFG , "tree CFG cleanup")
DEFTIMEVAR (TV_TREE_TAIL_MERGE , "tree tail merge")
DEFTIMEVAR (TV_TREE_VRP , "tree VRP") DEFTIMEVAR (TV_TREE_VRP , "tree VRP")
DEFTIMEVAR (TV_TREE_COPY_PROP , "tree copy propagation") DEFTIMEVAR (TV_TREE_COPY_PROP , "tree copy propagation")
DEFTIMEVAR (TV_FIND_REFERENCED_VARS , "tree find ref. vars") DEFTIMEVAR (TV_FIND_REFERENCED_VARS , "tree find ref. vars")
......
...@@ -401,6 +401,7 @@ extern struct gimple_opt_pass pass_call_cdce; ...@@ -401,6 +401,7 @@ extern struct gimple_opt_pass pass_call_cdce;
extern struct gimple_opt_pass pass_merge_phi; extern struct gimple_opt_pass pass_merge_phi;
extern struct gimple_opt_pass pass_split_crit_edges; extern struct gimple_opt_pass pass_split_crit_edges;
extern struct gimple_opt_pass pass_pre; extern struct gimple_opt_pass pass_pre;
extern unsigned int tail_merge_optimize (unsigned int);
extern struct gimple_opt_pass pass_profile; extern struct gimple_opt_pass pass_profile;
extern struct gimple_opt_pass pass_strip_predict_hints; extern struct gimple_opt_pass pass_strip_predict_hints;
extern struct gimple_opt_pass pass_lower_complex_O0; extern struct gimple_opt_pass pass_lower_complex_O0;
......
...@@ -4915,7 +4915,6 @@ execute_pre (bool do_fre) ...@@ -4915,7 +4915,6 @@ execute_pre (bool do_fre)
statistics_counter_event (cfun, "Constified", pre_stats.constified); statistics_counter_event (cfun, "Constified", pre_stats.constified);
clear_expression_ids (); clear_expression_ids ();
free_scc_vn ();
if (!do_fre) if (!do_fre)
{ {
remove_dead_inserted_code (); remove_dead_inserted_code ();
...@@ -4925,6 +4924,17 @@ execute_pre (bool do_fre) ...@@ -4925,6 +4924,17 @@ execute_pre (bool do_fre)
scev_finalize (); scev_finalize ();
fini_pre (do_fre); fini_pre (do_fre);
if (!do_fre)
/* TODO: tail_merge_optimize may merge all predecessors of a block, in which
case we can merge the block with the remaining predecessor of the block.
It should either:
- call merge_blocks after each tail merge iteration
- call merge_blocks after all tail merge iterations
- mark TODO_cleanup_cfg when necessary
- share the cfg cleanup with fini_pre. */
todo |= tail_merge_optimize (todo);
free_scc_vn ();
return todo; return todo;
} }
......
...@@ -2820,19 +2820,6 @@ stmt_has_constants (gimple stmt) ...@@ -2820,19 +2820,6 @@ stmt_has_constants (gimple stmt)
return false; return false;
} }
/* Return the value SSA_VAL yields for NAME when NAME is an SSA name and
   that value is not VN_TOP.  In every other case (NAME is not an SSA name,
   or its value is VN_TOP) return NAME itself.  */
static inline tree
vn_valueize (tree name)
{
  tree val;

  /* Anything that is not an SSA name is passed through unchanged.  */
  if (TREE_CODE (name) != SSA_NAME)
    return name;

  val = SSA_VAL (name);
  return val != VN_TOP ? val : name;
}
/* Replace SSA_NAMES in expr with their value numbers, and return the /* Replace SSA_NAMES in expr with their value numbers, and return the
result. result.
This is performed in place. */ This is performed in place. */
......
...@@ -215,4 +215,18 @@ unsigned int get_constant_value_id (tree); ...@@ -215,4 +215,18 @@ unsigned int get_constant_value_id (tree);
unsigned int get_or_alloc_constant_value_id (tree); unsigned int get_or_alloc_constant_value_id (tree);
bool value_id_constant_p (unsigned int); bool value_id_constant_p (unsigned int);
tree fully_constant_vn_reference_p (vn_reference_t); tree fully_constant_vn_reference_p (vn_reference_t);
/* Return the value number stored in VN_INFO (NAME)->valnum when NAME is an
   SSA name and that value number is not VN_TOP.  In every other case (NAME
   is not an SSA name, or its value number is VN_TOP) return NAME itself.  */
static inline tree
vn_valueize (tree name)
{
  tree valnum;

  /* Anything that is not an SSA name is passed through unchanged.  */
  if (TREE_CODE (name) != SSA_NAME)
    return name;

  valnum = VN_INFO (name)->valnum;

  /* VN_TOP is not substituted for NAME; keep the name in that case.  */
  if (valnum == VN_TOP)
    return name;

  return valnum;
}
#endif /* TREE_SSA_SCCVN_H */ #endif /* TREE_SSA_SCCVN_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment