Commit 20769d5e by Sebastian Pop Committed by Sebastian Pop

Add -ftree-loop-distribute-patterns enabled at -O3.

2010-08-02  Sebastian Pop  <sebastian.pop@amd.com>

	* common.opt (ftree-loop-distribute-patterns): New.
	* invoke.texi (-ftree-loop-distribute-patterns): Documented.
	* opts.c (decode_options): Enable flag_tree_loop_distribute_patterns
	at -O3.
	* tree-data-ref.c (stores_zero_from_loop): New.
	* tree-data-ref.h (stores_zero_from_loop): Declared.
	* tree-loop-distribution.c (tree_loop_distribution): Call
	stores_zero_from_loop.
	(tree_loop_distribution): Check flag_tree_loop_distribute_patterns.

From-SVN: r162822
parent ef973f3f
2010-08-02 Sebastian Pop <sebastian.pop@amd.com>
* common.opt (ftree-loop-distribute-patterns): New.
* invoke.texi (-ftree-loop-distribute-patterns): Documented.
* opts.c (decode_options): Enable flag_tree_loop_distribute_patterns
at -O3.
* tree-data-ref.c (stores_zero_from_loop): New.
* tree-data-ref.h (stores_zero_from_loop): Declared.
* tree-loop-distribution.c (tree_loop_distribution): Call
stores_zero_from_loop.
(tree_loop_distribution): Check flag_tree_loop_distribute_patterns.
2010-08-02 Bernd Schmidt <bernds@codesourcery.com>
* postreload.c (reload_cse_simplify_operands): Take attribute enabled
......
......@@ -1333,6 +1333,10 @@ ftree-loop-distribution
Common Report Var(flag_tree_loop_distribution) Optimization
Enable loop distribution on trees
ftree-loop-distribute-patterns
Common Report Var(flag_tree_loop_distribute_patterns) Optimization
Enable loop distribution for patterns transformed into a library call
ftree-loop-im
Common Report Var(flag_tree_loop_im) Init(1) Optimization
Enable loop invariant motion on trees
......
......@@ -384,7 +384,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol
-ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
-ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im @gol
-ftree-phiprop -ftree-loop-distribution @gol
-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
-ftree-sink -ftree-sra -ftree-switch-conversion @gol
......@@ -6925,6 +6925,29 @@ DO I = 1, N
ENDDO
@end smallexample
@item -ftree-loop-distribute-patterns
@opindex ftree-loop-distribute-patterns
Perform loop distribution of patterns that can be code generated with
calls to a library. This flag is enabled by default at @option{-O3}.
This pass splits zero-initialization stores into their own loops and
replaces each such loop with a call to @code{memset} that zeroes the
array. For example, the loop
@smallexample
DO I = 1, N
A(I) = 0
B(I) = A(I) + I
ENDDO
@end smallexample
is transformed to
@smallexample
DO I = 1, N
A(I) = 0
ENDDO
DO I = 1, N
B(I) = A(I) + I
ENDDO
@end smallexample
and the initialization loop is then replaced by a call to @code{memset}
that zeroes @code{A}.
@item -ftree-loop-im
@opindex ftree-loop-im
Perform loop invariant motion on trees. This pass moves only invariants that
......
......@@ -819,6 +819,7 @@ decode_options (unsigned int argc, const char **argv,
/* -O3 optimizations. */
opt3 = (optimize >= 3);
flag_tree_loop_distribute_patterns = opt3;
flag_predictive_commoning = opt3;
flag_inline_functions = opt3;
flag_unswitch_loops = opt3;
......
......@@ -5038,6 +5038,32 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
free (bbs);
}
/* Append to *STMTS every statement of LOOP that stores a zero constant
   to memory, i.e. a store of the form "A[i] = 0".  The basic blocks of
   LOOP are walked in the order returned by get_loop_body_in_dom_order,
   and the statements of each block are visited front to back.  */
void
stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
{
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned int n;

  for (n = 0; n < loop->num_nodes; n++)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (body[n]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple s = gsi_stmt (gsi);
          tree rhs;

          /* Keep only assignments that carry a virtual definition and
             whose right-hand side code is an integer constant.  */
          if (!s
              || !gimple_vdef (s)
              || !is_gimple_assign (s)
              || gimple_assign_rhs_code (s) != INTEGER_CST)
            continue;

          rhs = gimple_assign_rhs1 (s);
          if (!rhs)
            continue;

          /* NOTE(review): the RHS code was just required to be
             INTEGER_CST, so the real_zerop alternative looks
             unreachable -- confirm whether floating-point zero stores
             were meant to be collected as well.  */
          if (integer_zerop (rhs) || real_zerop (rhs))
            VEC_safe_push (gimple, heap, *stmts, s);
        }
    }

  /* The block array was allocated for us; release it here.  */
  free (body);
}
/* For a data reference REF, return the declaration of its base
address or NULL_TREE if the base is not determined. */
......
......@@ -564,6 +564,7 @@ index_in_loop_nest (int var, VEC (loop_p, heap) *loop_nest)
}
void stores_from_loop (struct loop *, VEC (gimple, heap) **);
/* Collects the statements of a loop that store zero to memory
   ("A[i] = 0") into the given statement vector.  */
void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
void remove_similar_memory_refs (VEC (gimple, heap) **);
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
bool have_similar_memory_accesses (gimple, gimple);
......
......@@ -1184,18 +1184,36 @@ tree_loop_distribution (void)
{
VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3);
/* With the following working list, we're asking distribute_loop
to separate the stores of the loop: when dependences allow,
it will end on having one store per loop. */
stores_from_loop (loop, &work_list);
/* A simple heuristic for cache locality is to not split stores
to the same array. Without this call, an unrolled loop would
be split into as many loops as unroll factor, each loop
storing in the same array. */
remove_similar_memory_refs (&work_list);
nb_generated_loops = distribute_loop (loop, work_list);
/* If both flag_tree_loop_distribute_patterns and
flag_tree_loop_distribution are set, then only
distribute_patterns is executed. */
if (flag_tree_loop_distribute_patterns)
{
/* With the following working list, we're asking
distribute_loop to separate from the rest of the loop the
stores of the form "A[i] = 0". */
stores_zero_from_loop (loop, &work_list);
/* Do nothing if there are no patterns to be distributed. */
if (VEC_length (gimple, work_list) > 0)
nb_generated_loops = distribute_loop (loop, work_list);
}
else if (flag_tree_loop_distribution)
{
/* With the following working list, we're asking
distribute_loop to separate the stores of the loop: when
dependences allow, it will end on having one store per
loop. */
stores_from_loop (loop, &work_list);
/* A simple heuristic for cache locality is to not split
stores to the same array. Without this call, an unrolled
loop would be split into as many loops as unroll factor,
each loop storing in the same array. */
remove_similar_memory_refs (&work_list);
nb_generated_loops = distribute_loop (loop, work_list);
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
......@@ -1217,7 +1235,8 @@ tree_loop_distribution (void)
/* Gate for the loop distribution pass.  Returns true when either
   flag_tree_loop_distribution (-ftree-loop-distribution) or
   flag_tree_loop_distribute_patterns (-ftree-loop-distribute-patterns)
   is set, so that the pattern-only mode can run on its own.  The stale
   "flag_tree_loop_distribution != 0" return that preceded this
   expression made it unreachable and has been dropped.  */
static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}
struct gimple_opt_pass pass_loop_distribution =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment