Commit 98af4c9f by Sebastian Pop Committed by Sebastian Pop

New pass: loop flattening.

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* Makefile.in (OBJS-common): Add graphite-flattening.o.
	(graphite-flattening.o): New rule.
	* common.opt (floop-flatten): New flag.
	* doc/invoke.texi (-floop-flatten): Documented.
	* graphite-flattening.c: New.
	* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
	* graphite-poly.h (flatten_all_loops): Declared.
	(lst_remove_loop_and_inline_stmts_in_loop_father): New.
	* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
	is set, also set flag_graphite.

From-SVN: r164804
parent c498b9b9
2010-09-30 Sebastian Pop <sebastian.pop@amd.com>
* Makefile.in (OBJS-common): Add graphite-flattening.o.
(graphite-flattening.o): New rule.
* common.opt (floop-flatten): New flag.
* doc/invoke.texi (-floop-flatten): Documented.
* graphite-flattening.c: New.
* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
* graphite-poly.h (flatten_all_loops): Declared.
(lst_remove_loop_and_inline_stmts_in_loop_father): New.
* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
is set, also set flag_graphite.
2010-09-30 Sebastian Pop <sebastian.pop@amd.com>
* graphite-poly.c (cloog_checksum): New.
* graphite-poly.h (cloog_checksum): Declared.
2010-09-09 Sebastian Pop <sebastian.pop@amd.com>
* Makefile.in (OBJS-common): Add graphite-flattening.o.
(graphite-flattening.o): New rule.
* common.opt (floop-flatten): New flag.
* doc/invoke.texi (-floop-flatten): Documented.
* graphite-flattening.c: New.
* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
* graphite-poly.h (flatten_all_loops): Declared.
(lst_remove_loop_and_inline_stmts_in_loop_father): New.
* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
is set, also set flag_graphite.
2010-09-09 Sebastian Pop <sebastian.pop@amd.com>
* graphite-poly.c (cloog_checksum): New.
* graphite-poly.h (cloog_checksum): Declared.
......
......@@ -1244,6 +1244,7 @@ OBJS-common = \
graphite-clast-to-gimple.o \
graphite-cloog-util.o \
graphite-dependences.o \
graphite-flattening.o \
graphite-interchange.o \
graphite-poly.o \
graphite-ppl.o \
......@@ -2695,6 +2696,12 @@ graphite-dependences.o: graphite-dependences.c $(CONFIG_H) $(SYSTEM_H) \
$(TOPLEV_H) $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
$(GIMPLE_H) $(TREE_DATA_REF_H) tree-pass.h domwalk.h \
graphite.h graphite-poly.h graphite-ppl.h graphite-dependences.h
graphite-flattening.o: graphite-flattening.c $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) output.h \
$(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) $(TREE_FLOW_H) \
$(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
$(TREE_DATA_REF_H) tree-pass.h domwalk.h value-prof.h graphite.h \
graphite-poly.h graphite-ppl.h
graphite-interchange.o: graphite-interchange.c $(CONFIG_H) $(SYSTEM_H) \
coretypes.h \
$(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) output.h $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
......
......@@ -870,6 +870,10 @@ floop-block
Common Report Var(flag_loop_block) Optimization
Enable Loop Blocking transformation
floop-flatten
Common Report Var(flag_loop_flatten) Optimization
Enable Loop Flattening transformation
fstrict-volatile-bitfields
Common Report Var(flag_strict_volatile_bitfields) Init(-1)
Force bitfield accesses to match their type width
......
......@@ -352,7 +352,7 @@ Objective-C and Objective-C++ Dialects}.
-fira-loop-pressure -fno-ira-share-save-slots @gol
-fno-ira-share-spill-slots -fira-verbose=@var{n} @gol
-fivopts -fkeep-inline-functions -fkeep-static-consts @gol
-floop-block -floop-interchange -floop-strip-mine @gol
-floop-block -floop-flatten -floop-interchange -floop-strip-mine @gol
-floop-parallelize-all -flto -flto-compression-level -flto-report @gol
-fltrans -fltrans-output-list -fmerge-all-constants -fmerge-constants @gol
-fmodulo-sched -fmodulo-sched-allow-regmoves -fmove-loop-invariants @gol
......@@ -6798,6 +6798,7 @@ Perform linear loop transformations on tree. This flag can improve cache
performance and allow further loop optimizations to take place.
@item -floop-interchange
@opindex floop-interchange
Perform loop interchange transformations on loops. Interchanging two
nested loops switches the inner and outer loops. For example, given a
loop like:
......@@ -6826,6 +6827,7 @@ with @option{--with-ppl} and @option{--with-cloog} to enable the
Graphite loop transformation infrastructure.
@item -floop-strip-mine
@opindex floop-strip-mine
Perform loop strip mining transformations on loops. Strip mining
splits a loop into two nested loops. The outer loop has strides
equal to the strip size and the inner loop has strides of the
......@@ -6851,6 +6853,7 @@ be configured with @option{--with-ppl} and @option{--with-cloog} to
enable the Graphite loop transformation infrastructure.
@item -floop-block
@opindex floop-block
Perform loop blocking transformations on loops. Blocking strip mines
each loop in the loop nest such that the memory accesses of the
element loops fit inside caches. The strip length can be changed
......@@ -6892,7 +6895,14 @@ GIMPLE -> GRAPHITE -> GIMPLE transformation. Some minimal optimizations
are also performed by the code generator CLooG, like index splitting and
dead code elimination in loops.
@item -floop-flatten
@opindex floop-flatten
Remove the loop nesting structure by transforming the loop nest into a
single loop.  This transformation can be useful for vectorizing all the
levels of the loop nest.
@item -floop-parallelize-all
@opindex floop-parallelize-all
Use the Graphite data dependence analysis to identify loops that can
be parallelized. Parallelize all the loops that can be analyzed to
not contain loop carried dependences without checking that it is
......
......@@ -783,6 +783,9 @@ apply_poly_transforms (scop_p scop)
transform_done |= scop_do_interchange (scop);
}
if (flag_loop_flatten)
transform_done |= flatten_all_loops (scop);
/* This feature is only enabled in the Graphite branch. */
if (0)
{
......@@ -1688,7 +1691,8 @@ pbb_number_of_iterations_at_time (poly_bb_p pbb,
ppl_delete_Constraint_System (cs);
}
/* Compute the lower bound on the original iteration domain. */
/* Compute the lower bound on the original iteration domain and add
it to the scattering. */
ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
(&sctr_lb, PBB_TRANSFORMED_SCATTERING (pbb));
for (i = 0; i < (int) domain_dim; i++)
......
......@@ -414,6 +414,7 @@ extern void debug_iteration_domains (scop_p, int);
extern bool scop_do_interchange (scop_p);
extern bool scop_do_strip_mine (scop_p);
extern bool scop_do_block (scop_p);
extern bool flatten_all_loops (scop_p);
extern void pbb_number_of_iterations_at_time (poly_bb_p, graphite_dim_t, mpz_t);
extern void pbb_remove_duplicate_pdrs (poly_bb_p);
......@@ -944,7 +945,7 @@ find_lst_loop (lst_p stmt, int loop_depth)
return loop;
}
/* Return the first lst representing a PBB statement in LST. */
/* Return the first LST representing a PBB statement in LST. */
static inline lst_p
lst_find_first_pbb (lst_p lst)
......@@ -968,7 +969,7 @@ lst_find_first_pbb (lst_p lst)
return NULL;
}
/* Returns true when LST is a loop that does not contains
/* Returns true when LST is a loop that does not contain
statements. */
static inline bool
......@@ -977,7 +978,7 @@ lst_empty_p (lst_p lst)
return !lst_find_first_pbb (lst);
}
/* Return the last lst representing a PBB statement in LST. */
/* Return the last LST representing a PBB statement in LST. */
static inline lst_p
lst_find_last_pbb (lst_p lst)
......@@ -1061,6 +1062,26 @@ lst_remove_from_sequence (lst_p lst)
LST_LOOP_FATHER (lst) = NULL;
}
/* Removes the loop LST from its father loop and inlines the body of
   LST in the father, starting at the position that LST occupied.
   LST must be non-NULL and must have a father loop.  */
static inline void
lst_remove_loop_and_inline_stmts_in_loop_father (lst_p lst)
{
  lst_p l, father;
  int i, dewey;

  /* Validate LST before reading any of its fields: asserting it only
     after LST_LOOP_FATHER (lst) has been evaluated would be too late.  */
  gcc_assert (lst);

  father = LST_LOOP_FATHER (lst);
  dewey = lst_dewey_number (lst);
  gcc_assert (father && dewey >= 0);

  /* Detach LST from the sequence of its father.  */
  VEC_ordered_remove (lst_p, LST_SEQ (father), dewey);
  LST_LOOP_FATHER (lst) = NULL;

  /* Re-insert the children of LST in the father, in their original
     order, starting at the slot from which LST was removed.  */
  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
    {
      VEC_safe_insert (lst_p, heap, LST_SEQ (father), dewey + i, l);
      LST_LOOP_FATHER (l) = father;
    }
}
/* Sets NITER to the upper bound approximation of the number of
iterations of loop LST. */
......
......@@ -303,8 +303,12 @@ gate_graphite_transforms (void)
{
/* Enable -fgraphite pass if any one of the graphite optimization flags
is turned on. */
if (flag_loop_block || flag_loop_interchange || flag_loop_strip_mine
|| flag_graphite_identity || flag_loop_parallelize_all)
if (flag_loop_block
|| flag_loop_interchange
|| flag_loop_strip_mine
|| flag_graphite_identity
|| flag_loop_parallelize_all
|| flag_loop_flatten)
flag_graphite = 1;
return flag_graphite != 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment