Commit 113d659a by Zdenek Dvorak Committed by Zdenek Dvorak

Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.

	* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
	* basic-block.h (struct reorder_block_def): Add copy_number field.
	* cfgloop.h (biv_p): Declare.
	* cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number.
	* common.opt (fsplit-ivs-in-unroller): New flag.
	* loop-iv.c (biv_p): New function.
	* loop-unroll.c: Include hashtab.h and recog.h.
	(struct iv_to_split, struct split_ivs_info): New types.
	(analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies,
	free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn,
	determine_split_iv_delta, get_ivts_expr, allocate_basic_variable,
	insert_base_initialization, split_iv): New functions.
	(peel_loop_completely, unroll_loop_constant_iterations,
	unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid):
	Use them.
	* doc/invoke.texi (-fsplit-ivs-in-unroller): Document.

Co-Authored-By: Steven Bosscher <stevenb@suse.de>

From-SVN: r87487
parent d840495b
2004-09-14 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
Steven Bosscher <stevenb@suse.de>
* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
* basic-block.h (struct reorder_block_def): Add copy_number field.
* cfgloop.h (biv_p): Declare.
* cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number.
* common.opt (fsplit-ivs-in-unroller): New flag.
* loop-iv.c (biv_p): New function.
* loop-unroll.c: Include hashtab.h and recog.h.
(struct iv_to_split, struct split_ivs_info): New types.
(analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies,
free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn,
determine_split_iv_delta, get_ivts_expr, allocate_basic_variable,
insert_base_initialization, split_iv): New functions.
(peel_loop_completely, unroll_loop_constant_iterations,
unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid):
Use them.
* doc/invoke.texi (-fsplit-ivs-in-unroller): Document.
2004-09-14 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
* tree-cfg.c (thread_jumps): Update dominators correctly in
case destination of threaded edge dominates its source.
......
......@@ -2028,7 +2028,7 @@ loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
output.h $(EXPR_H) coretypes.h $(TM_H)
loop-unroll.o: loop-unroll.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
$(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(CFGLAYOUT_H) $(PARAMS_H) \
output.h $(EXPR_H) coretypes.h $(TM_H)
output.h $(EXPR_H) coretypes.h $(TM_H) $(HASHTAB_H) $(RECOG_H)
dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
......
......@@ -295,6 +295,7 @@ typedef struct reorder_block_def
/* Used by loop copying. */
basic_block copy;
int duplicated;
int copy_number;
/* These fields are used by bb-reorder pass. */
int visited;
......
......@@ -410,6 +410,7 @@ extern void iv_analysis_loop_init (struct loop *);
extern rtx iv_get_reaching_def (rtx, rtx);
extern bool iv_analyze (rtx, rtx, struct rtx_iv *);
extern rtx get_iv_value (struct rtx_iv *, rtx);
extern bool biv_p (rtx, rtx);
extern void find_simple_exit (struct loop *, struct niter_desc *);
extern void iv_number_of_iterations (struct loop *, rtx, rtx,
struct niter_desc *);
......
......@@ -991,6 +991,9 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
/* Copy bbs. */
copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop);
for (i = 0; i < n; i++)
new_bbs[i]->rbi->copy_number = j + 1;
/* Note whether the blocks and edges belong to an irreducible loop. */
if (add_irreducible_flag)
{
......@@ -1069,6 +1072,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
int n_dom_bbs,j;
bb = bbs[i];
bb->rbi->copy_number = 0;
n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
for (j = 0; j < n_dom_bbs; j++)
{
......
......@@ -744,6 +744,10 @@ fspeculative-prefetching
Common Report Var(flag_speculative_prefetching)
Use value profiling for speculative prefetching
fsplit-ivs-in-unroller
Common Report Var(flag_split_ivs_in_unroller) Init(1)
Split lifetimes of induction variables when loops are unrolled.
; Emit code to probe the stack, to help detect stack overflow; also
; may cause large objects to be allocated dynamically.
fstack-check
......
......@@ -315,7 +315,7 @@ Objective-C and Objective-C++ Dialects}.
-fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
-funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops @gol
-fsplit-ivs-in-unroller -funswitch-loops @gol
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
-ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
......@@ -4696,6 +4696,20 @@ the loop is entered. This usually makes programs run more slowly.
@option{-funroll-all-loops} implies the same options as
@option{-funroll-loops},
@item -fsplit-ivs-in-unroller
@opindex fsplit-ivs-in-unroller
Enables expressing the values of induction variables in later iterations
of the unrolled loop in terms of the value in the first iteration. This breaks
long dependency chains, thus improving the efficiency of the scheduling passes
(for best results, @option{-fweb} should be used as well).
A combination of @option{-fweb} and CSE is often sufficient to obtain the
same effect. However, in cases where the loop body is more complicated than
a single basic block, this is not reliable. It also does not work at all
on some architectures due to restrictions in the CSE pass.
This optimization is enabled by default.
@item -fprefetch-loop-arrays
@opindex fprefetch-loop-arrays
If supported by the target machine, generate instructions to prefetch
......
......@@ -1183,6 +1183,24 @@ iv_analyze (rtx insn, rtx def, struct rtx_iv *iv)
return iv->base != NULL_RTX;
}
/* Returns true if the definition of register REG in INSN is a basic
   induction variable.  IV analysis must have been initialized (via a call
   to iv_analysis_loop_init) for this function to produce a result.

   The answer is false outright when REG is not a register or when INSN is
   not the definition recorded for it in last_def; otherwise it is whatever
   iv_analyze_biv reports for REG.  */
bool
biv_p (rtx insn, rtx reg)
{
  struct rtx_iv iv;

  /* Only a register whose recorded last definition is INSN itself is
     handed on to the biv analysis.  */
  if (REG_P (reg) && last_def[REGNO (reg)] == insn)
    return iv_analyze_biv (reg, &iv);

  return false;
}
/* Calculates value of IV at ITERATION-th iteration. */
rtx
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment