Commit aa634f11 by Josef Zlomek

Makefile.in (bb-reorder.o): Add dependency on $(FIBHEAP_H).

	* Makefile.in (bb-reorder.o): Add dependency on $(FIBHEAP_H).
	* bb-reorder.c (make_reorder_chain): Deleted.
	(make_reorder_chain_1): Deleted.
	(find_traces): New function.
	(rotate_loop): New function.
	(mark_bb_visited): New function.
	(find_traces_1_round): New function.
	(copy_bb): New function.
	(bb_to_key): New function.
	(better_edge_p): New function.
	(connect_traces): New function.
	(copy_bb_p): New function.
	(get_uncond_jump_length): New function.
	(reorder_basic_blocks): Use new functions (Software Trace Cache).
	* cfgcleanup.c (outgoing_edges_match): Enable crossjumping across loop
	boundaries.

From-SVN: r62645
parent 17edbda5
2003-02-10 Josef Zlomek <zlomekj@suse.cz>
* Makefile.in (bb-reorder.o): Add dependency on $(FIBHEAP_H).
* bb-reorder.c (make_reorder_chain): Deleted.
(make_reorder_chain_1): Deleted.
(find_traces): New function.
(rotate_loop): New function.
(mark_bb_visited): New function.
(find_traces_1_round): New function.
(copy_bb): New function.
(bb_to_key): New function.
(better_edge_p): New function.
(connect_traces): New function.
(copy_bb_p): New function.
(get_uncond_jump_length): New function.
(reorder_basic_blocks): Use new functions (Software Trace Cache).
* cfgcleanup.c (outgoing_edges_match): Enable crossjumping across loop
boundaries.
2003-02-10  Aldy Hernandez  <aldyh@redhat.com>

	* config/rs6000/rs6000.c (bdesc_2arg): Change spe_evxor to xorv2si3.
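The hunks below implement the algorithm sketched in the new bb-reorder.c file
comment. As a rough illustration of the core idea, here is a minimal,
self-contained C sketch of greedy trace growing under a branch threshold. It
is a toy model, not GCC code: the struct block layout, the percent-scale
probabilities, and grow_trace are simplified stand-ins for GCC's CFG types
and the heap-driven find_traces_1_round, not the actual interfaces.

#include <stdio.h>
#include <stdbool.h>

#define N_BLOCKS 6

/* A toy CFG node: at most two successors, with branch probabilities
   given in percent.  */
struct block
{
  int succ[2];                  /* successor indices, -1 if absent */
  int prob[2];                  /* probability of each edge, in percent */
  bool visited;
};

/* Greedily grow one trace starting from SEED: repeatedly follow the
   most probable unvisited successor whose probability reaches
   BRANCH_TH.  Returns the trace length.  */
static int
grow_trace (struct block *cfg, int seed, int branch_th, int *trace)
{
  int len = 0;
  int bb = seed;

  while (bb >= 0 && !cfg[bb].visited)
    {
      int best, best_prob, i;

      cfg[bb].visited = true;
      trace[len++] = bb;

      /* Select the most probable suitable successor; edges below the
         threshold would instead seed a later round.  */
      best = -1;
      best_prob = branch_th;
      for (i = 0; i < 2; i++)
        if (cfg[bb].succ[i] >= 0 && cfg[bb].prob[i] >= best_prob)
          {
            best = cfg[bb].succ[i];
            best_prob = cfg[bb].prob[i];
          }
      bb = best;
    }
  return len;
}

int
main (void)
{
  /* 0 branches to 1 (90%) or 2 (10%); 3 and 4 form a small loop.  */
  struct block cfg[N_BLOCKS] = {
    { {1, 2},   {90, 10},  false },
    { {3, -1},  {100, 0},  false },
    { {3, -1},  {100, 0},  false },
    { {4, 5},   {80, 20},  false },
    { {3, -1},  {100, 0},  false },  /* back edge to the loop header */
    { {-1, -1}, {0, 0},    false },
  };
  int trace[N_BLOCKS];
  int i, len;

  /* First round with a high threshold; later rounds would lower it to
     zero so that cold blocks such as 2 are picked up eventually.  */
  len = grow_trace (cfg, 0, 50, trace);
  for (i = 0; i < len; i++)
    printf ("%d ", trace[i]);
  printf ("\n");                /* prints: 0 1 3 4 */
  return 0;
}

The real pass additionally keys the seeds in a Fibonacci heap, tracks profile
counts, rotates loops, and duplicates small blocks; see find_traces_1_round
and connect_traces below.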
Makefile.in:
@@ -1691,8 +1691,9 @@ predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE
	$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) sreal.h \
	$(PARAMS_H) $(TARGET_H) cfgloop.h
lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) toplev.h $(RTL_H) $(GGC_H)
-bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
-	$(TREE_H) flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)
bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
	$(RTL_H) $(BASIC_BLOCK_H) flags.h output.h cfglayout.h $(FIBHEAP_H) \
	$(TARGET_H)
tracer.o : tracer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \
	$(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h flags.h \
	$(PARAMS_H) profile.h
bb-reorder.c:
/* Basic block reordering routines for the GNU compiler.
-   Copyright (C) 2000, 2002 Free Software Foundation, Inc.
   Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc.

   This file is part of GCC.
@@ -18,243 +18,1037 @@
   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */
-/* References:
-
-   "Profile Guided Code Positioning"
-   Pettis and Hanson; PLDI '90.
-
-   TODO:
-
-   (1) Consider:
-
-		if (p) goto A;		// predict taken
-		foo ();
-	A:
-		if (q) goto B;		// predict taken
-		bar ();
-	B:
-		baz ();
-		return;
-
-       We'll currently reorder this as
-
-		if (!p) goto C;
-	A:
-		if (!q) goto D;
-	B:
-		baz ();
-		return;
-	D:
-		bar ();
-		goto B;
-	C:
-		foo ();
-		goto A;
-
-       A better ordering is
-
-		if (!p) goto C;
-		if (!q) goto D;
-	B:
-		baz ();
-		return;
-	C:
-		foo ();
-		if (q) goto B;
-	D:
-		bar ();
-		goto B;
-
-       This requires that we be able to duplicate the jump at A, and
-       adjust the graph traversal such that greedy placement doesn't
-       fix D before C is considered.
-
-   (2) Coordinate with shorten_branches to minimize the number of
-       long branches.
-
-   (3) Invent a method by which sufficiently non-predicted code can
-       be moved to either the end of the section or another section
-       entirely.  Some sort of NOTE_INSN note would work fine.
-       This completely scroggs all debugging formats, so the user
-       would have to explicitly ask for it.  */

/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first
   (see function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block
   is lower than Branch Threshold, or the frequency of the successor is lower
   than Exec Threshold, the successor will be a seed in one of the next
   rounds.  Each round has these parameters lower than the previous one.
   The last round has to have these parameters set to zero so that the
   remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (those that have not been "sent" to the next round)
   will be other seeds for this round, and the secondary traces will start
   from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the source
   block of the most probable edge going out of the loop is the last block
   of the trace.
   If the loop has few iterations and there is no edge from the last block
   of the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, it first checks whether there is an edge from the
   last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there
   exists a basic block BB such that BB is a successor of the last block of
   one trace and BB is a predecessor of the first block of another trace.
   In this case, BB is duplicated and the traces are connected through this
   duplicate.
   The remaining traces are simply connected, so there will be a jump to the
   beginning of the rest of each trace.


   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html  */
#include "config.h" #include "config.h"
#include "system.h" #include "system.h"
#include "coretypes.h" #include "coretypes.h"
#include "tm.h" #include "tm.h"
#include "tree.h"
#include "rtl.h" #include "rtl.h"
#include "hard-reg-set.h"
#include "basic-block.h" #include "basic-block.h"
#include "flags.h" #include "flags.h"
#include "output.h" #include "output.h"
#include "cfglayout.h" #include "cfglayout.h"
#include "fibheap.h"
#include "target.h" #include "target.h"
/* The number of rounds. */
#define N_ROUNDS 4
/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0};
/* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0};
/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
block the edge destination is not duplicated while connecting traces. */
#define DUPLICATION_THRESHOLD 100
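/* For illustration (assuming REG_BR_PROB_BASE is 10000): in round 0 an
   edge must have probability of at least 10000 * 400 / 1000 = 4000,
   i.e. 40%, and a successor must have at least half (500 per mille) of
   the maximal entry frequency; in the last round both thresholds are
   zero, so every remaining block gets placed.  */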
/* Length of unconditional jump instruction. */
static int uncond_jump_length;
/* Structure to hold needed information for each basic block. */
typedef struct bbro_basic_block_data_def
{
/* Which trace is the bb start of (-1 means it is not a start of a trace). */
int start_of_trace;
/* Which trace is the bb end of (-1 means it is not an end of a trace). */
int end_of_trace;
/* Which heap is BB in (if any)? */
fibheap_t heap;
/* Which heap node is BB in (if any)? */
fibnode_t node;
} bbro_basic_block_data;
/* The current size of the following dynamic array. */
static int array_size;
/* The array which holds needed information for basic blocks. */
static bbro_basic_block_data *bbd;
/* To avoid frequent reallocation the size of arrays is greater than needed,
the number of elements is (not less than) 1.25 * size_wanted. */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
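/* For example, GET_ARRAY_SIZE (100) is ((100 / 4) + 1) * 5 = 130, i.e.
   about 1.25 * 100 plus a little slack, so a sequence of small growths
   does not trigger a reallocation every time.  */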
/* Free the memory and set the pointer to NULL. */
#define FREE(P) \
do { if (P) { free (P); P = 0; } else { abort (); } } while (0)
/* Structure for holding information about a trace. */
struct trace
{
/* First and last basic block of the trace. */
basic_block first, last;
/* The round of the STC creation which this trace was found in. */
int round;
/* The length (i.e. the number of basic blocks) of the trace. */
int length;
};
/* Maximum frequency and count of one of the entry blocks. */
int max_entry_frequency;
gcov_type max_entry_count;
/* Local function prototypes.  */
-static void make_reorder_chain		PARAMS ((void));
-static basic_block make_reorder_chain_1	PARAMS ((basic_block, basic_block));
static void find_traces			PARAMS ((int *, struct trace *));
static basic_block rotate_loop		PARAMS ((edge, struct trace *, int));
static void mark_bb_visited		PARAMS ((basic_block, int));
static void find_traces_1_round		PARAMS ((int, int, gcov_type,
						 struct trace *, int *, int,
						 fibheap_t *));
static basic_block copy_bb		PARAMS ((basic_block, edge,
						 basic_block, int));
static fibheapkey_t bb_to_key		PARAMS ((basic_block));
static bool better_edge_p		PARAMS ((basic_block, edge, int, int,
						 int, int));
static void connect_traces		PARAMS ((int, struct trace *));
static bool copy_bb_p			PARAMS ((basic_block, int));
static int get_uncond_jump_length	PARAMS ((void));

-/* Compute an ordering for a subgraph beginning with block BB.  Record the
-   ordering in RBI()->index and chained through RBI()->next.  */
-
-static void
-make_reorder_chain ()
-{
-  basic_block prev = NULL;
-  basic_block next, bb;
-
-  /* Loop until we've placed every block.  */
-  do
-    {
-      next = NULL;
-
-      /* Find the next unplaced block.  */
-      /* ??? Get rid of this loop, and track which blocks are not yet
-	 placed more directly, so as to avoid the O(N^2) worst case.
-	 Perhaps keep a doubly-linked list of all to-be-placed blocks;
-	 remove from the list as we place.  The head of that list is
-	 what we're looking for here.  */
-
-      FOR_EACH_BB (bb)
-	if (! RBI (bb)->visited)
-	  {
-	    next = bb;
-	    break;
-	  }
-
-      if (next)
-	prev = make_reorder_chain_1 (next, prev);
-    }
-  while (next);
-  RBI (prev)->next = NULL;
-}
-
-/* A helper function for make_reorder_chain.
-
-   We do not follow EH edges, or non-fallthru edges to noreturn blocks.
-   These are assumed to be the error condition and we wish to cluster
-   all of them at the very end of the function for the benefit of cache
-   locality for the rest of the function.
-
-   ??? We could do slightly better by noticing earlier that some subgraph
-   has all paths leading to noreturn functions, but for there to be more
-   than one block in such a subgraph is rare.  */
-
-static basic_block
-make_reorder_chain_1 (bb, prev)
-     basic_block bb;
-     basic_block prev;
-{
-  edge e;
-  basic_block next;
-  rtx note;
-
-  /* Mark this block visited.  */
-  if (prev)
-    {
- restart:
-      RBI (prev)->next = bb;
-
-      if (rtl_dump_file && prev->next_bb != bb)
-	fprintf (rtl_dump_file, "Reordering block %d after %d\n",
-		 bb->index, prev->index);
-    }
-  else
-    {
-      if (bb->prev_bb != ENTRY_BLOCK_PTR)
-	abort ();
-    }
-  RBI (bb)->visited = 1;
-  prev = bb;
-
-  if (bb->succ == NULL)
-    return prev;
-
-  /* Find the most probable block.  */
-
-  next = NULL;
-  if (any_condjump_p (bb->end)
-      && (note = find_reg_note (bb->end, REG_BR_PROB, 0)) != NULL)
-    {
-      int taken, probability;
-      edge e_taken, e_fall;
-
-      probability = INTVAL (XEXP (note, 0));
-      taken = probability > REG_BR_PROB_BASE / 2;
-
-      /* Find the normal taken edge and the normal fallthru edge.
-
-	 Note, conditional jumps with other side effects may not
-	 be fully optimized.  In this case it is possible for
-	 the conditional jump to branch to the same location as
-	 the fallthru path.
-
-	 We should probably work to improve optimization of that
-	 case; however, it seems silly not to also deal with such
-	 problems here if they happen to occur.  */
-
-      e_taken = e_fall = NULL;
-      for (e = bb->succ; e ; e = e->succ_next)
-	{
-	  if (e->flags & EDGE_FALLTHRU)
-	    e_fall = e;
-	  else if (! (e->flags & EDGE_EH))
-	    e_taken = e;
-	}
-
-      next = ((taken && e_taken) ? e_taken : e_fall)->dest;
-    }
-
-  /* In the absence of a prediction, disturb things as little as possible
-     by selecting the old "next" block from the list of successors.  If
-     there had been a fallthru edge, that will be the one.  */
-  /* Note that the fallthru block may not be next any time we eliminate
-     forwarder blocks.  */
-  if (! next)
-    {
-      for (e = bb->succ; e ; e = e->succ_next)
-	if (e->flags & EDGE_FALLTHRU)
-	  {
-	    next = e->dest;
-	    break;
-	  }
-	else if (e->dest == bb->next_bb)
-	  {
-	    if (! (e->flags & (EDGE_ABNORMAL_CALL | EDGE_EH)))
-	      next = e->dest;
-	  }
-    }
-
-  /* Make sure we didn't select a silly next block.  */
-  if (! next || next == EXIT_BLOCK_PTR || RBI (next)->visited)
-    next = NULL;
-
-  /* Recurse on the successors.  Unroll the last call, as the normal
-     case is exactly one or two edges, and we can tail recurse.  */
-  for (e = bb->succ; e; e = e->succ_next)
-    if (e->dest != EXIT_BLOCK_PTR
-	&& ! RBI (e->dest)->visited
-	&& e->dest->succ
-	&& ! (e->flags & (EDGE_ABNORMAL_CALL | EDGE_EH)))
-      {
-	if (next)
-	  {
-	    prev = make_reorder_chain_1 (next, prev);
-	    next = RBI (e->dest)->visited ? NULL : e->dest;
-	  }
-	else
-	  next = e->dest;
-      }
-  if (next)
-    {
-      bb = next;
-      goto restart;
-    }
-
-  return prev;
-}

/* Find the traces for Software Trace Cache.  Chain each trace through
   RBI()->next.  Store the number of traces to N_TRACES and description of
   traces to TRACES.  */

static void
find_traces (n_traces, traces)
     int *n_traces;
     struct trace *traces;
{
  int i;
  edge e;
  fibheap_t heap;
/* Insert entry points of function into heap. */
heap = fibheap_new ();
max_entry_frequency = 0;
max_entry_count = 0;
for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
{
bbd[e->dest->index].heap = heap;
bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
e->dest);
if (e->dest->frequency > max_entry_frequency)
max_entry_frequency = e->dest->frequency;
if (e->dest->count > max_entry_count)
max_entry_count = e->dest->count;
}
/* Find the traces. */
for (i = 0; i < N_ROUNDS; i++)
{
gcov_type count_threshold;
      if (rtl_dump_file)
fprintf (rtl_dump_file, "STC - round %d\n", i + 1);
if (max_entry_count < INT_MAX / 1000)
count_threshold = max_entry_count * exec_threshold[i] / 1000;
else
count_threshold = max_entry_count / 1000 * exec_threshold[i];
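      /* Dividing before multiplying in the branch above loses a little
	 precision but avoids overflowing the multiplication when
	 max_entry_count does not fit in INT_MAX / 1000.  */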
find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
max_entry_frequency * exec_threshold[i] / 1000,
count_threshold, traces, n_traces, i, &heap);
}
fibheap_delete (heap);
if (rtl_dump_file)
{
for (i = 0; i < *n_traces; i++)
{
basic_block bb;
fprintf (rtl_dump_file, "Trace %d (round %d): ", i + 1,
traces[i].round + 1);
for (bb = traces[i].first; bb != traces[i].last; bb = RBI (bb)->next)
fprintf (rtl_dump_file, "%d [%d] ", bb->index, bb->frequency);
fprintf (rtl_dump_file, "%d [%d]\n", bb->index, bb->frequency);
}
fflush (rtl_dump_file);
}
}
/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
(with sequential number TRACE_N). */
static basic_block
rotate_loop (back_edge, trace, trace_n)
edge back_edge;
struct trace *trace;
int trace_n;
{
basic_block bb;
/* Information about the best end (end after rotation) of the loop. */
basic_block best_bb = NULL;
edge best_edge = NULL;
int best_freq = -1;
gcov_type best_count = -1;
/* The best edge is preferred when its destination is not visited yet
or is a start block of some trace. */
bool is_preferred = false;
/* Find the most frequent edge that goes out from current trace. */
bb = back_edge->dest;
  do
    {
      edge e;
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest != EXIT_BLOCK_PTR
	    && RBI (e->dest)->visited != trace_n
	    && (e->flags & EDGE_CAN_FALLTHRU)
	    && !(e->flags & EDGE_COMPLEX))
	{
	  if (is_preferred)
	    {
	      /* The best edge is preferred.  */
	      if (!RBI (e->dest)->visited
		  || bbd[e->dest->index].start_of_trace >= 0)
		{
		  /* The current edge E is also preferred.  */
		  int freq = EDGE_FREQUENCY (e);
		  if (freq > best_freq || e->count > best_count)
		    {
		      best_freq = freq;
		      best_count = e->count;
		      best_edge = e;
		      best_bb = bb;
		    }
		}
	    }
	  else
	    {
	      if (!RBI (e->dest)->visited
		  || bbd[e->dest->index].start_of_trace >= 0)
		{
		  /* The current edge E is preferred.  */
		  is_preferred = true;
		  best_freq = EDGE_FREQUENCY (e);
		  best_count = e->count;
		  best_edge = e;
		  best_bb = bb;
		}
	      else
		{
		  int freq = EDGE_FREQUENCY (e);
		  if (!best_edge || freq > best_freq || e->count > best_count)
		    {
		      best_freq = freq;
		      best_count = e->count;
		      best_edge = e;
		      best_bb = bb;
		    }
		}
	    }
	}
      bb = RBI (bb)->next;
    }
  while (bb != back_edge->dest);

  if (best_bb)
    {
      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
	 the trace.  */
      if (back_edge->dest == trace->first)
	{
	  trace->first = RBI (best_bb)->next;
	}
      else
	{
	  basic_block prev_bb;

	  for (prev_bb = trace->first;
	       RBI (prev_bb)->next != back_edge->dest;
	       prev_bb = RBI (prev_bb)->next)
	    ;
	  RBI (prev_bb)->next = RBI (best_bb)->next;

	  /* Try to get rid of uncond jump to cond jump.  */
	  if (prev_bb->succ && !prev_bb->succ->succ_next)
	    {
	      basic_block header = prev_bb->succ->dest;

	      /* Duplicate HEADER if it is a small block containing cond jump
		 in the end.  */
	      if (any_condjump_p (header->end) && copy_bb_p (header, 0))
		{
		  copy_bb (header, prev_bb->succ, prev_bb, trace_n);
		}
	    }
	}
    }
  else
    {
      /* We have not found suitable loop tail so do no rotation.  */
      best_bb = back_edge->src;
    }
  RBI (best_bb)->next = NULL;
  return best_bb;
}
/* This function marks BB that it was visited in trace number TRACE.  */
static void
mark_bb_visited (bb, trace)
basic_block bb;
int trace;
{
RBI (bb)->visited = trace;
if (bbd[bb->index].heap)
{
fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
bbd[bb->index].heap = NULL;
bbd[bb->index].node = NULL;
}
}
/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e.
   do not include into traces basic blocks whose probability is lower than
   BRANCH_TH or whose frequency is lower than EXEC_TH (or whose count is
   lower than COUNT_TH).  It stores the new traces into TRACES and modifies
   the number of traces *N_TRACES.  Sets the round (which the trace belongs
   to) to ROUND.  It expects that starting basic blocks are in *HEAP and at
   the end it deletes *HEAP and stores starting points for the next round
   into new *HEAP.  */
static void
find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
heap)
int branch_th;
int exec_th;
gcov_type count_th;
struct trace *traces;
int *n_traces;
int round;
fibheap_t *heap;
{
/* Heap for discarded basic blocks which are possible starting points for
the next round. */
fibheap_t new_heap = fibheap_new ();
while (!fibheap_empty (*heap))
{
basic_block bb;
struct trace *trace;
edge best_edge, e;
fibheapkey_t key;
bb = fibheap_extract_min (*heap);
bbd[bb->index].heap = NULL;
bbd[bb->index].node = NULL;
if (rtl_dump_file)
fprintf (rtl_dump_file, "Getting bb %d\n", bb->index);
/* If the BB's frequency is too low send BB to the next round. */
if (bb->frequency < exec_th || bb->count < count_th
|| ((round < N_ROUNDS - 1) && probably_never_executed_bb_p (bb)))
{
int key = bb_to_key (bb);
bbd[bb->index].heap = new_heap;
bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
if (rtl_dump_file)
fprintf (rtl_dump_file,
" Possible start point of next round: %d (key: %d)\n",
bb->index, key);
continue;
}
trace = traces + *n_traces;
trace->first = bb;
trace->round = round;
trace->length = 0;
(*n_traces)++;
do
{
int prob, freq;
/* The probability and frequency of the best edge. */
int best_prob = INT_MIN / 2;
int best_freq = INT_MIN / 2;
best_edge = NULL;
mark_bb_visited (bb, *n_traces);
trace->length++;
if (rtl_dump_file)
fprintf (rtl_dump_file, "Basic block %d was visited in trace %d\n",
bb->index, *n_traces - 1);
/* Select the successor that will be placed after BB. */
for (e = bb->succ; e; e = e->succ_next)
{
if (e->flags & EDGE_FAKE)
abort ();
if (e->dest == EXIT_BLOCK_PTR)
continue;
if (RBI (e->dest)->visited
&& RBI (e->dest)->visited != *n_traces)
continue;
prob = e->probability;
freq = EDGE_FREQUENCY (e);
/* Edge that cannot be fallthru or improbable or infrequent
successor (ie. it is unsuitable successor). */
if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
|| prob < branch_th || freq < exec_th || e->count < count_th)
continue;
if (better_edge_p (bb, e, prob, freq, best_prob, best_freq))
{
best_edge = e;
best_prob = prob;
best_freq = freq;
}
}
/* Add all non-selected successors to the heaps. */
for (e = bb->succ; e; e = e->succ_next)
{
if (e == best_edge
|| e->dest == EXIT_BLOCK_PTR
|| RBI (e->dest)->visited)
continue;
key = bb_to_key (e->dest);
if (bbd[e->dest->index].heap)
{
/* E->DEST is already in some heap. */
if (key != bbd[e->dest->index].node->key)
{
if (rtl_dump_file)
{
fprintf (rtl_dump_file,
"Changing key for bb %d from %ld to %ld.\n",
e->dest->index,
(long) bbd[e->dest->index].node->key,
key);
}
fibheap_replace_key (bbd[e->dest->index].heap,
bbd[e->dest->index].node, key);
}
}
else
{
fibheap_t which_heap = *heap;
prob = e->probability;
freq = EDGE_FREQUENCY (e);
if (!(e->flags & EDGE_CAN_FALLTHRU)
|| (e->flags & EDGE_COMPLEX)
|| prob < branch_th || freq < exec_th
|| e->count < count_th)
{
if (round < N_ROUNDS - 1)
which_heap = new_heap;
}
	      bbd[e->dest->index].heap = which_heap;
	      bbd[e->dest->index].node = fibheap_insert (which_heap,
							 key, e->dest);

	      if (rtl_dump_file)
		{
		  fprintf (rtl_dump_file,
			   "  Possible start of %s round: %d (key: %ld)\n",
			   (which_heap == new_heap) ? "next" : "this",
			   e->dest->index, (long) key);
		}
	    }
	}
if (best_edge) /* Suitable successor was found. */
{
if (RBI (best_edge->dest)->visited == *n_traces)
{
/* We do nothing with one basic block loops. */
if (best_edge->dest != bb)
{
if (EDGE_FREQUENCY (best_edge)
> 4 * best_edge->dest->frequency / 5)
{
/* The loop has at least 4 iterations. If the loop
header is not the first block of the function
we can rotate the loop. */
if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
{
if (rtl_dump_file)
{
fprintf (rtl_dump_file,
"Rotating loop %d - %d\n",
best_edge->dest->index, bb->index);
}
RBI (bb)->next = best_edge->dest;
bb = rotate_loop (best_edge, trace, *n_traces);
}
}
else
{
/* The loop has less than 4 iterations. */
/* Check whether there is another edge from BB. */
edge another_edge;
for (another_edge = bb->succ;
another_edge;
another_edge = another_edge->succ_next)
if (another_edge != best_edge)
break;
if (!another_edge && copy_bb_p (best_edge->dest,
!optimize_size))
{
bb = copy_bb (best_edge->dest, best_edge, bb,
*n_traces);
}
}
}
/* Terminate the trace. */
break;
}
else
{
/* Check for a situation
A
/|
B |
\|
C
where
EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
>= EDGE_FREQUENCY (AC).
(i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
Best ordering is then A B C.
This situation is created for example by:
if (A) B;
C;
*/
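	      /* For example, if EDGE_FREQUENCY (AC) is 60 and B->frequency
		 is 35, then 2 * 35 = 70 >= 60 and the trace becomes A B C
		 instead of jumping from A directly to C.  */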
for (e = bb->succ; e; e = e->succ_next)
if (e != best_edge
&& (e->flags & EDGE_CAN_FALLTHRU)
&& !(e->flags & EDGE_COMPLEX)
&& !RBI (e->dest)->visited
&& !e->dest->pred->pred_next
&& e->dest->succ
&& (e->dest->succ->flags & EDGE_CAN_FALLTHRU)
&& !(e->dest->succ->flags & EDGE_COMPLEX)
&& !e->dest->succ->succ_next
&& e->dest->succ->dest == best_edge->dest
&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
{
best_edge = e;
if (rtl_dump_file)
fprintf (rtl_dump_file, "Selecting BB %d\n",
best_edge->dest->index);
break;
}
RBI (bb)->next = best_edge->dest;
bb = best_edge->dest;
}
}
}
while (best_edge);
trace->last = bb;
bbd[trace->first->index].start_of_trace = *n_traces - 1;
bbd[trace->last->index].end_of_trace = *n_traces - 1;
/* The trace is terminated so we have to recount the keys in heap
(some block can have a lower key because now one of its predecessors
is an end of the trace). */
for (e = bb->succ; e; e = e->succ_next)
{
if (e->dest == EXIT_BLOCK_PTR
|| RBI (e->dest)->visited)
continue;
if (bbd[e->dest->index].heap)
{
key = bb_to_key (e->dest);
if (key != bbd[e->dest->index].node->key)
{
if (rtl_dump_file)
{
fprintf (rtl_dump_file,
"Changing key for bb %d from %ld to %ld.\n",
e->dest->index,
(long) bbd[e->dest->index].node->key, key);
}
fibheap_replace_key (bbd[e->dest->index].heap,
bbd[e->dest->index].node,
key);
}
}
}
}
fibheap_delete (*heap);
/* "Return" the new heap. */
*heap = new_heap;
}
/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark OLD_BB visited and update the pass's data
   structures (TRACE is the number of the trace which OLD_BB is duplicated
   to).  */
static basic_block
copy_bb (old_bb, e, bb, trace)
     basic_block old_bb;
     edge e;
     basic_block bb;
     int trace;
{
  basic_block new_bb;

  new_bb = cfg_layout_duplicate_bb (old_bb, e);
  if (e->dest != new_bb)
    abort ();
  if (RBI (e->dest)->visited)
    abort ();

  if (rtl_dump_file)
    fprintf (rtl_dump_file,
	     "Duplicated bb %d (created bb %d)\n",
	     old_bb->index, new_bb->index);
  RBI (new_bb)->visited = trace;
  RBI (new_bb)->next = RBI (bb)->next;
  RBI (bb)->next = new_bb;

  if (new_bb->index >= array_size || last_basic_block > array_size)
    {
      int i;
      int new_size;

      new_size = MAX (last_basic_block, new_bb->index + 1);
      new_size = GET_ARRAY_SIZE (new_size);
      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
      for (i = array_size; i < new_size; i++)
	{
	  bbd[i].start_of_trace = -1;
	  bbd[i].end_of_trace = -1;
	  bbd[i].heap = NULL;
	  bbd[i].node = NULL;
	}
      array_size = new_size;

      if (rtl_dump_file)
	{
	  fprintf (rtl_dump_file,
		   "Growing the dynamic array to %d elements.\n",
		   array_size);
	}
    }

  return new_bb;
}
/* Compute and return the key (for the heap) of the basic block BB. */
static fibheapkey_t
bb_to_key (bb)
basic_block bb;
{
edge e;
int priority = 0;
/* Do not start in probably never executed blocks. */
if (probably_never_executed_bb_p (bb))
return BB_FREQ_MAX;
/* Prefer blocks whose predecessor is an end of some trace
or whose predecessor edge is EDGE_DFS_BACK. */
for (e = bb->pred; e; e = e->pred_next)
    {
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
	  || (e->flags & EDGE_DFS_BACK))
	{
	  int edge_freq = EDGE_FREQUENCY (e);
	  if (edge_freq > priority)
	    priority = edge_freq;
	}
    }

  if (priority)
    /* The block with priority should have significantly lower key.  */
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
  return -bb->frequency;
}
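/* Note that the fibonacci heap is a min-heap: a smaller (more negative)
   key is extracted earlier.  Since bb->frequency is at most BB_FREQ_MAX,
   a block keyed by -(100 * BB_FREQ_MAX + ...) always leaves the heap
   before any block keyed by plain -bb->frequency.  */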
/* Return true when the edge E from basic block BB is better than the temporary
best edge (details are in function). The probability of edge E is PROB. The
frequency of the successor is FREQ. The current best probability is
BEST_PROB, the best frequency is BEST_FREQ.
The edge is considered to be equivalent when PROB does not differ much from
BEST_PROB; similarly for frequency. */
static bool
better_edge_p (bb, e, prob, freq, best_prob, best_freq)
basic_block bb;
edge e;
int prob;
int freq;
int best_prob;
int best_freq;
{
bool is_better_edge;
  /* The BEST_* values do not have to be best, but can be a bit smaller than
     maximum values.  */
  int diff_prob = best_prob / 10;
  int diff_freq = best_freq / 10;

  if (prob > best_prob + diff_prob)
    /* The edge has higher probability than the temporary best edge.  */
    is_better_edge = true;
else if (prob < best_prob - diff_prob)
/* The edge has lower probability than the temporary best edge. */
is_better_edge = false;
else if (freq < best_freq - diff_freq)
/* The edge and the temporary best edge have almost equivalent
probabilities. The higher frequency of a successor now means
that there is another edge going into that successor.
This successor has lower frequency so it is better. */
is_better_edge = true;
else if (freq > best_freq + diff_freq)
/* This successor has higher frequency so it is worse. */
is_better_edge = false;
else if (e->dest->prev_bb == bb)
/* The edges have equivalent probabilities and the successors
have equivalent frequencies. Select the previous successor. */
is_better_edge = true;
else
is_better_edge = false;
return is_better_edge;
}
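/* For example, with best_prob == 5000 the slack diff_prob is 500, so a
   candidate edge with prob == 5400 counts as equivalent and the
   frequency comparison decides, while one with prob == 5600 wins on
   probability alone.  */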
/* Connect traces in array TRACES, N_TRACES is the count of traces. */
static void
connect_traces (n_traces, traces)
int n_traces;
struct trace *traces;
{
int i;
bool *connected;
int last_trace;
int freq_threshold;
gcov_type count_threshold;
freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
if (max_entry_count < INT_MAX / 1000)
count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
else
count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
connected = xcalloc (n_traces, sizeof (bool));
last_trace = -1;
for (i = 0; i < n_traces; i++)
    {
      int t = i;
      int t2;
      edge e, best;
      int best_len;

      if (connected[t])
	continue;

      connected[t] = true;

      /* Find the predecessor traces.  */
      for (t2 = t; t2 > 0;)
	{
	  best = NULL;
	  best_len = 0;
	  for (e = traces[t2].first->pred; e; e = e->pred_next)
	    {
	      int si = e->src->index;

	      if (e->src != ENTRY_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[si].end_of_trace >= 0
		  && !connected[bbd[si].end_of_trace]
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[si].end_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[si].end_of_trace].length;
		}
	    }
	  if (best)
	    {
	      RBI (best->src)->next = best->dest;
	      t2 = bbd[best->src->index].end_of_trace;
	      connected[t2] = true;
	      if (rtl_dump_file)
		{
		  fprintf (rtl_dump_file, "Connection: %d %d\n",
			   best->src->index, best->dest->index);
		}
	    }
	  else
	    break;
	}

      if (last_trace >= 0)
	RBI (traces[last_trace].last)->next = traces[t2].first;
      last_trace = t;

      /* Find the successor traces.  */
      while (1)
	{
	  /* Find the continuation of the chain.  */
	  best = NULL;
	  best_len = 0;
	  for (e = traces[t].last->succ; e; e = e->succ_next)
{
int di = e->dest->index;
if (e->dest != EXIT_BLOCK_PTR
&& (e->flags & EDGE_CAN_FALLTHRU)
&& !(e->flags & EDGE_COMPLEX)
&& bbd[di].start_of_trace >= 0
&& !connected[bbd[di].start_of_trace]
&& (!best
|| e->probability > best->probability
|| (e->probability == best->probability
&& traces[bbd[di].start_of_trace].length > best_len)))
{
best = e;
best_len = traces[bbd[di].start_of_trace].length;
}
}
if (best)
{
if (rtl_dump_file)
{
fprintf (rtl_dump_file, "Connection: %d %d\n",
best->src->index, best->dest->index);
}
t = bbd[best->dest->index].start_of_trace;
RBI (traces[last_trace].last)->next = traces[t].first;
connected[t] = true;
last_trace = t;
}
else
{
/* Try to connect the traces by duplication of 1 block. */
edge e2;
basic_block next_bb = NULL;
for (e = traces[t].last->succ; e; e = e->succ_next)
if (e->dest != EXIT_BLOCK_PTR
&& (e->flags & EDGE_CAN_FALLTHRU)
&& !(e->flags & EDGE_COMPLEX)
&& (EDGE_FREQUENCY (e) >= freq_threshold)
&& (e->count >= count_threshold)
&& (!best
|| e->probability > best->probability))
{
edge best2 = NULL;
int best2_len = 0;
for (e2 = e->dest->succ; e2; e2 = e2->succ_next)
{
int di = e2->dest->index;
if (e2->dest == EXIT_BLOCK_PTR
|| ((e2->flags & EDGE_CAN_FALLTHRU)
&& !(e2->flags & EDGE_COMPLEX)
&& bbd[di].start_of_trace >= 0
&& !connected[bbd[di].start_of_trace]
&& (EDGE_FREQUENCY (e2) >= freq_threshold)
&& (e2->count >= count_threshold)
&& (!best2
|| e2->probability > best2->probability
|| (e2->probability == best2->probability
&& traces[bbd[di].start_of_trace].length
> best2_len))))
{
best = e;
best2 = e2;
if (e2->dest != EXIT_BLOCK_PTR)
best2_len = traces[bbd[di].start_of_trace].length;
else
best2_len = INT_MAX;
next_bb = e2->dest;
}
}
}
if (best && next_bb && copy_bb_p (best->dest, !optimize_size))
{
basic_block new_bb;
if (rtl_dump_file)
{
fprintf (rtl_dump_file, "Connection: %d %d ",
traces[t].last->index, best->dest->index);
if (next_bb == EXIT_BLOCK_PTR)
fprintf (rtl_dump_file, "exit\n");
else
fprintf (rtl_dump_file, "%d\n", next_bb->index);
}
new_bb = copy_bb (best->dest, best, traces[t].last, t);
traces[t].last = new_bb;
if (next_bb != EXIT_BLOCK_PTR)
{
t = bbd[next_bb->index].start_of_trace;
RBI (traces[last_trace].last)->next = traces[t].first;
connected[t] = true;
last_trace = t;
}
else
break; /* Stop finding the successor traces. */
}
else
break; /* Stop finding the successor traces. */
}
	}
}
if (rtl_dump_file)
{
basic_block bb;
      fprintf (rtl_dump_file, "Final order:\n");
for (bb = traces[0].first; bb; bb = RBI (bb)->next)
fprintf (rtl_dump_file, "%d ", bb->index);
fprintf (rtl_dump_file, "\n");
fflush (rtl_dump_file);
    }
  FREE (connected);
}

/* Return true when BB can and should be copied.  CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.  */
static bool
copy_bb_p (bb, code_may_grow)
basic_block bb;
int code_may_grow;
{
int size = 0;
int max_size = uncond_jump_length;
rtx insn;
if (!bb->frequency)
return false;
if (!bb->pred || !bb->pred->pred_next)
return false;
if (!cfg_layout_can_duplicate_bb_p (bb))
return false;
if (code_may_grow && maybe_hot_bb_p (bb))
max_size *= 8;
for (insn = bb->head; insn != NEXT_INSN (bb->end);
insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	size += get_attr_length (insn);
    }
  if (size <= max_size)
    return true;

  if (rtl_dump_file)
    {
      fprintf (rtl_dump_file,
	       "Block %d can't be copied because its size = %d.\n",
	       bb->index, size);
    }

  return false;
}
/* Return the length of unconditional jump instruction. */
static int
get_uncond_jump_length ()
{
rtx label, jump;
int length;
label = emit_label_before (gen_label_rtx (), get_insns ());
jump = emit_jump_insn (gen_jump (label));
length = get_attr_length (jump);
delete_insn (jump);
delete_insn (label);
return length;
} }
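/* The measurement above works by emitting a throw-away label and a jump
   to it at the start of the insn stream, reading the jump's length
   attribute, and deleting both insns again; no real code is changed.  */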
/* Reorder basic blocks.  The main entry point to this file.  */

@@ -262,6 +1056,10 @@ make_reorder_chain_1 (bb, prev)
void
reorder_basic_blocks ()
{
  int n_traces;
  int i;
  struct trace *traces;

  if (n_basic_blocks <= 1)
    return;
@@ -270,7 +1068,31 @@ reorder_basic_blocks ()
  cfg_layout_initialize (NULL);

-  make_reorder_chain ();
  set_edge_can_fallthru_flag ();
mark_dfs_back_edges ();
  /* We are estimating the length of an uncond jump insn only once since the
     code for getting the insn length always returns the minimal length now.  */
if (uncond_jump_length == 0)
uncond_jump_length = get_uncond_jump_length ();
/* We need to know some information for each basic block. */
array_size = GET_ARRAY_SIZE (last_basic_block);
bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
for (i = 0; i < array_size; i++)
{
bbd[i].start_of_trace = -1;
bbd[i].end_of_trace = -1;
bbd[i].heap = NULL;
bbd[i].node = NULL;
}
traces = xmalloc (n_basic_blocks * sizeof (struct trace));
n_traces = 0;
find_traces (&n_traces, traces);
connect_traces (n_traces, traces);
FREE (traces);
FREE (bbd);
  if (rtl_dump_file)
    dump_flow_info (rtl_dump_file);
cfgcleanup.c:
@@ -1143,17 +1143,6 @@ outgoing_edges_match (mode, bb1, bb2)
      || !onlyjump_p (bb2->end))
    return false;
-  /* Do not crossjump across loop boundaries.  This is a temporary
-     workaround for the common scenario in which crossjumping results
-     in killing the duplicated loop condition, making bb-reorder rotate
-     the loop incorrectly, leaving an extra unconditional jump inside
-     the loop.
-
-     This check should go away once bb-reorder knows how to duplicate
-     code in this case or rotate the loops to avoid this scenario.  */
-  if (bb1->loop_depth != bb2->loop_depth)
-    return false;
  b1 = BRANCH_EDGE (bb1);
  b2 = BRANCH_EDGE (bb2);
  f1 = FALLTHRU_EDGE (bb1);