Commit 248c16c4 by Segher Boessenkool Committed by Segher Boessenkool

bb-reorder: Add the "simple" algorithm


2015-10-01  Segher Boessenkool  <segher@kernel.crashing.org>

	* bb-reorder.c: Add intro comment.
	(reorder_basic_blocks_software_trace_cache): Print a header to
	the dump file.
	(edge_order): New function.
	(reorder_basic_blocks_simple): New function.
	(reorder_basic_blocks): Choose between the STC and the simple
	algorithms (always choose the former).

From-SVN: r228317
parent af71fa39
2015-10-01 Segher Boessenkool <segher@kernel.crashing.org> 2015-10-01 Segher Boessenkool <segher@kernel.crashing.org>
* bb-reorder.c: Add intro comment.
(reorder_basic_blocks_software_trace_cache): Print a header to
the dump file.
(edge_order): New function.
(reorder_basic_blocks_simple): New function.
(reorder_basic_blocks): Choose between the STC and the simple
algorithms (always choose the former).
2015-10-01 Segher Boessenkool <segher@kernel.crashing.org>
* bb-reorder.c (reorder_basic_blocks_software_trace_cache): New * bb-reorder.c (reorder_basic_blocks_software_trace_cache): New
function, factored out from ... function, factored out from ...
(reorder_basic_blocks): ... here. (reorder_basic_blocks): ... here.
...@@ -17,6 +17,18 @@ ...@@ -17,6 +17,18 @@
along with GCC; see the file COPYING3. If not see along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */ <http://www.gnu.org/licenses/>. */
/* This file contains the "reorder blocks" pass, which changes the control
flow of a function to encounter fewer branches; the "partition blocks"
pass, which divides the basic blocks into "hot" and "cold" partitions,
which are kept separate; and the "duplicate computed gotos" pass, which
duplicates blocks ending in an indirect jump.
There are two algorithms for "reorder blocks": the "simple" algorithm,
which just rearranges blocks, trying to minimize the number of executed
unconditional branches; and the "software trace cache" algorithm, which
also copies code, and in general tries a lot harder to have long linear
pieces of machine code executed. This algorithm is described next. */
/* This (greedy) algorithm constructs traces in several rounds. /* This (greedy) algorithm constructs traces in several rounds.
The construction starts from "seeds". The seed for the first round The construction starts from "seeds". The seed for the first round
is the entry point of the function. When there are more than one seed, is the entry point of the function. When there are more than one seed,
...@@ -2231,6 +2243,9 @@ update_crossing_jump_flags (void) ...@@ -2231,6 +2243,9 @@ update_crossing_jump_flags (void)
static void static void
reorder_basic_blocks_software_trace_cache (void) reorder_basic_blocks_software_trace_cache (void)
{ {
if (dump_file)
fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
int n_traces; int n_traces;
int i; int i;
struct trace *traces; struct trace *traces;
...@@ -2261,6 +2276,161 @@ reorder_basic_blocks_software_trace_cache (void) ...@@ -2261,6 +2276,161 @@ reorder_basic_blocks_software_trace_cache (void)
FREE (bbd); FREE (bbd);
} }
/* Return true if edge E1 is more desirable as a fallthrough edge than
edge E2 is. */
static bool
edge_order (edge e1, edge e2)
{
return EDGE_FREQUENCY (e1) > EDGE_FREQUENCY (e2);
}
/* Reorder basic blocks using the "simple" algorithm. This tries to
maximize the dynamic number of branches that are fallthrough, without
copying instructions. The algorithm is greedy, looking at the most
frequently executed branch first. */
static void
reorder_basic_blocks_simple (void)
{
if (dump_file)
fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");
edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];
/* First, collect all edges that can be optimized by reordering blocks:
simple jumps and conditional jumps, as well as the function entry edge. */
int n = 0;
edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
basic_block bb;
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *end = BB_END (bb);
if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
continue;
/* We cannot optimize asm goto. */
if (JUMP_P (end) && extract_asm_operands (end))
continue;
if (any_condjump_p (end))
{
edges[n++] = EDGE_SUCC (bb, 0);
edges[n++] = EDGE_SUCC (bb, 1);
}
else if (single_succ_p (bb))
edges[n++] = EDGE_SUCC (bb, 0);
}
/* Sort the edges, the most desirable first. */
std::stable_sort (edges, edges + n, edge_order);
/* Now decide which of those edges to make fallthrough edges. We set
BB_VISITED if a block already has a fallthrough successor assigned
to it. We make ->AUX of an endpoint point to the opposite endpoint
of a sequence of blocks that fall through, and ->AUX will be NULL
for a block that is in such a sequence but not an endpoint anymore.
To start with, everything points to itself, nothing is assigned yet. */
FOR_ALL_BB_FN (bb, cfun)
bb->aux = bb;
EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;
/* Now for all edges, the most desirable first, see if that edge can
connect two sequences. If it can, update AUX and BB_VISITED; if it
cannot, zero out the edge in the table. */
for (int j = 0; j < n; j++)
{
edge e = edges[j];
basic_block tail_a = e->src;
basic_block head_b = e->dest;
basic_block head_a = (basic_block) tail_a->aux;
basic_block tail_b = (basic_block) head_b->aux;
/* An edge cannot connect two sequences if:
- it crosses partitions;
- its src is not a current endpoint;
- its dest is not a current endpoint;
- or, it would create a loop. */
if (e->flags & EDGE_CROSSING
|| tail_a->flags & BB_VISITED
|| !tail_b
|| (!(head_b->flags & BB_VISITED) && head_b != tail_b)
|| tail_a == tail_b)
{
edges[j] = 0;
continue;
}
tail_a->aux = 0;
head_b->aux = 0;
head_a->aux = tail_b;
tail_b->aux = head_a;
tail_a->flags |= BB_VISITED;
}
/* Put the pieces together, in the same order that the start blocks of
the sequences already had. The hot/cold partitioning gives a little
complication: as a first pass only do this for blocks in the same
partition as the start block, and (if there is anything left to do)
in a second pass handle the other partition. */
basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
int current_partition = BB_PARTITION (last_tail);
bool need_another_pass = true;
for (int pass = 0; pass < 2 && need_another_pass; pass++)
{
need_another_pass = false;
FOR_EACH_BB_FN (bb, cfun)
if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
{
if (BB_PARTITION (bb) != current_partition)
{
need_another_pass = true;
continue;
}
last_tail->aux = bb;
last_tail = (basic_block) bb->aux;
}
current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
}
last_tail->aux = 0;
/* Finally, link all the chosen fallthrough edges. */
for (int j = 0; j < n; j++)
if (edges[j])
edges[j]->src->aux = edges[j]->dest;
delete[] edges;
/* If the entry edge no longer falls through we have to make a new
block so it can do so again. */
edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
{
force_nonfallthru (e);
e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
BB_COPY_PARTITION (e->src, e->dest);
}
}
/* Reorder basic blocks. The main entry point to this file. */ /* Reorder basic blocks. The main entry point to this file. */
static void static void
...@@ -2274,7 +2444,10 @@ reorder_basic_blocks (void) ...@@ -2274,7 +2444,10 @@ reorder_basic_blocks (void)
set_edge_can_fallthru_flag (); set_edge_can_fallthru_flag ();
mark_dfs_back_edges (); mark_dfs_back_edges ();
reorder_basic_blocks_software_trace_cache (); if (1)
reorder_basic_blocks_software_trace_cache ();
else
reorder_basic_blocks_simple ();
relink_block_chain (/*stay_in_cfglayout_mode=*/true); relink_block_chain (/*stay_in_cfglayout_mode=*/true);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment