Commit db0bf14f by Jan Hubicka Committed by Jan Hubicka

re PR driver/46760 (LTO bootstrap doesn't work with FDO)


	PR tree-optimization/46760
	* cgraph.c (cgraph_create_node): Initialize count_materialization_scale.
	* cgraph.h (struct cgraph_node): Add count_materialization_scale.
	* lto-cgraph.c (lto_output_edge): Fix assert.
	(lto_output_node): Output count_materialization_scale.
	(output_profile_summary): Output only runs and sum_max.
	(input_node): Input count_materialization_scale.
	(input_profile_summary): Read data into file specific gcov summary.
	(merge_profile_summaries): New function.
	(input_cgraph): Update call of input_profile_summary;
	call merge_profile_summaries.
	* lto-streamer-in.c (input_cfg): Add count_materialization_scale arg;
	rescale counts at read in.
	(input_bb): Likewise.
	(input_function): Update call of input_bb.
	(lto_read_body): Update call of input_cfg.
	* lto-streamer.h: Include gcov-io.h.
	(lto_file_decl_data): Add gcov_ctr_summary.

From-SVN: r167458
parent f3007348
2010-12-03 Jan Hubicka <jh@suse.cz>
PR tree-optimization/46760
* cgraph.c (cgraph_create_node): Initialize count_materialization_scale.
* cgraph.h (struct cgraph_node): Add count_materialization_scale.
* lto-cgraph.c (lto_output_edge): Fix assert.
(lto_output_node): Output count_materialization_scale.
(output_profile_summary): Output only runs and sum_max.
(input_node): Input count_materialization_scale.
(input_profile_summary): Read data into file specific gcov summary.
(merge_profile_summaries): New function.
(input_cgraph): Update call of input_profile_summary;
call merge_profile_summaries.
* lto-streamer-in.c (input_cfg): Add count_materialization_scale arg;
rescale counts at read in.
(input_bb): Likewise.
(input_function): Update call of input_bb.
(lto_read_body): Update call of input_cfg.
* lto-streamer.h: Include gcov-io.h.
(lto_file_decl_data): Add gcov_ctr_summary.
2010-12-03 Dave Korn <dave.korn.cygwin@gmail.com> 2010-12-03 Dave Korn <dave.korn.cygwin@gmail.com>
* doc/tm.texi.in (Cond. Exec. Macros): Rename node from this ... * doc/tm.texi.in (Cond. Exec. Macros): Rename node from this ...
...@@ -478,6 +478,7 @@ cgraph_create_node (void) ...@@ -478,6 +478,7 @@ cgraph_create_node (void)
node->previous = NULL; node->previous = NULL;
node->global.estimated_growth = INT_MIN; node->global.estimated_growth = INT_MIN;
node->frequency = NODE_FREQUENCY_NORMAL; node->frequency = NODE_FREQUENCY_NORMAL;
node->count_materialization_scale = REG_BR_PROB_BASE;
ipa_empty_ref_list (&node->ref_list); ipa_empty_ref_list (&node->ref_list);
cgraph_nodes = node; cgraph_nodes = node;
cgraph_n_nodes++; cgraph_n_nodes++;
......
...@@ -233,6 +233,9 @@ struct GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) cgraph_node { ...@@ -233,6 +233,9 @@ struct GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) cgraph_node {
/* Expected number of executions: calculated in profile.c. */ /* Expected number of executions: calculated in profile.c. */
gcov_type count; gcov_type count;
/* How to scale counts at materialization time; used to merge
LTO units with different number of profile runs. */
int count_materialization_scale;
/* Unique id of the node. */ /* Unique id of the node. */
int uid; int uid;
/* Ordering of all cgraph nodes. */ /* Ordering of all cgraph nodes. */
......
...@@ -302,6 +302,7 @@ lto_output_edge (struct lto_simple_output_block *ob, struct cgraph_edge *edge, ...@@ -302,6 +302,7 @@ lto_output_edge (struct lto_simple_output_block *ob, struct cgraph_edge *edge,
gcc_assert (!(flags & (ECF_LOOPING_CONST_OR_PURE gcc_assert (!(flags & (ECF_LOOPING_CONST_OR_PURE
| ECF_MAY_BE_ALLOCA | ECF_MAY_BE_ALLOCA
| ECF_SIBCALL | ECF_SIBCALL
| ECF_LEAF
| ECF_NOVOPS))); | ECF_NOVOPS)));
} }
lto_output_bitpack (&bp); lto_output_bitpack (&bp);
...@@ -462,6 +463,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, ...@@ -462,6 +463,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl); lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
lto_output_sleb128_stream (ob->main_stream, node->count); lto_output_sleb128_stream (ob->main_stream, node->count);
lto_output_sleb128_stream (ob->main_stream, node->count_materialization_scale);
if (tag == LTO_cgraph_analyzed_node) if (tag == LTO_cgraph_analyzed_node)
{ {
...@@ -661,12 +663,12 @@ output_profile_summary (struct lto_simple_output_block *ob) ...@@ -661,12 +663,12 @@ output_profile_summary (struct lto_simple_output_block *ob)
{ {
if (profile_info) if (profile_info)
{ {
/* We do not output num, it is not terribly useful. */ /* We do not output num, sum_all and run_max, they are not used by
GCC profile feedback and they are difficult to merge from multiple
units. */
gcc_assert (profile_info->runs); gcc_assert (profile_info->runs);
lto_output_uleb128_stream (ob->main_stream, profile_info->runs); lto_output_uleb128_stream (ob->main_stream, profile_info->runs);
lto_output_sleb128_stream (ob->main_stream, profile_info->sum_all); lto_output_uleb128_stream (ob->main_stream, profile_info->sum_max);
lto_output_sleb128_stream (ob->main_stream, profile_info->run_max);
lto_output_sleb128_stream (ob->main_stream, profile_info->sum_max);
} }
else else
lto_output_uleb128_stream (ob->main_stream, 0); lto_output_uleb128_stream (ob->main_stream, 0);
...@@ -1045,6 +1047,7 @@ input_node (struct lto_file_decl_data *file_data, ...@@ -1045,6 +1047,7 @@ input_node (struct lto_file_decl_data *file_data,
node = cgraph_node (fn_decl); node = cgraph_node (fn_decl);
node->count = lto_input_sleb128 (ib); node->count = lto_input_sleb128 (ib);
node->count_materialization_scale = lto_input_sleb128 (ib);
if (tag == LTO_cgraph_analyzed_node) if (tag == LTO_cgraph_analyzed_node)
{ {
...@@ -1424,32 +1427,108 @@ static struct gcov_ctr_summary lto_gcov_summary; ...@@ -1424,32 +1427,108 @@ static struct gcov_ctr_summary lto_gcov_summary;
/* Input profile_info from IB. */ /* Input profile_info from IB. */
static void static void
input_profile_summary (struct lto_input_block *ib) input_profile_summary (struct lto_input_block *ib,
struct lto_file_decl_data *file_data)
{ {
unsigned int runs = lto_input_uleb128 (ib); unsigned int runs = lto_input_uleb128 (ib);
if (runs) if (runs)
{ {
if (!profile_info) file_data->profile_info.runs = runs;
{ file_data->profile_info.sum_max = lto_input_uleb128 (ib);
profile_info = &lto_gcov_summary; if (runs > file_data->profile_info.sum_max)
lto_gcov_summary.runs = runs; fatal_error ("Corrupted profile info in %s: sum_max is smaller than runs",
lto_gcov_summary.sum_all = lto_input_sleb128 (ib); file_data->file_name);
lto_gcov_summary.run_max = lto_input_sleb128 (ib);
lto_gcov_summary.sum_max = lto_input_sleb128 (ib);
}
/* We can support this by scaling all counts to nearest common multiple
of all different runs, but it is perhaps not worth the effort. */
else if (profile_info->runs != runs
|| profile_info->sum_all != lto_input_sleb128 (ib)
|| profile_info->run_max != lto_input_sleb128 (ib)
|| profile_info->sum_max != lto_input_sleb128 (ib))
sorry ("combining units with different profiles is not supported");
/* We allow some units to have profile and other to not have one. This will
just make unprofiled units to be size optimized that is sane. */
} }
} }
/* Rescale profile summaries to the same number of runs in the whole unit.

   FILE_DATA_VEC is a NULL-terminated vector of per-file LTO data; each
   entry carries the `runs' and `sum_max' values read for that file.
   The unit with the most runs defines the common scale.  On return,
   profile_info points at lto_gcov_summary (unless no file has a profile
   or the run count is rejected as too large) and, outside of LTRANS,
   every cgraph node coming from a profiled file has its
   count_materialization_scale, count and outgoing call edge counts
   rescaled to that common number of runs.  */

static void
merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
{
  struct lto_file_decl_data *file_data;
  unsigned int j;
  gcov_unsigned_t max_runs = 0;
  struct cgraph_node *node;
  struct cgraph_edge *edge;

  /* Find unit with maximal number of runs.  If we ever get serious about
     roundoff errors, we might also consider computing smallest common
     multiple.  */
  for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
    if (max_runs < file_data->profile_info.runs)
      max_runs = file_data->profile_info.runs;

  /* No unit has a profile; nothing to merge.  */
  if (!max_runs)
    return;

  /* Simple overflow check.  We probably don't need to support that many
     train runs.  Such a large value probably implies data corruption
     anyway.  This also keeps REG_BR_PROB_BASE * max_runs below from
     overflowing an int.  */
  if (max_runs > INT_MAX / REG_BR_PROB_BASE)
    {
      sorry ("At most %i profile runs is supported. Perhaps corrupted profile?",
	     INT_MAX / REG_BR_PROB_BASE);
      return;
    }

  profile_info = &lto_gcov_summary;
  lto_gcov_summary.runs = max_runs;
  lto_gcov_summary.sum_max = 0;

  /* Rescale all units to the maximal number of runs.
     sum_max can not be merged exactly, as we have no idea what files come
     from the same run; approximate it by the largest per-file sum_max
     after scaling that file to max_runs.  */
  for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
    if (file_data->profile_info.runs)
      {
	int scale = ((REG_BR_PROB_BASE * max_runs
		      + file_data->profile_info.runs / 2)
		     / file_data->profile_info.runs);
	lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max,
					(file_data->profile_info.sum_max
					 * scale
					 + REG_BR_PROB_BASE / 2)
					/ REG_BR_PROB_BASE);
      }

  /* Watch roundoff errors.  */
  if (lto_gcov_summary.sum_max < max_runs)
    lto_gcov_summary.sum_max = max_runs;

  /* If merging already happened at WPA time, we are done.  */
  if (flag_ltrans)
    return;

  /* Now compute count_materialization_scale of each node and rescale the
     counts already read into the callgraph.  This is reached only when
     not in LTRANS mode (see the early return above).  */
  for (node = cgraph_nodes; node; node = node->next)
    {
      /* Per-file data of the unit this node was read from.
	 NOTE(review): the NULL guard is defensive; it assumes nodes that
	 did not come from an LTO file may have no file data -- confirm.  */
      struct lto_file_decl_data *node_file = node->local.lto_file_data;
      int scale;

      /* Only `runs' and `sum_max' are streamed into the per-file summary,
	 so `runs' (not `run_max') tells whether the unit has a profile
	 and is the divisor that brings it to MAX_RUNS.  Units without a
	 profile keep their scale and counts unchanged.  */
      if (!node_file || !node_file->profile_info.runs)
	continue;

      scale = ((node->count_materialization_scale * max_runs
		+ node_file->profile_info.runs / 2)
	       / node_file->profile_info.runs);
      node->count_materialization_scale = scale;

      /* Report the file the node came from; the FILE_DATA iterator is
	 NULL once the loops above have finished.  */
      if (scale < 0)
	fatal_error ("Profile information in %s corrupted",
		     node_file->file_name);

      /* Identity scale: counts are already expressed in MAX_RUNS runs.  */
      if (scale == REG_BR_PROB_BASE)
	continue;

      for (edge = node->callees; edge; edge = edge->next_callee)
	edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2)
		       / REG_BR_PROB_BASE);
      node->count = ((node->count * scale + REG_BR_PROB_BASE / 2)
		     / REG_BR_PROB_BASE);
    }
}
/* Input and merge the cgraph from each of the .o files passed to /* Input and merge the cgraph from each of the .o files passed to
lto1. */ lto1. */
...@@ -1473,7 +1552,7 @@ input_cgraph (void) ...@@ -1473,7 +1552,7 @@ input_cgraph (void)
&data, &len); &data, &len);
if (!ib) if (!ib)
fatal_error ("cannot find LTO cgraph in %s", file_data->file_name); fatal_error ("cannot find LTO cgraph in %s", file_data->file_name);
input_profile_summary (ib); input_profile_summary (ib, file_data);
file_data->cgraph_node_encoder = lto_cgraph_encoder_new (); file_data->cgraph_node_encoder = lto_cgraph_encoder_new ();
nodes = input_cgraph_1 (file_data, ib); nodes = input_cgraph_1 (file_data, ib);
lto_destroy_simple_input_block (file_data, LTO_section_cgraph, lto_destroy_simple_input_block (file_data, LTO_section_cgraph,
...@@ -1499,6 +1578,8 @@ input_cgraph (void) ...@@ -1499,6 +1578,8 @@ input_cgraph (void)
VEC_free (cgraph_node_ptr, heap, nodes); VEC_free (cgraph_node_ptr, heap, nodes);
VEC_free (varpool_node_ptr, heap, varpool); VEC_free (varpool_node_ptr, heap, varpool);
} }
merge_profile_summaries (file_data_vec);
/* Clear out the aux field that was used to store enough state to /* Clear out the aux field that was used to store enough state to
tell which nodes should be overwritten. */ tell which nodes should be overwritten. */
......
...@@ -719,7 +719,8 @@ make_new_block (struct function *fn, unsigned int index) ...@@ -719,7 +719,8 @@ make_new_block (struct function *fn, unsigned int index)
/* Read the CFG for function FN from input block IB. */ /* Read the CFG for function FN from input block IB. */
static void static void
input_cfg (struct lto_input_block *ib, struct function *fn) input_cfg (struct lto_input_block *ib, struct function *fn,
int count_materialization_scale)
{ {
unsigned int bb_count; unsigned int bb_count;
basic_block p_bb; basic_block p_bb;
...@@ -752,7 +753,8 @@ input_cfg (struct lto_input_block *ib, struct function *fn) ...@@ -752,7 +753,8 @@ input_cfg (struct lto_input_block *ib, struct function *fn)
if (bb == NULL) if (bb == NULL)
bb = make_new_block (fn, index); bb = make_new_block (fn, index);
edge_count = lto_input_uleb128 (ib); edge_count = (lto_input_uleb128 (ib) * count_materialization_scale
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
/* Connect up the CFG. */ /* Connect up the CFG. */
for (i = 0; i < edge_count; i++) for (i = 0; i < edge_count; i++)
...@@ -1066,7 +1068,8 @@ input_gimple_stmt (struct lto_input_block *ib, struct data_in *data_in, ...@@ -1066,7 +1068,8 @@ input_gimple_stmt (struct lto_input_block *ib, struct data_in *data_in,
static void static void
input_bb (struct lto_input_block *ib, enum LTO_tags tag, input_bb (struct lto_input_block *ib, enum LTO_tags tag,
struct data_in *data_in, struct function *fn) struct data_in *data_in, struct function *fn,
int count_materialization_scale)
{ {
unsigned int index; unsigned int index;
basic_block bb; basic_block bb;
...@@ -1079,7 +1082,8 @@ input_bb (struct lto_input_block *ib, enum LTO_tags tag, ...@@ -1079,7 +1082,8 @@ input_bb (struct lto_input_block *ib, enum LTO_tags tag,
index = lto_input_uleb128 (ib); index = lto_input_uleb128 (ib);
bb = BASIC_BLOCK_FOR_FUNCTION (fn, index); bb = BASIC_BLOCK_FOR_FUNCTION (fn, index);
bb->count = lto_input_sleb128 (ib); bb->count = (lto_input_sleb128 (ib) * count_materialization_scale
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
bb->loop_depth = lto_input_sleb128 (ib); bb->loop_depth = lto_input_sleb128 (ib);
bb->frequency = lto_input_sleb128 (ib); bb->frequency = lto_input_sleb128 (ib);
bb->flags = lto_input_sleb128 (ib); bb->flags = lto_input_sleb128 (ib);
...@@ -1253,12 +1257,14 @@ input_function (tree fn_decl, struct data_in *data_in, ...@@ -1253,12 +1257,14 @@ input_function (tree fn_decl, struct data_in *data_in,
DECL_INITIAL (fn_decl) = lto_input_tree (ib, data_in); DECL_INITIAL (fn_decl) = lto_input_tree (ib, data_in);
gcc_assert (DECL_INITIAL (fn_decl)); gcc_assert (DECL_INITIAL (fn_decl));
DECL_SAVED_TREE (fn_decl) = NULL_TREE; DECL_SAVED_TREE (fn_decl) = NULL_TREE;
node = cgraph_node (fn_decl);
/* Read all the basic blocks. */ /* Read all the basic blocks. */
tag = input_record_start (ib); tag = input_record_start (ib);
while (tag) while (tag)
{ {
input_bb (ib, tag, data_in, fn); input_bb (ib, tag, data_in, fn,
node->count_materialization_scale);
tag = input_record_start (ib); tag = input_record_start (ib);
} }
...@@ -1300,7 +1306,6 @@ input_function (tree fn_decl, struct data_in *data_in, ...@@ -1300,7 +1306,6 @@ input_function (tree fn_decl, struct data_in *data_in,
gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest)); gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest));
} }
node = cgraph_node (fn_decl);
fixup_call_stmt_edges (node, stmts); fixup_call_stmt_edges (node, stmts);
execute_all_ipa_stmt_fixups (node, stmts); execute_all_ipa_stmt_fixups (node, stmts);
...@@ -1393,6 +1398,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl, ...@@ -1393,6 +1398,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl,
{ {
struct function *fn = DECL_STRUCT_FUNCTION (fn_decl); struct function *fn = DECL_STRUCT_FUNCTION (fn_decl);
struct lto_in_decl_state *decl_state; struct lto_in_decl_state *decl_state;
struct cgraph_node *node = cgraph_node (fn_decl);
push_cfun (fn); push_cfun (fn);
init_tree_ssa (fn); init_tree_ssa (fn);
...@@ -1402,7 +1408,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl, ...@@ -1402,7 +1408,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl,
gcc_assert (decl_state); gcc_assert (decl_state);
file_data->current_decl_state = decl_state; file_data->current_decl_state = decl_state;
input_cfg (&ib_cfg, fn); input_cfg (&ib_cfg, fn, node->count_materialization_scale);
/* Set up the struct function. */ /* Set up the struct function. */
input_function (fn_decl, data_in, &ib_main); input_function (fn_decl, data_in, &ib_main);
......
...@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
#include "vec.h" #include "vec.h"
#include "vecprim.h" #include "vecprim.h"
#include "alloc-pool.h" #include "alloc-pool.h"
#include "gcov-io.h"
/* Define when debugging the LTO streamer. This causes the writer /* Define when debugging the LTO streamer. This causes the writer
to output the numeric value for the memory address of the tree node to output the numeric value for the memory address of the tree node
...@@ -610,6 +611,8 @@ struct GTY(()) lto_file_decl_data ...@@ -610,6 +611,8 @@ struct GTY(()) lto_file_decl_data
/* Symbol resolutions for this file */ /* Symbol resolutions for this file */
VEC(ld_plugin_symbol_resolution_t,heap) * GTY((skip)) resolutions; VEC(ld_plugin_symbol_resolution_t,heap) * GTY((skip)) resolutions;
struct gcov_ctr_summary GTY((skip)) profile_info;
}; };
typedef struct lto_file_decl_data *lto_file_decl_data_ptr; typedef struct lto_file_decl_data *lto_file_decl_data_ptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment