Commit 0208f7da by Jan Hubicka Committed by Jan Hubicka

lto-cgraph.c (output_profile_summary, [...]): Use gcov streaming; stream hot bb…

lto-cgraph.c (output_profile_summary, [...]): Use gcov streaming; stream hot bb threshold to ltrans.


	* lto-cgraph.c (output_profile_summary, input_profile_summary): Use
	gcov streaming; stream hot bb threshold to ltrans.
	* predict.c (get_hot_bb_threshold): Break out from ....
	(maybe_hot_count_p): ... here.
	(set_hot_bb_threshold): New function.
	* lto-section-in.c (lto_section_name): Add profile.
	* profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare.
	* ipa.c: Include hash-table.h, tree-inline.h, profile.h, params.h,
	lto-streamer.h and data-streamer.h.
	(histogram_entry): New structure.
	(histogram, histogram_pool): New global vars.
	(histogram_hash): New structure.
	(histogram_hash::hash): New method.
	(histogram_hash::equal): Likewise.
	(account_time_size): New function.
	(cmp_counts): New function.
	(dump_histogram): New function.
	(ipa_profile_generate_summary): New function.
	(ipa_profile_write_summary): New function.
	(ipa_profile_read_summary): New function.
	(ipa_profile): Decide on threshold.
	(pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary.
	* Makefile.in (ipa.o): Update dependencies.
	* lto-streamer.h (LTO_section_ipa_profile): New section.

From-SVN: r197243
parent 5a6ccc94
2013-03-29 Jan Hubicka <jh@suse.cz>
* lto-cgraph.c (output_profile_summary, input_profile_summary): Use
gcov streaming; stream hot bb threshold to ltrans.
* predict.c (get_hot_bb_threshold): Break out from ....
(maybe_hot_count_p): ... here.
(set_hot_bb_threshold): New function.
* lto-section-in.c (lto_section_name): Add profile.
* profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare.
* ipa.c: Include hash-table.h, tree-inline.h, profile.h, params.h,
lto-streamer.h and data-streamer.h.
(histogram_entry): New structure.
(histogram, histogram_pool): New global vars.
(histogram_hash): New structure.
(histogram_hash::hash): New method.
(histogram_hash::equal): Likewise.
(account_time_size): New function.
(cmp_counts): New function.
(dump_histogram): New function.
(ipa_profile_generate_summary): New function.
(ipa_profile_write_summary): New function.
(ipa_profile_read_summary): New function.
(ipa_profile): Decide on threshold.
(pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary.
* Makefile.in (ipa.o): Update dependencies.
* lto-streamer.h (LTO_section_ipa_profile): New section.
2013-03-29 Gabriel Dos Reis <gdr@integrable-solutions.net>
* tree.h (VAR_P): New.
......
......@@ -2903,7 +2903,8 @@ varpool.o : varpool.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_FLOW_H)
ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \
$(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
$(IPA_UTILS_H)
$(IPA_UTILS_H) tree-inline.h $(HASH_TABLE_H) profile.h $(PARAMS_H) \
$(LTO_STREAMER_H) $(DATA_STREAMER_H)
ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) \
$(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \
......
......@@ -32,6 +32,12 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-utils.h"
#include "pointer-set.h"
#include "ipa-inline.h"
#include "hash-table.h"
#include "tree-inline.h"
#include "profile.h"
#include "params.h"
#include "lto-streamer.h"
#include "data-streamer.h"
/* Look for all functions inlined to NODE and update their inlined_to pointers
to INLINED_TO. */
......@@ -1040,6 +1046,201 @@ struct ipa_opt_pass_d pass_ipa_whole_program_visibility =
NULL, /* variable_transform */
};
/* Entry in the histogram.  There is one entry per distinct execution count;
   account_time_size adds the TIME and SIZE of everything executed COUNT
   times into the entry for that count.  */
struct histogram_entry
{
/* Execution count this entry stands for; histogram_hash hashes and compares
   entries by this field alone.  */
gcov_type count;
/* Sum of the time values accounted to this count.  */
int time;
/* Sum of the size values accounted to this count.  */
int size;
};
/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look
   up duplicate entries; once construction is finished the vector is sorted
   with cmp_counts, i.e. by decreasing count.  */
vec<histogram_entry *> histogram;
/* Allocation pool the entries referenced from HISTOGRAM are taken from.  */
static alloc_pool histogram_pool;
/* Hashtable support for looking up histogram entries by their execution
   count.  Two entries are considered equal when their COUNT fields match;
   TIME and SIZE take no part in hashing or comparison.  */

struct histogram_hash : typed_noop_remove <histogram_entry>
{
  typedef histogram_entry value_type;
  typedef histogram_entry compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

/* Hash VAL by its count.  The count is narrowed to hashval_t, so distinct
   counts may share a hash value; equal () resolves such collisions.  */

inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return (hashval_t) val->count;
}

/* Return nonzero when VAL and VAL2 describe the same execution count.  */

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}
/* Add TIME and SIZE to the HISTOGRAM entry describing code executed COUNT
   times, creating that entry first when COUNT has not been seen before.
   HASHTABLE is the side table, keyed by count, used to find an already
   existing entry; fresh entries are taken from histogram_pool.  */

static void
account_time_size (hash_table <histogram_hash> hashtable,
                   vec<histogram_entry *> &histogram,
                   gcov_type count, int time, int size)
{
  histogram_entry probe = {count, 0, 0};
  histogram_entry **slot = hashtable.find_slot (&probe, INSERT);
  histogram_entry *entry = *slot;

  if (entry == NULL)
    {
      /* First occurrence of COUNT: start from a zeroed entry and record it
         both in the hash slot and in the vector.  */
      entry = (histogram_entry *) pool_alloc (histogram_pool);
      *entry = probe;
      *slot = entry;
      histogram.safe_push (entry);
    }

  entry->time += time;
  entry->size += size;
}
int
cmp_counts (const void *v1, const void *v2)
{
const histogram_entry *h1 = *(const histogram_entry * const *)v1;
const histogram_entry *h2 = *(const histogram_entry * const *)v2;
if (h1->count < h2->count)
return 1;
if (h1->count > h2->count)
return -1;
return 0;
}
/* Dump HISTOGRAM to FILE. */
static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
unsigned int i;
gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
fprintf (dump_file, "Histogram:\n");
for (i = 0; i < histogram.length (); i++)
{
overall_time += histogram[i]->count * histogram[i]->time;
overall_size += histogram[i]->size;
}
if (!overall_time)
overall_time = 1;
if (!overall_size)
overall_size = 1;
for (i = 0; i < histogram.length (); i++)
{
cumulated_time += histogram[i]->count * histogram[i]->time;
cumulated_size += histogram[i]->size;
fprintf (file, " "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n",
(HOST_WIDEST_INT) histogram[i]->count,
histogram[i]->time,
cumulated_time * 100.0 / overall_time,
histogram[i]->size,
cumulated_size * 100.0 / overall_size);
}
}
/* Build the histogram from the CFG profiles: for every basic block of every
   function with a gimple body, sum the time and size estimates of its
   statements and account them under the block's execution count.  The
   resulting vector is sorted with cmp_counts.  */

static void
ipa_profile_generate_summary (void)
{
  hash_table <histogram_hash> hashtable;
  struct cgraph_node *node;
  basic_block bb;

  hashtable.create (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
                                      sizeof (struct histogram_entry), 10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->symbol.decl))
        {
          int bb_time = 0;
          int bb_size = 0;
          gimple_stmt_iterator gsi = gsi_start_bb (bb);

          /* Walk the statements of BB, estimating each one twice: once with
             the time weights and once with the size weights.  */
          while (!gsi_end_p (gsi))
            {
              bb_time += estimate_num_insns (gsi_stmt (gsi),
                                             &eni_time_weights);
              bb_size += estimate_num_insns (gsi_stmt (gsi),
                                             &eni_size_weights);
              gsi_next (&gsi);
            }

          account_time_size (hashtable, histogram, bb->count,
                             bb_time, bb_size);
        }
    }

  /* The hash table is only needed while merging duplicates.  */
  hashtable.dispose ();
  histogram.qsort (cmp_counts);
}
/* Serialize the ipa info for lto. */
static void
ipa_profile_write_summary (void)
{
struct lto_simple_output_block *ob
= lto_create_simple_output_block (LTO_section_ipa_profile);
unsigned int i;
streamer_write_uhwi_stream (ob->main_stream, histogram.length());
for (i = 0; i < histogram.length (); i++)
{
streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
}
lto_destroy_simple_output_block (ob);
}
/* Read the histograms streamed by ipa_profile_write_summary from every
   input file and merge them into the global histogram: entries with the same
   count are combined by account_time_size.  Files without an
   LTO_section_ipa_profile section are skipped.  The merged vector is sorted
   with cmp_counts.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  hash_table <histogram_hash> hashtable;

  hashtable.create (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
                                      sizeof (struct histogram_entry), 10);

  /* FILE_DATA_VEC is terminated by a null pointer.  */
  for (int j = 0; (file_data = file_data_vec[j]) != NULL; j++)
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
        = lto_create_simple_input_block (file_data, LTO_section_ipa_profile,
                                         &data, &len);

      if (!ib)
        continue;

      /* Same layout as written: entry count, then count/time/size triples.  */
      unsigned int num = streamer_read_uhwi (ib);
      for (unsigned int n = 0; n < num; n++)
        {
          gcov_type count = streamer_read_gcov_count (ib);
          int time = streamer_read_uhwi (ib);
          int size = streamer_read_uhwi (ib);

          account_time_size (hashtable, histogram, count, time, size);
        }

      lto_destroy_simple_input_block (file_data, LTO_section_ipa_profile,
                                      ib, data, len);
    }

  hashtable.dispose ();
  histogram.qsort (cmp_counts);
}
/* Simple ipa profile pass propagating frequencies across the callgraph. */
......@@ -1051,6 +1252,75 @@ ipa_profile (void)
int order_pos;
bool something_changed = false;
int i;
gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
if (dump_file)
dump_histogram (dump_file, histogram);
for (i = 0; i < (int)histogram.length (); i++)
{
overall_time += histogram[i]->count * histogram[i]->time;
overall_size += histogram[i]->size;
}
if (overall_time)
{
gcov_type threshold;
gcc_assert (overall_size);
if (dump_file)
{
gcov_type min, cumulated_time = 0, cumulated_size = 0;
fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n",
(HOST_WIDEST_INT)overall_time);
min = get_hot_bb_threshold ();
for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
i++)
{
cumulated_time += histogram[i]->count * histogram[i]->time;
cumulated_size += histogram[i]->size;
}
fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
" Time:%3.2f%% Size:%3.2f%%\n",
(HOST_WIDEST_INT)min,
cumulated_time * 100.0 / overall_time,
cumulated_size * 100.0 / overall_size);
}
cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
threshold = 0;
for (i = 0; cumulated < cutoff; i++)
{
cumulated += histogram[i]->count * histogram[i]->time;
threshold = histogram[i]->count;
}
if (!threshold)
threshold = 1;
if (dump_file)
{
gcov_type cumulated_time = 0, cumulated_size = 0;
for (i = 0;
i < (int)histogram.length () && histogram[i]->count >= threshold;
i++)
{
cumulated_time += histogram[i]->count * histogram[i]->time;
cumulated_size += histogram[i]->size;
}
fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
" Time:%3.2f%% Size:%3.2f%%\n",
(HOST_WIDEST_INT)threshold,
cumulated_time * 100.0 / overall_time,
cumulated_size * 100.0 / overall_size);
}
if (threshold > get_hot_bb_threshold ()
|| in_lto_p)
{
if (dump_file)
fprintf (dump_file, "Threshold updated.\n");
set_hot_bb_threshold (threshold);
}
}
histogram.release();
free_alloc_pool (histogram_pool);
order_pos = ipa_reverse_postorder (order);
for (i = order_pos - 1; i >= 0; i--)
......@@ -1112,9 +1382,9 @@ struct ipa_opt_pass_d pass_ipa_profile =
0, /* todo_flags_start */
0 /* todo_flags_finish */
},
NULL, /* generate_summary */
NULL, /* write_summary */
NULL, /* read_summary */
ipa_profile_generate_summary, /* generate_summary */
ipa_profile_write_summary, /* write_summary */
ipa_profile_read_summary, /* read_summary */
NULL, /* write_optimization_summary */
NULL, /* read_optimization_summary */
NULL, /* stmt_fixup */
......
......@@ -604,11 +604,11 @@ output_profile_summary (struct lto_simple_output_block *ob)
units. */
gcc_assert (profile_info->runs);
streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_max);
/* sum_all is needed for computing the working set with the
histogram. */
streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_all);
/* Create and output a bitpack of non-zero histogram entries indices. */
bp = bitpack_create (ob->main_stream);
......@@ -620,13 +620,18 @@ output_profile_summary (struct lto_simple_output_block *ob)
{
if (!profile_info->histogram[h_ix].num_counters)
continue;
streamer_write_uhwi_stream (ob->main_stream,
streamer_write_gcov_count_stream (ob->main_stream,
profile_info->histogram[h_ix].num_counters);
streamer_write_uhwi_stream (ob->main_stream,
streamer_write_gcov_count_stream (ob->main_stream,
profile_info->histogram[h_ix].min_value);
streamer_write_uhwi_stream (ob->main_stream,
streamer_write_gcov_count_stream (ob->main_stream,
profile_info->histogram[h_ix].cum_value);
}
}
/* IPA-profile computes hot bb threshold based on cumulated
whole program profile. We need to stream it down to ltrans. */
if (flag_wpa)
streamer_write_gcov_count_stream (ob->main_stream,
get_hot_bb_threshold ());
}
else
streamer_write_uhwi_stream (ob->main_stream, 0);
......@@ -1259,8 +1264,8 @@ input_profile_summary (struct lto_input_block *ib,
if (runs)
{
file_data->profile_info.runs = runs;
file_data->profile_info.sum_max = streamer_read_uhwi (ib);
file_data->profile_info.sum_all = streamer_read_uhwi (ib);
file_data->profile_info.sum_max = streamer_read_gcov_count (ib);
file_data->profile_info.sum_all = streamer_read_gcov_count (ib);
memset (file_data->profile_info.histogram, 0,
sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
......@@ -1279,12 +1284,16 @@ input_profile_summary (struct lto_input_block *ib,
continue;
file_data->profile_info.histogram[h_ix].num_counters
= streamer_read_uhwi (ib);
= streamer_read_gcov_count (ib);
file_data->profile_info.histogram[h_ix].min_value
= streamer_read_uhwi (ib);
= streamer_read_gcov_count (ib);
file_data->profile_info.histogram[h_ix].cum_value
= streamer_read_uhwi (ib);
= streamer_read_gcov_count (ib);
}
/* IPA-profile computes hot bb threshold based on cumulated
whole program profile. We need to stream it down to ltrans. */
if (flag_ltrans)
set_hot_bb_threshold (streamer_read_gcov_count (ib));
}
}
......
......@@ -55,6 +55,7 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
"jmpfuncs",
"pureconst",
"reference",
"profile",
"symbol_nodes",
"opts",
"cgraphopt",
......
......@@ -243,6 +243,7 @@ enum lto_section_type
LTO_section_jump_functions,
LTO_section_ipa_pure_const,
LTO_section_ipa_reference,
LTO_section_ipa_profile,
LTO_section_symtab_nodes,
LTO_section_opts,
LTO_section_cgraph_opt_sum,
......
......@@ -128,25 +128,42 @@ maybe_hot_frequency_p (struct function *fun, int freq)
return true;
}
/* Cached threshold for hot BB counts; -1 means it has not been determined
   yet.  */
static gcov_type min_count = -1;

/* Return the threshold for hot BB counts.  Unless a value is already cached
   (or was installed by set_hot_bb_threshold), it is taken from the working
   set found for HOT_BB_COUNT_WS_PERMILLE and cached for later calls.  */

gcov_type
get_hot_bb_threshold ()
{
  if (min_count != -1)
    return min_count;

  gcov_working_set_t *ws
    = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
  gcc_assert (ws);
  min_count = ws->min_counter;
  return min_count;
}
/* Set the threshold for hot BB counts to MIN.  The value is stored in
   min_count, so subsequent get_hot_bb_threshold calls return MIN instead of
   deriving the threshold from the working set; passing -1 resets the cache
   and makes get_hot_bb_threshold compute the value again.  */
void
set_hot_bb_threshold (gcov_type min)
{
min_count = min;
}
/* Return TRUE if profile count COUNT is considered to be hot. */
static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
gcov_working_set_t *ws;
static gcov_type min_count = -1;
if (fun && profile_status_for_function (fun) != PROFILE_READ)
return true;
/* Code executed at most once is not hot. */
if (profile_info->runs >= count)
return false;
if (min_count == -1)
{
ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
gcc_assert (ws);
min_count = ws->min_counter;
}
return (count >= min_count);
return (count >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
......
......@@ -48,4 +48,8 @@ extern void del_node_map (void);
extern void compute_working_sets (void);
/* In predict.c. */
extern gcov_type get_hot_bb_threshold (void);
extern void set_hot_bb_threshold (gcov_type);
#endif /* PROFILE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment