Commit 517048ce by Jan Hubicka (committed by Jan Hubicka)

cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.

2017-11-18  Jan Hubicka  <hubicka@ucw.cz>

	* cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.
	(cgraph_edge::clone): Cleanup updating of profile.
	* ipa-cp.c (update_profiling_info): Likewise.
	* ipa-inline-transform.c (inline_transform): Likewise.
	* ipa-inline.c (inline_small_functions): Add missing space to dump.
	* ipa-split.c (execute_split_functions): Do not split when function
	is cold.
	* predict.c (estimate_bb_frequencies): Cleanup updating of profile.
	* profile-count.c (profile_count::dump): Add global0.
	(profile_count::to_cgraph_frequency): Do not ICE when entry is
	undefined.
	(profile_count::to_sreal_scale): Likewise.
	(profile_count::adjust_for_ipa_scaling): Fix typo in comment.
	(profile_count::combine_with_ipa_count): New function.
	* profile-count.h (profile_guessed_global0adjusted): New.
	(profile_count::adjusted_zero): New.
	(profile_count::global0adjusted): New.
	(profile_count::combine_with_ipa_count): New.
	* tree-inline.c (copy_edges_for_bb): Add NUM/DEN argument;
	correct profile of return block of split functions.
	(copy_cfg_body): Remove unused profile_count.
	(copy_body): Likewise.
	(expand_call_inline): Update.
	(tree_function_versioning): Update.

From-SVN: r254919
gcc/cgraphclones.c

@@ -91,7 +91,7 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
  {
  cgraph_edge *new_edge;
  profile_count::adjust_for_ipa_scaling (&num, &den);
- profile_count gcov_count = count.apply_scale (num, den);
+ profile_count prof_count = count.apply_scale (num, den);
  if (indirect_unknown_callee)
  {
@@ -104,19 +104,19 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
  {
  cgraph_node *callee = cgraph_node::get (decl);
  gcc_checking_assert (callee);
- new_edge = n->create_edge (callee, call_stmt, gcov_count);
+ new_edge = n->create_edge (callee, call_stmt, prof_count);
  }
  else
  {
  new_edge = n->create_indirect_edge (call_stmt,
  indirect_info->ecf_flags,
- gcov_count, false);
+ prof_count, false);
  *new_edge->indirect_info = *indirect_info;
  }
  }
  else
  {
- new_edge = n->create_edge (callee, call_stmt, gcov_count);
+ new_edge = n->create_edge (callee, call_stmt, prof_count);
  if (indirect_info)
  {
  new_edge->indirect_info
@@ -135,12 +135,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
  new_edge->in_polymorphic_cdtor = in_polymorphic_cdtor;
  /* Update IPA profile. Local profiles need no updating in original. */
- if (update_original
- && count.ipa () == count && new_edge->count.ipa () == new_edge->count)
- count -= new_edge->count;
- else if (caller->count.global0 () == caller->count
- && !(count == profile_count::zero ()))
- count = count.global0 ();
+ if (update_original)
+ count = count.combine_with_ipa_count (count.ipa ()
+ - new_edge->count.ipa ());
  symtab->call_edge_duplication_hooks (this, new_edge);
  return new_edge;
  }
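The rewrite above replaces the two hand-rolled cases (subtract when both counts are IPA-valid, otherwise drop to global0) with a single call to the new combine_with_ipa_count helper. A rough worked example of the arithmetic it is meant to perform (a standalone sketch using plain integers and made-up numbers, not GCC's profile_count API):

#include <cassert>
#include <cstdint>

int main ()
{
  // Profile feedback says the original call edge runs 1000 times (count.ipa ()).
  uint64_t edge_ipa_count = 1000;

  // A clone of the caller is created for 40% of the incoming calls, so the
  // cloned edge is expected to take 400 of those executions.
  uint64_t clone_edge_ipa_count = 400;

  // The new update is count = count.combine_with_ipa_count (count.ipa ()
  // - new_edge->count.ipa ()); with these numbers the IPA difference is
  // nonzero, so the original edge simply keeps the remainder.
  uint64_t remainder = edge_ipa_count - clone_edge_ipa_count;
  assert (remainder == 600);

  // Had the clone taken all 1000 executions, the difference would be zero and
  // combine_with_ipa_count would keep the locally guessed shape of the count
  // while marking it as globally 0 (see profile-count.c below).
  return 0;
}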
@@ -431,22 +428,12 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
  if (new_inlined_to)
  dump_callgraph_transformation (this, new_inlined_to, "inlining to");
- if (prof_count == profile_count::zero ()
- && !(count == profile_count::zero ()))
- prof_count = count.global0 ();
+ prof_count = count.combine_with_ipa_count (prof_count);
  new_node->count = prof_count;
  /* Update IPA profile. Local profiles need no updating in original. */
- if (update_original && !(count == profile_count::zero ())
- && count.ipa () == count && prof_count.ipa () == prof_count)
- {
- if (count.nonzero_p ()
- && !(count - prof_count).nonzero_p ())
- count = count.global0 ();
- else
- count -= prof_count;
- }
+ if (update_original)
+ count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ());
  new_node->decl = new_decl;
  new_node->register_symbol ();
  new_node->origin = origin;
gcc/ipa-cp.c

@@ -3695,18 +3695,9 @@ update_profiling_info (struct cgraph_node *orig_node,
  }
  }
- if (!new_sum.nonzero_p ())
- {
- new_sum = new_sum.global0 ();
- new_node->count = new_sum;
- remainder = orig_node->count;
- }
- else
- {
- remainder = orig_node_count - new_sum;
- if (!remainder.nonzero_p ())
- remainder = orig_node_count.global0 ();
- }
+ remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
+ - new_sum.ipa ());
+ new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  orig_node->count = remainder;
  for (cs = new_node->callees; cs; cs = cs->next_callee)
gcc/ipa-inline-transform.c

@@ -657,11 +657,10 @@ inline_transform (struct cgraph_node *node)
  {
  profile_count num = node->count;
  profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
- bool scale = num.initialized_p () && den.ipa_p ()
- && (den.nonzero_p () || num == profile_count::zero ())
- && !(num == den.ipa ());
+ bool scale = num.initialized_p () && !(num == den);
  if (scale)
  {
+ profile_count::adjust_for_ipa_scaling (&num, &den);
  if (dump_file)
  {
  fprintf (dump_file, "Applying count scale ");
@@ -672,11 +671,12 @@ inline_transform (struct cgraph_node *node)
  }
  basic_block bb;
+ cfun->cfg->count_max = profile_count::uninitialized ();
  FOR_ALL_BB_FN (bb, cfun)
- if (num == profile_count::zero ())
- bb->count = bb->count.global0 ();
- else
+ {
  bb->count = bb->count.apply_scale (num, den);
+ cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
+ }
  ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
  }
  todo = optimize_inline_calls (current_function_decl);
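With the simplified guard, the decision to scale is just "the function's count differs from its entry block count", and adjust_for_ipa_scaling now sanitizes the pair before every block is scaled by num/den. A simplified model of that per-block scaling loop (a standalone sketch with hypothetical toy types and names; GCC's real code uses profile_count::apply_scale and handles many more corner cases):

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for a block count; GCC uses profile_count here.
struct toy_count { uint64_t val; };

// Scale every block count by num/den, tracking the running maximum on the
// side, roughly as the new inline_transform loop does for the inlined body.
static toy_count
scale_blocks (toy_count *blocks, int n, uint64_t num, uint64_t den)
{
  // Crude stand-in for adjust_for_ipa_scaling: never divide by zero.
  if (den == 0)
    den = 1;
  toy_count count_max = { 0 };
  for (int i = 0; i < n; i++)
    {
      blocks[i].val = blocks[i].val * num / den;
      if (blocks[i].val > count_max.val)
        count_max.val = blocks[i].val;
    }
  return count_max;
}

int main ()
{
  toy_count bbs[3] = { {100}, {40}, {10} };
  toy_count max = scale_blocks (bbs, 3, 3, 2);   // scale everything by 3/2
  printf ("max scaled block count: %llu\n", (unsigned long long) max.val);
  return 0;
}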
gcc/ipa-inline.c

@@ -1873,7 +1873,8 @@ inline_small_functions (void)
  /* Be sure that caches are maintained consistent.
  This check is affected by scaling roundoff errors when compiling for
  IPA this we skip it in that case. */
- if (!edge->callee->count.ipa_p ())
+ if (!edge->callee->count.ipa_p ()
+ && (!max_count.initialized_p () || !max_count.nonzero_p ()))
  {
  sreal cached_badness = edge_badness (edge, false);
@@ -1951,7 +1952,7 @@ inline_small_functions (void)
  {
  fprintf (dump_file, " Called ");
  edge->count.ipa ().dump (dump_file);
- fprintf (dump_file, "times\n");
+ fprintf (dump_file, " times\n");
  }
  if (dump_flags & TDF_DETAILS)
  edge_badness (edge, true);
gcc/ipa-split.c

@@ -1752,6 +1752,12 @@ execute_split_functions (void)
  fprintf (dump_file, "Not splitting: main function.\n");
  return 0;
  }
+ if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not splitting: function is unlikely executed.\n");
+ return 0;
+ }
  /* This can be relaxed; function might become inlinable after splitting
  away the uninlinable part. */
  if (ipa_fn_summaries
gcc/predict.c

@@ -3607,9 +3607,6 @@ estimate_bb_frequencies (bool force)
  to outermost to examine frequencies for back edges. */
  estimate_loops ();
- bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
- && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ();
  freq_max = 0;
  FOR_EACH_BB_FN (bb, cfun)
  if (freq_max < BLOCK_INFO (bb)->frequency)
@@ -3618,6 +3615,7 @@ estimate_bb_frequencies (bool force)
  freq_max = real_bb_freq_max / freq_max;
  if (freq_max < 16)
  freq_max = 16;
+ profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
  cfun->cfg->count_max = profile_count::uninitialized ();
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
  {
@@ -3626,10 +3624,8 @@ estimate_bb_frequencies (bool force)
  /* If we have profile feedback in which this function was never
  executed, then preserve this info. */
- if (global0)
- bb->count = count.global0 ();
- else if (!(bb->count == profile_count::zero ()))
- bb->count = count.guessed_local ();
+ if (!(bb->count == profile_count::zero ()))
+ bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
  cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
  }
gcc/profile-count.c

@@ -47,6 +47,8 @@ profile_count::dump (FILE *f) const
  fprintf (f, " (estimated locally)");
  else if (m_quality == profile_guessed_global0)
  fprintf (f, " (estimated locally, globally 0)");
+ else if (m_quality == profile_guessed_global0adjusted)
+ fprintf (f, " (estimated locally, globally 0 adjusted)");
  else if (m_quality == profile_adjusted)
  fprintf (f, " (adjusted)");
  else if (m_quality == profile_afdo)
@@ -245,7 +247,7 @@ profile_count::to_frequency (struct function *fun) const
  int
  profile_count::to_cgraph_frequency (profile_count entry_bb_count) const
  {
- if (!initialized_p ())
+ if (!initialized_p () || !entry_bb_count.initialized_p ())
  return CGRAPH_FREQ_BASE;
  if (*this == profile_count::zero ())
  return 0;
@@ -262,7 +264,7 @@ profile_count::to_cgraph_frequency (profile_count entry_bb_count) const
  sreal
  profile_count::to_sreal_scale (profile_count in, bool *known) const
  {
- if (!initialized_p ())
+ if (!initialized_p () || !in.initialized_p ())
  {
  if (known)
  *known = false;
@@ -272,7 +274,6 @@ profile_count::to_sreal_scale (profile_count in, bool *known) const
  *known = true;
  if (*this == profile_count::zero ())
  return 0;
- gcc_checking_assert (in.initialized_p ());
  if (!in.m_val)
  {
@@ -297,7 +298,7 @@ profile_count::adjust_for_ipa_scaling (profile_count *num,
  /* Scaling is no-op if NUM and DEN are the same. */
  if (*num == *den)
  return;
- /* Scaling to zero is always zeor. */
+ /* Scaling to zero is always zero. */
  if (*num == profile_count::zero ())
  return;
  /* If den is non-zero we are safe. */
@@ -308,3 +309,21 @@ profile_count::adjust_for_ipa_scaling (profile_count *num,
  *den = den->force_nonzero ();
  *num = num->force_nonzero ();
  }
+ /* THIS is a count of bb which is known to be executed IPA times.
+ Combine this information into bb counter. This means returning IPA
+ if it is nonzero, not changing anything if IPA is uninitialized
+ and if IPA is zero, turning THIS into corresponding local profile with
+ global0. */
+ profile_count
+ profile_count::combine_with_ipa_count (profile_count ipa)
+ {
+ ipa = ipa.ipa ();
+ if (ipa.nonzero_p ())
+ return ipa;
+ if (!ipa.initialized_p ())
+ return *this;
+ if (ipa == profile_count::zero ())
+ return this->global0 ();
+ return this->global0adjusted ();
+ }
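Taken together with the new qualities in profile-count.h below, the helper has exactly three outcomes: a nonzero IPA count wins outright, an uninitialized IPA count changes nothing, and a zero IPA count demotes the local estimate to one of the "globally 0" qualities while keeping its shape. A minimal standalone model of those cases (hypothetical simplified types; the real profile_count packs a counter value and a quality field and offers many more operations):

#include <cassert>
#include <cstdint>

// Simplified stand-ins for profile_count and its quality field; only the
// distinctions combine_with_ipa_count cares about are modeled here.
enum toy_quality { UNINITIALIZED, GUESSED_LOCAL, GUESSED_GLOBAL0,
                   GUESSED_GLOBAL0_ADJUSTED, ADJUSTED, PRECISE };

struct toy_count { uint64_t val; toy_quality quality; };

// Sketch of the three outcomes described above.
static toy_count
combine_with_ipa_count (toy_count local, toy_count ipa)
{
  if (ipa.quality != UNINITIALIZED && ipa.val > 0)
    return ipa;
  if (ipa.quality == UNINITIALIZED)
    return local;
  if (ipa.quality == PRECISE)            // exactly the precise zero
    return { local.val, GUESSED_GLOBAL0 };
  return { local.val, GUESSED_GLOBAL0_ADJUSTED };
}

int main ()
{
  toy_count local = { 50, GUESSED_LOCAL };
  assert (combine_with_ipa_count (local, { 400, PRECISE }).val == 400);
  assert (combine_with_ipa_count (local, { 0, UNINITIALIZED }).quality == GUESSED_LOCAL);
  assert (combine_with_ipa_count (local, { 0, PRECISE }).quality == GUESSED_GLOBAL0);
  assert (combine_with_ipa_count (local, { 0, ADJUSTED }).quality == GUESSED_GLOBAL0_ADJUSTED);
  return 0;
}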
gcc/profile-count.h

@@ -36,22 +36,24 @@ enum profile_quality {
  Never used by probabilities. */
  profile_guessed_global0 = 1,
+ /* Same as profile_guessed_global0 but global count is adjusted 0. */
+ profile_guessed_global0adjusted = 2,
  /* Profile is based on static branch prediction heuristics. It may or may
  not reflect the reality but it can be compared interprocedurally
  (for example, we inlined function w/o profile feedback into function
  with feedback and propagated from that).
  Never used by probablities. */
- profile_guessed = 2,
+ profile_guessed = 3,
  /* Profile was determined by autofdo. */
- profile_afdo = 3,
+ profile_afdo = 4,
  /* Profile was originally based on feedback but it was adjusted
  by code duplicating optimization. It may not precisely reflect the
  particular code path. */
- profile_adjusted = 4,
+ profile_adjusted = 5,
  /* Profile was read from profile feedback or determined by accurate static
  method. */
- profile_precise = 5
+ profile_precise = 7
  };
  /* The base value for branch probability notes and edge probabilities. */
@@ -637,6 +639,13 @@ public:
  {
  return from_gcov_type (0);
  }
+ static profile_count adjusted_zero ()
+ {
+ profile_count c;
+ c.m_val = 0;
+ c.m_quality = profile_adjusted;
+ return c;
+ }
  static profile_count guessed_zero ()
  {
  profile_count c;
@@ -978,7 +987,7 @@ public:
  return ret;
  }
- /* We know that profile is globally0 but keep local profile if present. */
+ /* We know that profile is globally 0 but keep local profile if present. */
  profile_count global0 () const
  {
  profile_count ret = *this;
@@ -988,6 +997,17 @@ public:
  return ret;
  }
+ /* We know that profile is globally adjusted 0 but keep local profile
+ if present. */
+ profile_count global0adjusted () const
+ {
+ profile_count ret = *this;
+ if (!initialized_p ())
+ return *this;
+ ret.m_quality = profile_guessed_global0adjusted;
+ return ret;
+ }
  /* Return THIS with quality dropped to GUESSED. */
  profile_count guessed () const
  {
@@ -1000,10 +1020,12 @@ public:
  acorss functions. */
  profile_count ipa () const
  {
- if (m_quality > profile_guessed_global0)
+ if (m_quality > profile_guessed_global0adjusted)
  return *this;
  if (m_quality == profile_guessed_global0)
  return profile_count::zero ();
+ if (m_quality == profile_guessed_global0adjusted)
+ return profile_count::adjusted_zero ();
  return profile_count::uninitialized ();
  }
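The new quality value slots in between profile_guessed_global0 and profile_guessed, so ipa () only needs its threshold bumped and one extra case. A compact standalone model of that dispatch (hypothetical toy_* names mirroring the renumbered enumerators above, not the real class):

#include <cassert>

// Hypothetical mirror of the renumbered qualities.
enum toy_quality
{
  toy_uninitialized = 0,
  toy_guessed_global0 = 1,
  toy_guessed_global0adjusted = 2,
  toy_guessed = 3,
  toy_afdo = 4,
  toy_adjusted = 5,
  toy_precise = 7
};

// What a count of each quality turns into when asked for its IPA view.
enum toy_ipa_result { RETURN_SELF, RETURN_ZERO, RETURN_ADJUSTED_ZERO, RETURN_UNINIT };

static toy_ipa_result
toy_ipa (toy_quality q)
{
  if (q > toy_guessed_global0adjusted)
    return RETURN_SELF;
  if (q == toy_guessed_global0)
    return RETURN_ZERO;
  if (q == toy_guessed_global0adjusted)
    return RETURN_ADJUSTED_ZERO;
  return RETURN_UNINIT;
}

int main ()
{
  assert (toy_ipa (toy_precise) == RETURN_SELF);
  assert (toy_ipa (toy_guessed_global0) == RETURN_ZERO);
  assert (toy_ipa (toy_guessed_global0adjusted) == RETURN_ADJUSTED_ZERO);
  assert (toy_ipa (toy_uninitialized) == RETURN_UNINIT);
  return 0;
}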
@@ -1054,6 +1076,13 @@ public:
  kinds. */
  static void adjust_for_ipa_scaling (profile_count *num, profile_count *den);
+ /* THIS is a count of bb which is known to be executed IPA times.
+ Combine this information into bb counter. This means returning IPA
+ if it is nonzero, not changing anything if IPA is uninitialized
+ and if IPA is zero, turning THIS into corresponding local profile with
+ global0. */
+ profile_count combine_with_ipa_count (profile_count ipa);
  /* LTO streaming support. */
  static profile_count stream_in (struct lto_input_block *);
  void stream_out (struct output_block *);
gcc/tree-inline.c

@@ -2173,7 +2173,7 @@ update_ssa_across_abnormal_edges (basic_block bb, basic_block ret_bb,
  debug stmts are left after a statement that must end the basic block. */
  static bool
- copy_edges_for_bb (basic_block bb,
+ copy_edges_for_bb (basic_block bb, profile_count num, profile_count den,
  basic_block ret_bb, basic_block abnormal_goto_dest)
  {
  basic_block new_bb = (basic_block) bb->aux;
@@ -2204,6 +2204,14 @@ copy_edges_for_bb (basic_block bb,
  if (bb->index == ENTRY_BLOCK || bb->index == EXIT_BLOCK)
  return false;
+ /* When doing function splitting, we must decreate count of the return block
+ which was previously reachable by block we did not copy. */
+ if (single_succ_p (bb) && single_succ_edge (bb)->dest->index == EXIT_BLOCK)
+ FOR_EACH_EDGE (old_edge, ei, bb->preds)
+ if (old_edge->src->index != ENTRY_BLOCK
+ && !old_edge->src->aux)
+ new_bb->count -= old_edge->count ().apply_scale (num, den);
  for (si = gsi_start_bb (new_bb); !gsi_end_p (si);)
  {
  gimple *copy_stmt;
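The new NUM/DEN-scaled subtraction handles the function-splitting case: predecessors of the return block that stay behind in the original function no longer feed the copied return block, so their contribution has to come off its count. A worked toy example of that arithmetic (made-up numbers and plain integers, not the GCC edge/count API):

#include <cassert>
#include <cstdint>

int main ()
{
  // Original function: the return block is reached 1000 times in total,
  // 300 of them through a cold path that will NOT be copied into the
  // split-off part.
  uint64_t return_bb_count = 1000;
  uint64_t uncopied_pred_count = 300;

  // The copy is scaled by num/den (say the profile is scaled by 1/2 into
  // the new body), just like copy_edges_for_bb scales old_edge->count ().
  uint64_t num = 1, den = 2;

  uint64_t copied_return_count = return_bb_count * num / den;
  copied_return_count -= uncopied_pred_count * num / den;

  // 1000/2 - 300/2 = 350 executions actually flow to the copied return block.
  assert (copied_return_count == 350);
  return 0;
}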
@@ -2465,23 +2473,16 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, profile_count count)
  profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun);
- if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ()
- && count.ipa ().initialized_p ())
- {
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->count =
- ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
- ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
- EXIT_BLOCK_PTR_FOR_FN (cfun)->count =
- EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
- ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
- }
- else
- {
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
- = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count;
- EXIT_BLOCK_PTR_FOR_FN (cfun)->count
- = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count;
- }
+ profile_count num = count;
+ profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count;
+ profile_count::adjust_for_ipa_scaling (&num, &den);
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count =
+ ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
+ ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->count =
+ EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
+ ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
  if (src_cfun->eh)
  init_eh_for_function ();
@@ -2642,7 +2643,7 @@ redirect_all_calls (copy_body_data * id, basic_block bb)
  another function. Walks FN via CFG, returns new fndecl. */
  static tree
- copy_cfg_body (copy_body_data * id, profile_count,
+ copy_cfg_body (copy_body_data * id,
  basic_block entry_block_map, basic_block exit_block_map,
  basic_block new_entry)
  {
@@ -2723,12 +2724,13 @@ copy_cfg_body (copy_body_data * id, profile_count,
  FOR_ALL_BB_FN (bb, cfun_to_copy)
  if (!id->blocks_to_copy
  || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
- need_debug_cleanup |= copy_edges_for_bb (bb, exit_block_map,
+ need_debug_cleanup |= copy_edges_for_bb (bb, num, den, exit_block_map,
  abnormal_goto_dest);
  if (new_entry)
  {
- edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU);
+ edge e = make_edge (entry_block_map, (basic_block)new_entry->aux,
+ EDGE_FALLTHRU);
  e->probability = profile_probability::always ();
  }
@@ -2927,7 +2929,7 @@ copy_tree_body (copy_body_data *id)
  another function. */
  static tree
- copy_body (copy_body_data *id, profile_count count,
+ copy_body (copy_body_data *id,
  basic_block entry_block_map, basic_block exit_block_map,
  basic_block new_entry)
  {
@@ -2936,7 +2938,7 @@ copy_body (copy_body_data *id, profile_count count,
  /* If this body has a CFG, walk CFG and copy. */
  gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl)));
- body = copy_cfg_body (id, count, entry_block_map, exit_block_map,
+ body = copy_cfg_body (id, entry_block_map, exit_block_map,
  new_entry);
  copy_debug_stmts (id);
@@ -4684,8 +4686,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
  function in any way before this point, as this CALL_EXPR may be
  a self-referential call; if we're calling ourselves, we need to
  duplicate our body before altering anything. */
- copy_body (id, cg_edge->callee->count,
- bb, return_block, NULL);
+ copy_body (id, bb, return_block, NULL);
  reset_debug_bindings (id, stmt_gsi);
@@ -5857,7 +5858,7 @@ tree_function_versioning (tree old_decl, tree new_decl,
  DECL_RESULT (new_decl) = DECL_RESULT (old_decl);
  DECL_ARGUMENTS (new_decl) = DECL_ARGUMENTS (old_decl);
  initialize_cfun (new_decl, old_decl,
- old_entry_block->count);
+ new_entry ? new_entry->count : old_entry_block->count);
  if (DECL_STRUCT_FUNCTION (new_decl)->gimple_df)
  DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta
  = id.src_cfun->gimple_df->ipa_pta;
@@ -6004,8 +6005,7 @@ tree_function_versioning (tree old_decl, tree new_decl,
  }
  /* Copy the Function's body. */
- copy_body (&id, old_entry_block->count,
- ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun),
+ copy_body (&id, ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun),
  new_entry);
  /* Renumber the lexical scoping (non-code) blocks consecutively. */