Commit 34fbe3f0, authored and committed by Jan Hubicka

cgraphclones.c (localize_profile): New function.


	* cgraphclones.c (localize_profile): New function.
	(cgraph_node::create_clone): Use it for partial profiles.
	* common.opt (fprofile-partial-training): New flag.
	* doc/invoke.texi (-fprofile-partial-training): Document.
	* ipa-cp.c (update_profiling_info): For partial profiles do not
	set function profile to zero.
	* profile.c (compute_branch_probabilities): With partial profile
	watch if edge count is zero and turn all probabilities to guessed.
	(compute_branch_probabilities): For partial profiles do not apply
	profile when entry count is zero.
	* tree-profile.c (tree_profiling): Only do value_profile_transformations
	when profile is read.

From-SVN: r279013
parent 8575d592
2019-12-05 Jan Hubicka <hubicka@ucw.cz>
* cgraphclones.c (localize_profile): New function.
(cgraph_node::create_clone): Use it for partial profiles.
* common.opt (fprofile-partial-training): New flag.
* doc/invoke.texi (-fprofile-partial-training): Document.
* ipa-cp.c (update_profiling_info): For partial profiles do not
set function profile to zero.
* profile.c (compute_branch_probabilities): With partial profile
watch if edge count is zero and turn all probabilities to guessed.
(compute_branch_probabilities): For partial profiles do not apply
profile when entry count is zero.
* tree-profile.c (tree_profiling): Only do value_profile_transformations
when profile is read.
2019-12-05 Sudakshina Das <sudi.das@arm.com>
* tree-vect-loop.c (vect_model_reduction_cost): Remove reduction_type
......@@ -307,6 +307,22 @@ dump_callgraph_transformation (const cgraph_node *original,
}
}
/* Turn profile of N to local profile. */
static void
localize_profile (cgraph_node *n)
{
n->count = n->count.guessed_local ();
for (cgraph_edge *e = n->callees; e; e=e->next_callee)
{
e->count = e->count.guessed_local ();
if (!e->inline_failed)
localize_profile (e->callee);
}
for (cgraph_edge *e = n->indirect_calls; e; e=e->next_callee)
e->count = e->count.guessed_local ();
}
/* Create node representing clone of N executed COUNT times. Decrease
the execution counts from original node too.
The new clone will have decl set to DECL that may or may not be the same
......@@ -340,6 +356,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
cgraph_edge *e;
unsigned i;
profile_count old_count = count;
bool nonzero = count.ipa ().nonzero_p ();
if (new_inlined_to)
dump_callgraph_transformation (this, new_inlined_to, "inlining to");
......@@ -426,6 +443,15 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
if (call_duplication_hook)
symtab->call_cgraph_duplication_hooks (this, new_node);
/* With a partial training run we do not want to assume that the original's
count is zero whenever we redirect all executed edges to the clone.
Simply drop the profile to a local one in this case. */
if (update_original
&& opt_for_fn (decl, flag_profile_partial_training)
&& nonzero
&& count.ipa_p ()
&& !count.ipa ().nonzero_p ())
localize_profile (this);
if (!new_inlined_to)
dump_callgraph_transformation (this, new_node, suffix);
......
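
The new localize_profile above walks the inline tree rooted at a node and demotes its count, the counts of its call edges, and (recursively) the counts of callees already inlined into it from IPA quality to local, guessed quality. create_clone calls it only under -fprofile-partial-training, when updating an original whose measured count was non-zero before the clone's count was subtracted and becomes zero afterwards. The following standalone sketch illustrates the recursive walk with toy types; Quality, Node and Edge are illustrative stand-ins, not the GCC cgraph or profile_count API:

#include <cstdio>
#include <vector>

/* Toy stand-ins for cgraph_node, cgraph_edge and profile_count quality.  */
enum Quality { IPA, GUESSED_LOCAL };

struct Node;

struct Edge
{
  Quality count_quality;
  bool inlined;                 /* corresponds to !inline_failed in GCC */
  Node *callee;
};

struct Node
{
  const char *name;
  Quality count_quality;
  std::vector<Edge> callees;
  std::vector<Quality> indirect_call_counts;
};

/* Demote the node's profile, its outgoing edge counts and everything
   inlined into it from IPA quality to local, guessed quality.  */
static void
localize_profile (Node *n)
{
  n->count_quality = GUESSED_LOCAL;
  for (Edge &e : n->callees)
    {
      e.count_quality = GUESSED_LOCAL;
      if (e.inlined)
        localize_profile (e.callee);
    }
  for (Quality &q : n->indirect_call_counts)
    q = GUESSED_LOCAL;
}

int
main ()
{
  Node bar = { "bar", IPA, {}, {} };
  Node foo = { "foo", IPA, {}, {} };
  foo.callees.push_back ({ IPA, true, &bar });   /* bar is inlined into foo */
  localize_profile (&foo);
  std::printf ("%s: %s\n", foo.name,
               foo.count_quality == GUESSED_LOCAL ? "guessed-local" : "ipa");
  std::printf ("%s: %s\n", bar.name,
               bar.count_quality == GUESSED_LOCAL ? "guessed-local" : "ipa");
  return 0;
}
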
......@@ -2160,6 +2160,10 @@ fprofile-generate=
Common Joined RejectNegative
Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=.
fprofile-partial-training
Common Report Var(flag_profile_partial_training) Optimization
Do not assume that functions never executed during the train run are cold.
fprofile-use
Common Var(flag_profile_use)
Enable common options for performing profile feedback directed optimizations.
......
......@@ -453,8 +453,8 @@ Objective-C and Objective-C++ Dialects}.
-fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
-fprefetch-loop-arrays @gol
-fprofile-correction @gol
-fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
-fprofile-reorder-functions @gol
-fprofile-use -fprofile-use=@var{path} -fprofile-partial-training @gol
-fprofile-values -fprofile-reorder-functions @gol
-freciprocal-math -free -frename-registers -freorder-blocks @gol
-freorder-blocks-algorithm=@var{algorithm} @gol
-freorder-blocks-and-partition -freorder-functions @gol
......@@ -10634,6 +10634,19 @@ default, GCC emits an error message when an inconsistent profile is detected.
This option is enabled by @option{-fauto-profile}.
@item -fprofile-partial-training
@opindex fprofile-partial-training
With @code{-fprofile-use} all portions of programs not executed during the
train run are optimized aggressively for size rather than speed.  In some
cases it is not practical to train all possible hot paths in the program.
(For example, a program may contain functions specific to a given hardware
and the training run may not cover all hardware configurations the program
is later run on.)  With @code{-fprofile-partial-training} profile feedback
will be ignored for all functions not executed during the train run, leading
them to be optimized as if they were compiled without profile feedback.  This
leads to better performance when the train run is not representative, but it
also leads to significantly bigger code.
@item -fprofile-use
@itemx -fprofile-use=@var{path}
@opindex fprofile-use
......@@ -4295,6 +4295,15 @@ update_profiling_info (struct cgraph_node *orig_node,
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
- new_sum.ipa ());
/* With a partial training run we do not want to assume that the original's
count is zero whenever we redirect all executed edges to the clone.
Simply drop the profile to a local one in this case. */
if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
&& orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
&& flag_profile_partial_training)
remainder = remainder.guessed_local ();
new_sum = orig_node_count.combine_with_ipa_count (new_sum);
new_node->count = new_sum;
orig_node->count = remainder;
......
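
The ipa-cp.c hunk applies the same rule as create_clone: when the entire measured count has moved to the clone, -fprofile-partial-training keeps the original's remainder as a local guess instead of a measured zero, so the leftover body is not treated as proven cold. Below is a minimal standalone sketch of that decision; ToyCount and remainder_after_cloning are illustrative names using plain integers rather than GCC's profile_count arithmetic:

#include <cstdio>

/* Toy profile count: a value plus a flag saying whether it was measured
   (IPA quality) or is merely a local guess.  */
struct ToyCount
{
  long value;
  bool measured;
};

/* Split ORIG between a new clone and the remaining original node.
   With partial training, a remainder that becomes a measured zero while
   the original was measured non-zero is demoted to a guess, so later
   passes do not treat the leftover body as proven cold.  */
static ToyCount
remainder_after_cloning (ToyCount orig, long moved_to_clone,
                         bool partial_training)
{
  ToyCount remainder = { orig.value - moved_to_clone, orig.measured };
  if (partial_training
      && orig.measured && orig.value > 0
      && remainder.measured && remainder.value == 0)
    remainder.measured = false;   /* keep it as a local guess */
  return remainder;
}

int
main ()
{
  ToyCount orig = { 1000, true };
  ToyCount dflt = remainder_after_cloning (orig, 1000, false);
  ToyCount part = remainder_after_cloning (orig, 1000, true);
  std::printf ("default:          %ld (%s)\n", dflt.value,
               dflt.measured ? "measured" : "guessed");
  std::printf ("partial-training: %ld (%s)\n", part.value,
               part.measured ? "measured" : "guessed");
  return 0;
}
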
......@@ -635,9 +635,20 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
}
if (bb_gcov_count (bb))
{
bool set_to_guessed = false;
FOR_EACH_EDGE (e, ei, bb->succs)
e->probability = profile_probability::probability_in_gcov_type
(edge_gcov_count (e), bb_gcov_count (bb));
{
bool prev_never = e->probability == profile_probability::never ();
e->probability = profile_probability::probability_in_gcov_type
(edge_gcov_count (e), bb_gcov_count (bb));
if (e->probability == profile_probability::never ()
&& !prev_never
&& flag_profile_partial_training)
set_to_guessed = true;
}
if (set_to_guessed)
FOR_EACH_EDGE (e, ei, bb->succs)
e->probability = e->probability.guessed ();
if (bb->index >= NUM_FIXED_BLOCKS
&& block_ends_with_condjump_p (bb)
&& EDGE_COUNT (bb->succs) >= 2)
......@@ -697,17 +708,23 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
}
}
if (exec_counts)
if (exec_counts
&& (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun))
|| !flag_profile_partial_training))
profile_status_for_fn (cfun) = PROFILE_READ;
/* If we have real data, use them! */
if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun))
|| !flag_guess_branch_prob)
FOR_ALL_BB_FN (bb, cfun)
bb->count = profile_count::from_gcov_type (bb_gcov_count (bb));
if (bb_gcov_count (bb) || !flag_profile_partial_training)
bb->count = profile_count::from_gcov_type (bb_gcov_count (bb));
else
bb->count = profile_count::guessed_zero ();
/* If function was not trained, preserve local estimates including statically
determined zero counts. */
else if (profile_status_for_fn (cfun) == PROFILE_READ)
else if (profile_status_for_fn (cfun) == PROFILE_READ
&& !flag_profile_partial_training)
FOR_ALL_BB_FN (bb, cfun)
if (!(bb->count == profile_count::zero ()))
bb->count = bb->count.global0 ();
......@@ -1417,7 +1434,7 @@ branch_prob (bool thunk)
/* At this moment we have precise loop iteration count estimates.
Record them to loop structure before the profile gets out of date. */
FOR_EACH_LOOP (loop, 0)
if (loop->header->count > 0)
if (loop->header->count > 0 && loop->header->count.reliable_p ())
{
gcov_type nit = expected_loop_iterations_unbounded (loop);
widest_int bound = gcov_type_to_wide_int (nit);
......
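
In compute_branch_probabilities, the new logic notices when an executed block has a successor edge whose measured count rounds to a zero probability; with -fprofile-partial-training all of that block's outgoing probabilities are then downgraded to guessed quality, so the untrained successor is not treated as provably never taken. A standalone sketch of the idea follows; it uses a toy Prob struct and plain doubles, and omits the prev_never comparison that the real hunk performs on the pre-existing probability:

#include <cstdio>
#include <vector>

/* Toy edge probability: a value plus a "guessed" quality bit.  */
struct Prob
{
  double value;
  bool guessed;
};

/* Derive outgoing probabilities of a block from gcov counts.  Under
   partial training, if any successor of an executed block ends up with
   a zero probability, the whole set is demoted to guessed quality.
   (The real hunk additionally skips edges whose probability was already
   known to be never taken.)  */
static std::vector<Prob>
block_probabilities (long bb_count, const std::vector<long> &edge_counts,
                     bool partial_training)
{
  std::vector<Prob> probs;
  bool set_to_guessed = false;
  for (long c : edge_counts)
    {
      Prob p = { bb_count ? (double) c / bb_count : 0.0, false };
      if (p.value == 0.0 && partial_training)
        set_to_guessed = true;
      probs.push_back (p);
    }
  if (set_to_guessed)
    for (Prob &p : probs)
      p.guessed = true;
  return probs;
}

int
main ()
{
  /* Block executed 100 times; one successor never seen in the train run.  */
  for (const Prob &p : block_probabilities (100, { 100, 0 }, true))
    std::printf ("%.2f (%s)\n", p.value, p.guessed ? "guessed" : "precise");
  return 0;
}
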
......@@ -785,7 +785,8 @@ tree_profiling (void)
if (flag_branch_probabilities
&& !thunk
&& flag_profile_values
&& flag_value_profile_transformations)
&& flag_value_profile_transformations
&& profile_status_for_fn (cfun) == PROFILE_READ)
gimple_value_profile_transformations ();
/* The above could hose dominator info. Currently there is
......
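
Finally, tree_profiling now runs gimple_value_profile_transformations only when the function's profile was actually read; under partial training a function never executed in the train run keeps guessed-quality counts, so value-profile transformations are skipped for it. A trivial standalone sketch of that guard (ProfileStatus and run_value_profile_transforms are names local to the sketch, mirroring GCC's profile status values only in spirit):

#include <cstdio>

/* Local stand-in for GCC's profile status values.  */
enum ProfileStatus { PROFILE_ABSENT, PROFILE_GUESSED, PROFILE_READ };

/* Value-profile transformations rely on counters that were actually
   read.  Under partial training a never-executed function keeps
   guessed-quality counts, so the transformations are skipped for it.  */
static bool
run_value_profile_transforms (ProfileStatus status, bool profile_values,
                              bool value_profile_transformations)
{
  return profile_values
         && value_profile_transformations
         && status == PROFILE_READ;
}

int
main ()
{
  std::printf ("trained function:   %d\n",
               run_value_profile_transforms (PROFILE_READ, true, true));
  std::printf ("untrained function: %d\n",
               run_value_profile_transforms (PROFILE_GUESSED, true, true));
  return 0;
}
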