Commit b49d29d7 authored by Jan Hubicka, committed by Jan Hubicka

Fix profile adjustments while cloning

This patch fixes profile updates while cloning.  When a new clone is produced,
its global profile is subtracted from the original function.  If the original
function's profile drops to 0, we want to switch from global profiles to global0
profiles, which is implemented by combine_with_ipa_count.

However, this is done on all edges independently, and it may happen that we end
up combining global and global0 profiles in one function, which is not a good
idea.

This patch implements profile_count::combine_with_ipa_count_within, which is
able to take into account that the counter is inside a function with a given
count.
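
Concretely, the per-edge update in cgraph_edge::clone changes roughly as
follows (condensed from the cgraphclones.c hunk below; cgraph_node::create_clone
gets the analogous treatment, passing inlined_to->count when the clone is
inlined):

  /* Before: each edge decided independently whether to drop to global0.  */
  count = count.combine_with_ipa_count (count.ipa () - new_edge->count.ipa ());

  /* After: the enclosing function's count is passed in, so all counters in
     one body end up in the same (global or global0) profile domain.  */
  count = count.combine_with_ipa_count_within (count.ipa ()
					       - new_edge->count.ipa (),
					       caller->count);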

	* profile-count.h (profile_count::combine_with_ipa_count_within):
	Declare.
	* profile-count.c (profile_count::combine_with_ipa_count_within):
	New.
	* cgraphclones.c (cgraph_edge::clone, cgraph_node::create_clone): Use
	it.

From-SVN: r278810
parent eb081fd0
gcc/ChangeLog

2019-11-28  Jan Hubicka  <hubicka@ucw.cz>

	* profile-count.h (profile_count::combine_with_ipa_count_within):
	Declare.
	* profile-count.c (profile_count::combine_with_ipa_count_within):
	New.
	* cgraphclones.c (cgraph_edge::clone, cgraph_node::create_clone): Use
	it.

2019-11-28  Jan Hubicka  <hubicka@ucw.cz>

	* ipa-utils.c (ipa_merge_profiles): Be sure that all type transitions
	of counters are done the same way.
gcc/cgraphclones.c
@@ -80,6 +80,11 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "dumpfile.h"
#include "gimple-pretty-print.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
/* Create clone of edge in the node N represented by CALL_EXPR
the callgraph. */
@@ -136,8 +141,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
/* Update IPA profile. Local profiles need no updating in original. */
if (update_original)
-    count = count.combine_with_ipa_count (count.ipa ()
-					   - new_edge->count.ipa ());
+    count = count.combine_with_ipa_count_within (count.ipa ()
+						  - new_edge->count.ipa (),
+						  caller->count);
symtab->call_edge_duplication_hooks (this, new_edge);
return new_edge;
}
@@ -268,6 +274,8 @@ cgraph_node::expand_all_artificial_thunks ()
thunk->thunk.thunk_p = false;
thunk->analyze ();
}
ipa_analyze_node (thunk);
inline_analyze_function (thunk);
thunk->expand_all_artificial_thunks ();
}
else
@@ -341,7 +349,14 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
/* Update IPA profile. Local profiles need no updating in original. */
if (update_original)
-    count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ());
+    {
+      if (inlined_to)
+	count = count.combine_with_ipa_count_within (count.ipa ()
+						     - prof_count.ipa (),
+						     inlined_to->count);
+      else
+	count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ());
+    }
new_node->decl = new_decl;
new_node->register_symbol ();
new_node->origin = origin;
gcc/profile-count.c
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see
#include "cgraph.h"
#include "wide-int.h"
#include "sreal.h"
#include "selftest.h"
/* Names from profile_quality enum values. */
@@ -291,6 +292,7 @@ profile_count::to_cgraph_frequency (profile_count entry_bb_count) const
return 0;
gcc_checking_assert (entry_bb_count.initialized_p ());
uint64_t scale;
gcc_checking_assert (compatible_p (entry_bb_count));
if (!safe_scale_64bit (!entry_bb_count.m_val ? m_val + 1 : m_val,
CGRAPH_FREQ_BASE, MAX (1, entry_bb_count.m_val), &scale))
return CGRAPH_FREQ_MAX;
@@ -328,6 +330,7 @@ profile_count::to_sreal_scale (profile_count in, bool *known) const
return 0;
if (m_val == in.m_val)
return 1;
gcc_checking_assert (compatible_p (in));
if (!in.m_val)
{
@@ -373,6 +376,8 @@ profile_count::adjust_for_ipa_scaling (profile_count *num,
profile_count
profile_count::combine_with_ipa_count (profile_count ipa)
{
if (!initialized_p ())
return *this;
ipa = ipa.ipa ();
if (ipa.nonzero_p ())
return ipa;
@@ -383,6 +388,23 @@ profile_count::combine_with_ipa_count (profile_count ipa)
return this->global0adjusted ();
}
/* Same as profile_count::combine_with_ipa_count but within function with count
   IPA2.  */
profile_count
profile_count::combine_with_ipa_count_within (profile_count ipa,
profile_count ipa2)
{
profile_count ret;
if (!initialized_p ())
return *this;
if (ipa2.ipa () == ipa2 && ipa.initialized_p ())
ret = ipa;
else
ret = combine_with_ipa_count (ipa);
gcc_checking_assert (ret.compatible_p (ipa2));
return ret;
}
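
/* Illustrative example, not part of the patch: when the enclosing function
   still carries a real IPA count, an initialized IPA count is used directly;
   when the function has already dropped to global0, the helper falls back to
   combine_with_ipa_count.  Names and values below are invented.

     profile_count fn_cnt = profile_count::from_gcov_type (100);
     profile_count bb_cnt = profile_count::from_gcov_type (40);

     bb_cnt.combine_with_ipa_count_within (profile_count::from_gcov_type (10),
					   fn_cnt);
       -> returns the IPA count of 10; the body stays in the global domain.

     bb_cnt.combine_with_ipa_count_within (profile_count::zero (),
					   fn_cnt.global0 ());
       -> falls back to combine_with_ipa_count, i.e. a global0 variant of 40.  */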
/* The profiling runtime uses gcov_type, which is usually 64bit integer.
Conversions back and forth are used to read the coverage and get it
into internal representation. */
@@ -425,3 +447,65 @@ profile_probability::combine_with_count (profile_count count1,
else
return *this * even () + other * even ();
}
#if CHECKING_P
namespace selftest {
/* Verify non-trivial type conversions for IPA scaling. This happens often
during inlining. */
static void
profile_count_verify_ipa_scaling (void)
{
profile_count cnt1 = profile_count::from_gcov_type (4).global0 ();
profile_count cnt2 = profile_count::from_gcov_type (2);
profile_count cnt3 = profile_count::from_gcov_type (8);
profile_count cnt4 = cnt3.apply_scale (cnt1, cnt2);
/* Result should be 16 with GUESSED_GLOBAL0. */
ASSERT_EQ (cnt4.ipa (), profile_count::zero ());
ASSERT_EQ (cnt4.to_gcov_type (), 16);
cnt1 = profile_count::from_gcov_type (4).global0adjusted ();
cnt4 = cnt3.apply_scale (cnt1, cnt2);
/* Result should be 16 with GUESSED_GLOBAL0_ADJUSTED. */
ASSERT_EQ (cnt4.ipa (), profile_count::adjusted_zero ());
ASSERT_EQ (cnt4.to_gcov_type (), 16);
}
/* Verify non-trivial cases of sreal scale calculations. */
static void
profile_count_verify_to_sreal_scale (void)
{
profile_count cnt1 = profile_count::from_gcov_type (4).global0 ();
profile_count cnt2 = profile_count::from_gcov_type (8);
/* If count is globally 0 it should have 0 scale in non-zero global count. */
ASSERT_EQ (cnt1.to_sreal_scale (cnt2), 0);
}
/* Verify non-trivial cases of probability_in calculations. */
static void
profile_count_verify_probability_in (void)
{
/*profile_count cnt1 = profile_count::from_gcov_type (4).global0 ();
profile_count cnt2 = profile_count::from_gcov_type (8);*/
/* If count is globally 0 it should have 0 probability in non-zero global
count. */
/*ASSERT_EQ (cnt1.probability_in (cnt2), profile_probability::never ());*/
}
/* Run all of the selftests within this file. */
void profile_count_c_tests (void)
{
profile_count_verify_ipa_scaling ();
profile_count_verify_to_sreal_scale ();
profile_count_verify_probability_in ();
}
}
#endif
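
For these selftests to actually run, profile_count_c_tests presumably also has
to be declared in selftest.h and called from selftest::run_tests (); that
wiring is not visible in the hunks above, so the sketch below merely follows
GCC's usual selftest registration pattern:

  /* selftest.h (assumed declaration).  */
  extern void profile_count_c_tests ();

  /* selftest-run-tests.c, inside selftest::run_tests () (assumed call).  */
  profile_count_c_tests ();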
gcc/profile-count.h
@@ -700,6 +700,7 @@ private:
uint64_t UINT64_BIT_FIELD_ALIGN m_val : n_bits;
#undef UINT64_BIT_FIELD_ALIGN
enum profile_quality m_quality : 3;
public:
/* Return true if both values can meaningfully appear in single function
body. We have either all counters in function local or global, otherwise
@@ -711,9 +712,18 @@ private:
if (*this == zero ()
|| other == zero ())
return true;
/* Do not allow nonzero global profile together with local guesses
that are globally0. */
if (ipa ().nonzero_p ()
&& !(other.ipa () == other))
return false;
if (other.ipa ().nonzero_p ()
&& !(ipa () == *this))
return false;
return ipa_p () == other.ipa_p ();
}
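
/* Illustrative consequence of the checks added above (comment not in the
   patch): a count whose ipa () is nonzero is no longer compatible with a
   GUESSED_GLOBAL0 count in the same body, so mixing the global and global0
   domains now trips the compatible_p asserts added in profile-count.c.  */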
public:
/* Used for counters which are expected to be never executed. */
static profile_count zero ()
{
@@ -992,6 +1002,14 @@ public:
profile_count max (profile_count other) const
{
profile_count val = *this;
/* Always prefer nonzero IPA counts over local counts. */
if (ipa ().nonzero_p () || other.ipa ().nonzero_p ())
{
val = ipa ();
other = other.ipa ();
}
if (!initialized_p ())
return other;
if (!other.initialized_p ())
@@ -1001,8 +1019,8 @@ public:
if (other == zero ())
return *this;
gcc_checking_assert (compatible_p (other));
-    if (m_val < other.m_val || (m_val == other.m_val
-				&& m_quality < other.m_quality))
+    if (val.m_val < other.m_val || (m_val == other.m_val
+				    && val.m_quality < other.m_quality))
return other;
return *this;
}
@@ -1061,6 +1079,7 @@ public:
{
if (*this == zero ())
return *this;
if (num == zero ())
return num;
if (!initialized_p () || !num.initialized_p () || !den.initialized_p ())
@@ -1075,7 +1094,9 @@ public:
ret.m_val = MIN (val, max_count);
ret.m_quality = MIN (MIN (MIN (m_quality, ADJUSTED),
num.m_quality), den.m_quality);
-    if (num.ipa_p () && !ret.ipa_p ())
+    /* Be sure that ret is not local or global0 type
+       if num is global.  */
+    if (num.ipa_p () && (!ret.ipa_p () || !(ret.ipa () == ret)))
ret.m_quality = MIN (num.m_quality, GUESSED);
return ret;
}
@@ -1153,8 +1174,8 @@ public:
if (*this == overall && m_quality == PRECISE)
return profile_probability::always ();
profile_probability ret;
-    gcc_checking_assert (compatible_p (overall));
+    gcc_checking_assert (compatible_p (overall));
if (overall.m_val < m_val)
{
ret.m_val = profile_probability::max_probability;
@@ -1194,6 +1215,10 @@ public:
global0. */
profile_count combine_with_ipa_count (profile_count ipa);
/* Same as combine_with_ipa_count but inside function with count IPA2. */
profile_count combine_with_ipa_count_within
(profile_count ipa, profile_count ipa2);
/* The profiling runtime uses gcov_type, which is usually 64bit integer.
Conversions back and forth are used to read the coverage and get it
into internal representation. */