Commit 650fe732 by Jan Hubicka Committed by Jan Hubicka

predict.c (determine_unlikely_bbs): Set cgraph node count to 0 when entry block…

predict.c (determine_unlikely_bbs): Set cgraph node count to 0 when entry block was promoted unlikely.


	* predict.c (determine_unlikely_bbs): Set cgraph node count to 0
	when entry block was promoted unlikely.
	(estimate_bb_frequencies): Increase frequency scale.
	* profile-count.h (profile_count): Export precision info.
	* gcc.dg/tree-ssa/dump-2.c: Fixup template for profile precision
	changes.
	* gcc.dg/tree-ssa/pr77445-2.c: Fixup template for profile precision
	changes.

From-SVN: r254888
parent db16c184
2017-11-17 Jan Hubicka <hubicka@ucw.cz> 2017-11-17 Jan Hubicka <hubicka@ucw.cz>
* predict.c (determine_unlikely_bbs): Set cgraph node count to 0
when entry block was promoted unlikely.
(estimate_bb_frequencies): Increase frequency scale.
* profile-count.h (profile_count): Export precision info.
2017-11-17 Jan Hubicka <hubicka@ucw.cz>
* tree-tailcall.c (eliminate_tail_call): Be more careful about not * tree-tailcall.c (eliminate_tail_call): Be more careful about not
disturbing profile of entry block. disturbing profile of entry block.
...@@ -3542,6 +3542,8 @@ determine_unlikely_bbs () ...@@ -3542,6 +3542,8 @@ determine_unlikely_bbs ()
bb->index, e->dest->index); bb->index, e->dest->index);
e->probability = profile_probability::never (); e->probability = profile_probability::never ();
} }
if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
cgraph_node::get (current_function_decl)->count = profile_count::zero ();
} }
/* Estimate and propagate basic block frequencies using the given branch /* Estimate and propagate basic block frequencies using the given branch
...@@ -3565,7 +3567,11 @@ estimate_bb_frequencies (bool force) ...@@ -3565,7 +3567,11 @@ estimate_bb_frequencies (bool force)
{ {
real_values_initialized = 1; real_values_initialized = 1;
real_br_prob_base = REG_BR_PROB_BASE; real_br_prob_base = REG_BR_PROB_BASE;
real_bb_freq_max = BB_FREQ_MAX; /* Scaling frequencies up to maximal profile count may result in
frequent overflows especially when inlining loops.
Small scaling results in unnecessary precision loss. Stay in
the half of the (exponential) range. */
real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
real_one_half = sreal (1, -1); real_one_half = sreal (1, -1);
real_inv_br_prob_base = sreal (1) / real_br_prob_base; real_inv_br_prob_base = sreal (1) / real_br_prob_base;
real_almost_one = sreal (1) - real_inv_br_prob_base; real_almost_one = sreal (1) - real_inv_br_prob_base;
...@@ -3610,6 +3616,8 @@ estimate_bb_frequencies (bool force) ...@@ -3610,6 +3616,8 @@ estimate_bb_frequencies (bool force)
freq_max = BLOCK_INFO (bb)->frequency; freq_max = BLOCK_INFO (bb)->frequency;
freq_max = real_bb_freq_max / freq_max; freq_max = real_bb_freq_max / freq_max;
if (freq_max < 16)
freq_max = 16;
cfun->cfg->count_max = profile_count::uninitialized (); cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{ {
......
...@@ -605,11 +605,13 @@ class sreal; ...@@ -605,11 +605,13 @@ class sreal;
class GTY(()) profile_count class GTY(()) profile_count
{ {
public:
/* Use 62bit to hold basic block counters. Should be at least /* Use 62bit to hold basic block counters. Should be at least
64bit. Although a counter cannot be negative, we use a signed 64bit. Although a counter cannot be negative, we use a signed
type to hold various extra stages. */ type to hold various extra stages. */
static const int n_bits = 61; static const int n_bits = 61;
private:
static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2; static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2;
static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1; static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1;
......
2017-11-17 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/tree-ssa/dump-2.c: Fixup template for profile precision
changes.
* gcc.dg/tree-ssa/pr77445-2.c: Fixup template for profile precision
changes.
2017-11-17 Nathan Sidwell <nathan@acm.org> 2017-11-17 Nathan Sidwell <nathan@acm.org>
* g++.dg/pr82836.C: Fix for c++17. * g++.dg/pr82836.C: Fix for c++17.
......
...@@ -6,4 +6,4 @@ int f(void) ...@@ -6,4 +6,4 @@ int f(void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: 10000\\\]:" "optimized" } } */ /* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: " "optimized" } } */
...@@ -120,7 +120,7 @@ enum STATES FMS( u8 **in , u32 *transitions) { ...@@ -120,7 +120,7 @@ enum STATES FMS( u8 **in , u32 *transitions) {
profile estimation stage. But the number of inconsistencies should not profile estimation stage. But the number of inconsistencies should not
increase much. */ increase much. */
/* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */ /* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum" 2 "thread1" } } */ /* { dg-final { scan-tree-dump-times "Invalid sum" 3 "thread1" } } */
/* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */ /* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */
/* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */ /* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */
/* { dg-final { scan-tree-dump-not "not considered" "thread3" } } */ /* { dg-final { scan-tree-dump-not "not considered" "thread3" } } */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment