Commit 5d3ebb71 by Jan Hubicka Committed by Jan Hubicka

re PR tree-optimization/79347 (vect_do_peeling is messing up profile)

	PR tree-ssa/79347
	* cfgloopmanip.c (lv_adjust_loop_entry_edge, loop_version): Add
	ELSE_PROB.
	* cfgloopmanip.h (loop_version): Update prototype.
	* modulo-sched.c (sms_schedule): Update call of loop_version.
	* tree-if-conv.c (version_loop_for_if_conversion): Likewise.
	* tree-parloops.c (gen_parallel_loop): Likewise.
	* tree-ssa-loop-manip.c (tree_transform_and_unroll_loop): Likewise.
	* tree-ssa-loop-split.c (split_loop): Likewise.
	* tree-ssa-loop-unswitch.c (tree_unswitch_loop): Likewise.
	* tree-vect-loop-manip.c (vect_loop_versioning): Likewise.

	* gcc.dg/tree-ssa/ifc-10.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-11.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-12.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-20040816-1.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-20040816-2.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-5.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-8.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-9.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-cd.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-pr56541.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-pr68583.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-pr69489-1.c: Match for profile mismatches.
	* gcc.dg/tree-ssa/ifc-pr69489-2.c: Match for profile mismatches.

From-SVN: r245196
parent e6b07173
2017-02-05 Jan Hubicka <hubicka@ucw.cz>
PR tree-ssa/79347
* cfgloopmanip.c (lv_adjust_loop_entry_edge, loop_version): Add
ELSE_PROB.
* cfgloopmanip.h (loop_version): Update prototype.
* modulo-sched.c (sms_schedule): Update call of loop_version.
* tree-if-conv.c (version_loop_for_if_conversion): Likewise.
* tree-parloops.c (gen_parallel_loop): Likewise.
* tree-ssa-loop-manip.c (tree_transform_and_unroll_loop): Likewise.
* tree-ssa-loop-split.c (split_loop): Likewise.
* tree-ssa-loop-unswitch.c (tree_unswitch_loop): Likewise.
* tree-vect-loop-manip.c (vect_loop_versioning): Likewise.
2017-02-05 Martin Liska <mliska@suse.cz>
PR bootstrap/78985
......
......@@ -1645,11 +1645,14 @@ force_single_succ_latches (void)
|
+---------> [second_head]
THEN_PROB is the probability of then branch of the condition. */
THEN_PROB is the probability of the then branch of the condition.
ELSE_PROB is the probability of the else branch. Note that both may be
REG_BR_PROB_BASE when the condition is IFN_LOOP_VECTORIZED. */
static basic_block
lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
edge e, void *cond_expr, unsigned then_prob)
edge e, void *cond_expr, unsigned then_prob,
unsigned else_prob)
{
basic_block new_head = NULL;
edge e1;
......@@ -1668,7 +1671,7 @@ lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
e1 = make_edge (new_head, first_head,
current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
e1->probability = then_prob;
e->probability = REG_BR_PROB_BASE - then_prob;
e->probability = else_prob;
e1->count = apply_probability (e->count, e1->probability);
e->count = apply_probability (e->count, e->probability);
......@@ -1701,7 +1704,8 @@ lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
struct loop *
loop_version (struct loop *loop,
void *cond_expr, basic_block *condition_bb,
unsigned then_prob, unsigned then_scale, unsigned else_scale,
unsigned then_prob, unsigned else_prob,
unsigned then_scale, unsigned else_scale,
bool place_after)
{
basic_block first_head, second_head;
......@@ -1732,7 +1736,7 @@ loop_version (struct loop *loop,
/* Split loop entry edge and insert new block with cond expr. */
cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
entry, cond_expr, then_prob);
entry, cond_expr, then_prob, else_prob);
if (condition_bb)
*condition_bb = cond_bb;
......
......@@ -58,6 +58,7 @@ basic_block create_preheader (struct loop *, int);
extern void create_preheaders (int);
extern void force_single_succ_latches (void);
struct loop * loop_version (struct loop *, void *,
basic_block *, unsigned, unsigned, unsigned, bool);
basic_block *, unsigned, unsigned,
unsigned, unsigned, bool);
#endif /* GCC_CFGLOOPMANIP_H */
......@@ -1713,7 +1713,8 @@ sms_schedule (void)
* REG_BR_PROB_BASE) / 100;
loop_version (loop, comp_rtx, &condition_bb,
prob, prob, REG_BR_PROB_BASE - prob,
prob, REG_BR_PROB_BASE - prob,
prob, REG_BR_PROB_BASE - prob,
true);
}
......
2017-02-05 Jan Hubicka <hubicka@ucw.cz>
PR tree-ssa/79347
* gcc.dg/tree-ssa/ifc-10.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-11.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-12.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-20040816-1.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-20040816-2.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-5.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-8.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-9.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-cd.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-pr56541.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-pr68583.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-pr69489-1.c: Match for profile mismatches.
* gcc.dg/tree-ssa/ifc-pr69489-2.c: Match for profile mismatches.
2017-02-05 Andre Vehreschild <vehre@gcc.gnu.org>
PR fortran/79344
......
/* { dg-do compile } */
/* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats" } */
/* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats-blocks-details" } */
/* { dg-require-visibility "" } */
int b[256] = {0}, y;
......@@ -20,3 +20,11 @@ int foo (int x, int n)
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats" } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats-blocks-details" } */
/* { dg-require-visibility "" } */
int a[1024] = {0.0};
......@@ -18,3 +18,11 @@ int foo (float *x)
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats" } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats-blocks-details" } */
/* { dg-require-visibility "" } */
struct st
......@@ -23,3 +23,12 @@ int foo (int x)
return 0;
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats-blocks-details" { target *-*-* } } */
#include <stdarg.h>
......@@ -33,3 +33,10 @@ int main1 ()
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats-blocks-details" { target *-*-* } } */
void foo(const int * __restrict__ zr_in,
const int * __restrict__ zi_in,
......@@ -37,3 +37,11 @@ void foo(const int * __restrict__ zr_in,
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats-blocks-details" { target *-*-* } } */
void
dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
......@@ -21,3 +21,10 @@ dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-details -ftree-loop-if-convert-stores" } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-details-blocks -ftree-loop-if-convert-stores" } */
/* { dg-require-visibility "" } */
#define LEN 4096
......@@ -16,3 +16,11 @@ void test ()
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats" } */
/* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats-blocks-details" } */
/* { dg-require-visibility "" } */
extern int b[256], y;
......@@ -20,3 +20,10 @@ int foo (int x, int n)
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-ifcvt-details -ftree-loop-if-convert-stores" } */
/* { dg-options "-O3 -fdump-tree-ifcvt-details-blocks-details -ftree-loop-if-convert-stores" } */
void foo (int *x1, int *x2, int *x3, int *x4, int *y)
{
......@@ -26,3 +26,11 @@ void foo (int *x1, int *x2, int *x3, int *x4, int *y)
}
/* { dg-final { scan-tree-dump-times "Use predicate of bb" 4 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-ifcvt-stats" } */
/* { dg-options "-O3 -fdump-tree-ifcvt-stats-blocks-details" } */
float a,b,c,d;
......@@ -23,3 +23,11 @@ void foo()
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-ifcvt" } */
/* { dg-options "-O3 -fdump-tree-ifcvt-blocks-details" } */
void foo (long *a)
{
......@@ -21,3 +21,10 @@ void foo (long *a)
}
/* { dg-final { scan-tree-dump "Applying if-conversion" "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-ifcvt-stats-blocks-details" { target *-*-* } } */
void foo (int a[], int b[])
{
......@@ -14,3 +14,11 @@ void foo (int a[], int b[])
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* Sum is wrong here, but not enough for error to be reported. */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
/* { dg-do compile } */
/* { dg-options "-S -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
/* { dg-options "-S -O2 -ftree-vectorize -fdump-tree-ifcvt-stats-blocks-details" { target *-*-* } } */
double
foo (const char *u, const char *v, long n)
......@@ -15,3 +15,10 @@ foo (const char *u, const char *v, long n)
}
/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
/* We insert
if (LOOP_VECTORIZED (...))
into the code; it is later folded by the vectorizer. Both outgoing edges must
have probability 100% so that the resulting profile matches after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
......@@ -2533,7 +2533,11 @@ combine_blocks (struct loop *loop)
will be if-converted, the new copy of the loop will not,
and the LOOP_VECTORIZED internal call will be guarding which
loop to execute. The vectorizer pass will fold this
internal call into either true or false. */
internal call into either true or false.
Note that this function intentionally invalidates the profile. Both edges
out of LOOP_VECTORIZED must have 100% probability so that the profile remains
consistent after the condition is folded in the vectorizer. */
static struct loop *
version_loop_for_if_conversion (struct loop *loop)
......@@ -2557,9 +2561,11 @@ version_loop_for_if_conversion (struct loop *loop)
saved_preds[i] = ifc_bbs[i]->aux;
initialize_original_copy_tables ();
/* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
is re-merged in the vectorizer. */
new_loop = loop_version (loop, cond, &cond_bb,
REG_BR_PROB_BASE, REG_BR_PROB_BASE,
REG_BR_PROB_BASE, true);
REG_BR_PROB_BASE, REG_BR_PROB_BASE, true);
free_original_copy_tables ();
for (unsigned i = 0; i < save_length; i++)
......
......@@ -2353,7 +2353,8 @@ gen_parallel_loop (struct loop *loop,
/* We assume that the loop usually iterates a lot. */
prob = 4 * REG_BR_PROB_BASE / 5;
loop_version (loop, many_iterations_cond, NULL,
prob, prob, REG_BR_PROB_BASE - prob, true);
prob, REG_BR_PROB_BASE - prob,
prob, REG_BR_PROB_BASE - prob, true);
update_ssa (TODO_update_ssa);
free_original_copy_tables ();
}
......
......@@ -1202,7 +1202,8 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
scale_rest = REG_BR_PROB_BASE;
new_loop = loop_version (loop, enter_main_cond, NULL,
prob_entry, scale_unrolled, scale_rest, true);
prob_entry, REG_BR_PROB_BASE - prob_entry,
scale_unrolled, scale_rest, true);
gcc_assert (new_loop != NULL);
update_ssa (TODO_update_ssa);
......
......@@ -562,7 +562,8 @@ split_loop (struct loop *loop1, struct tree_niter_desc *niter)
basic_block cond_bb;
struct loop *loop2 = loop_version (loop1, cond, &cond_bb,
REG_BR_PROB_BASE, REG_BR_PROB_BASE,
REG_BR_PROB_BASE, true);
REG_BR_PROB_BASE, REG_BR_PROB_BASE,
true);
gcc_assert (loop2);
update_ssa (TODO_update_ssa);
......
......@@ -493,7 +493,7 @@ tree_unswitch_loop (struct loop *loop,
extract_true_false_edges_from_block (unswitch_on, &edge_true, &edge_false);
prob_true = edge_true->probability;
return loop_version (loop, unshare_expr (cond),
NULL, prob_true, prob_true,
NULL, prob_true, REG_BR_PROB_BASE - prob_true, prob_true,
REG_BR_PROB_BASE - prob_true, false);
}
......
......@@ -2319,7 +2319,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
/* We don't want to scale SCALAR_LOOP's frequencies, we need to
scale LOOP's frequencies instead. */
nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
prob, REG_BR_PROB_BASE - prob,
REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
/* CONDITION_BB was created above SCALAR_LOOP's preheader,
......@@ -2348,7 +2349,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
}
else
nloop = loop_version (loop, cond_expr, &condition_bb,
prob, prob, REG_BR_PROB_BASE - prob, true);
prob, REG_BR_PROB_BASE - prob,
prob, REG_BR_PROB_BASE - prob, true);
if (version_niter)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment