Commit 08f1af2e by Jan Hubicka Committed by Jan Hubicka

pr21829.c: Simplify matching since we now optimize better.


	* gcc.dg/tree-ssa/pr21829.c: Simplify matching since
	we now optimize better.
	* gcc.dg/Wunreachable-8.c: Bogus warnings now come
	out at different places.
	* gcc.dg/vect/vect-92.c: Increase loop iteration count to prevent
	unrolling.
	* gcc.dg/vect/vect-76.c: Likewise.
	* gcc.dg/vect/vect-70.c: Likewise.
	* gcc.dg/vect/vect-66.c: Likewise.
	* gcc.dg/vect/no-section-anchors-vect-66.c: Likewise.
	* gcc.dg/vect/slp-3.c: One of loops gets now fully unrolled.
	* tree-ssa-loop-ivcanon.c: Include target.h
	(struct loop_size): new structure.
	(constant_after_peeling): New predicate.
	(tree_estimate_loop_size): New function.
	(estimated_unrolled_size): Rewrite for new estimates.
	(try_unroll_loop_completely): Use new estimates.
	* Makefile.in (tree-ssa-loop-ivcanon.o): Add dependency on target.h

From-SVN: r147395
parent a7d39bd3
2009-05-11 Jan Hubicka <jh@suse.cz>
* tree-ssa-loop-ivcanon.c: Include target.h
(struct loop_size): new structure.
(constant_after_peeling): New predicate.
(tree_estimate_loop_size): New function.
(estimated_unrolled_size): Rewrite for new estimates.
(try_unroll_loop_completely): Use new estimates.
* Makefile.in (tree-ssa-loop-ivcanon.o): Add dependency on target.h
2009-05-11 Andrew Pinski <andrew_pinski@playstation.sony.com> 2009-05-11 Andrew Pinski <andrew_pinski@playstation.sony.com>
* config/spu/spu-c.c (spu_categorize_keyword): Update for recent * config/spu/spu-c.c (spu_categorize_keyword): Update for recent
......
...@@ -2268,7 +2268,7 @@ tree-ssa-loop-ivcanon.o : tree-ssa-loop-ivcanon.c $(TREE_FLOW_H) $(CONFIG_H) \ ...@@ -2268,7 +2268,7 @@ tree-ssa-loop-ivcanon.o : tree-ssa-loop-ivcanon.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \
$(TREE_INLINE_H) output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ $(TREE_INLINE_H) output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
$(FLAGS_H) $(TREE_PASS_H) $(SCEV_H) $(BASIC_BLOCK_H) $(GGC_H) \ $(FLAGS_H) $(TREE_PASS_H) $(SCEV_H) $(BASIC_BLOCK_H) $(GGC_H) \
hard-reg-set.h tree-chrec.h hard-reg-set.h tree-chrec.h $(TARGET_H)
tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
......
2009-05-11 Jan Hubicka <jh@suse.cz>
* gcc.dg/tree-ssa/pr21829.c: Simplify matching since
we now optimize better.
* gcc.dg/Wunreachable-8.c: Bogus warnings now come
out at different places.
* gcc.dg/vect/vect-92.c: Increase loop iteration count to prevent
unrolling.
* gcc.dg/vect/vect-76.c: Likewise.
* gcc.dg/vect/vect-70.c: Likewise.
* gcc.dg/vect/vect-66.c: Likewise.
* gcc.dg/vect/no-section-anchors-vect-66.c: Likewise.
* gcc.dg/vect/slp-3.c: One of loops gets now fully unrolled.
2009-05-11 H.J. Lu <hongjiu.lu@intel.com> 2009-05-11 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/40080 PR middle-end/40080
......
...@@ -9,8 +9,8 @@ void bar (void) ...@@ -9,8 +9,8 @@ void bar (void)
{ {
int i; int i;
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++) /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
if (! foo (a[i])) if (! foo (a[i])) /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
return; return;
baz (); /* { dg-bogus "will never be executed" } */ baz (); /* { dg-bogus "will never be executed" } */
......
...@@ -4,7 +4,7 @@ float Factorial(float X) ...@@ -4,7 +4,7 @@ float Factorial(float X)
{ {
float val = 1.0; float val = 1.0;
int k,j; int k,j;
for (k=1; k < 5; k++) for (k=1; k < 5; k++) /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
{ {
val += 1.0; /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */ val += 1.0; /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
} }
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-cddce2" } */ /* { dg-options "-O2 -fdump-tree-optimized" } */
int test(int v) int test(int v)
{ {
...@@ -16,33 +16,7 @@ int test(int v) ...@@ -16,33 +16,7 @@ int test(int v)
return x; return x;
} }
/* This should be optimized to /* This should be unrolled and optimized into conditional set of return value "v < 0". */
if (v <= 0) goto <L1>; else goto <L3>; /* { dg-final { scan-tree-dump-not "if \\(" "optimized" } } */
<L1>:;
# x_1 = PHI <0(3), 1(1)>;
<L3>:;
return x_1;
retaining only a single conditional. This doesn't work as nobody
combines the two tests
if (v < 0) goto <bb 4>; else goto <bb 3>;
<bb 3>:
if (v <= 0) goto <bb 4>; else goto <bb 5>;
this late in the game. tree-ssa-ifcombine.c would do it if we would
unroll the loop during early loop unrolling though.
For now vrp2 does all the needed folding and threading and cddce2
provides a nice IL to scan. */
/* { dg-final { scan-tree-dump-times "if " 1 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "if " 2 "cddce2" } } */
/* { dg-final { scan-tree-dump "x_. = PHI <0\\\(.\\\), 1\\\(.\\\)>" "cddce2" } } */
/* { dg-final { cleanup-tree-dump "cddce2" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 8 #define N 16
int ia[8][5][N+2]; int ia[8][5][N+2];
int ic[16][16][5][N+2]; int ic[16][16][5][N+2];
......
...@@ -142,7 +142,8 @@ int main (void) ...@@ -142,7 +142,8 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_align } } } */ /* One of the loops gets completely unrolled. */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 8 #define N 16
__attribute__ ((noinline)) __attribute__ ((noinline))
void main1 () void main1 ()
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 12 #define N 24
struct s{ struct s{
int m; int m;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 12 #define N 24
#define OFF 4 #define OFF 4
/* Check handling of accesses for which the "initial condition" - /* Check handling of accesses for which the "initial condition" -
......
...@@ -22,13 +22,13 @@ main1 () ...@@ -22,13 +22,13 @@ main1 ()
{ {
int i; int i;
for (i = 0; i < 5; i++) for (i = 0; i < 10; i++)
{ {
pa[i+1] = pb[i+1] * pc[i+1]; pa[i+1] = pb[i+1] * pc[i+1];
} }
/* check results: */ /* check results: */
for (i = 0; i < 5; i++) for (i = 0; i < 10; i++)
{ {
if (pa[i+1] != (pb[i+1] * pc[i+1])) if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort (); abort ();
...@@ -42,13 +42,13 @@ main2 () ...@@ -42,13 +42,13 @@ main2 ()
{ {
int i; int i;
for (i = 0; i < 6; i++) for (i = 0; i < 12; i++)
{ {
pa[i+1] = pb[i+1] * pc[i+1]; pa[i+1] = pb[i+1] * pc[i+1];
} }
/* check results: */ /* check results: */
for (i = 0; i < 6; i++) for (i = 0; i < 12; i++)
{ {
if (pa[i+1] != (pb[i+1] * pc[i+1])) if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort (); abort ();
......
...@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "params.h" #include "params.h"
#include "flags.h" #include "flags.h"
#include "tree-inline.h" #include "tree-inline.h"
#include "target.h"
/* Specifies types of loops that may be unrolled. */ /* Specifies types of loops that may be unrolled. */
...@@ -118,7 +119,7 @@ tree_num_loop_insns (struct loop *loop, eni_weights *weights) ...@@ -118,7 +119,7 @@ tree_num_loop_insns (struct loop *loop, eni_weights *weights)
{ {
basic_block *body = get_loop_body (loop); basic_block *body = get_loop_body (loop);
gimple_stmt_iterator gsi; gimple_stmt_iterator gsi;
unsigned size = 1, i; unsigned size = 0, i;
for (i = 0; i < loop->num_nodes; i++) for (i = 0; i < loop->num_nodes; i++)
for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
...@@ -128,28 +129,195 @@ tree_num_loop_insns (struct loop *loop, eni_weights *weights) ...@@ -128,28 +129,195 @@ tree_num_loop_insns (struct loop *loop, eni_weights *weights)
return size; return size;
} }
/* Estimate number of insns of completely unrolled loop. We assume /* Describe size of loop as detected by tree_estimate_loop_size. */
that the size of the unrolled loop is decreased in the struct loop_size
following way (the numbers of insns are based on what {
estimate_num_insns returns for appropriate statements): /* Number of instructions in the loop. */
int overall;
/* Number of instructions that will be likely optimized out in
peeled iterations of loop (i.e. computation based on induction
variable where induction variable starts at known constant.) */
int eliminated_by_peeling;
/* Same statistics for last iteration of loop: it is smaller because
instructions after exit are not executed. */
int last_iteration;
int last_iteration_eliminated_by_peeling;
};
/* Return true if operand OP of STMT is expected to become a compile-time
   constant once LOOP has been peeled.

   Three cases are accepted:
   - OP is already a GIMPLE minimal invariant;
   - OP is not an SSA name, its innermost base is either a constant or a
     static read-only declaration whose initializer is visible or which
     binds locally, and every ARRAY_REF index on the way to that base is
     itself constant after peeling;
   - OP is an SSA name that is a simple induction variable whose base and
     step are both invariant.  */
static bool
constant_after_peeling (tree op, gimple stmt, struct loop *loop)
{
  tree ref;

  if (is_gimple_min_invariant (op))
    return true;

  /* SSA names are constant exactly when they are induction variables
     starting from, and stepping by, a known constant.  */
  if (TREE_CODE (op) == SSA_NAME)
    {
      affine_iv iv;

      if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
	return false;
      return (is_gimple_min_invariant (iv.base)
	      && is_gimple_min_invariant (iv.step));
    }

  /* Anything else can only fold if it reads from constant storage.
     Quickly strip the reference down to its innermost base first.  */
  for (ref = op; handled_component_p (ref); ref = TREE_OPERAND (ref, 0))
    ;

  if (!((DECL_P (ref)
	 && TREE_STATIC (ref)
	 && TREE_READONLY (ref)
	 && (DECL_INITIAL (ref)
	     || (!DECL_EXTERNAL (ref)
		 && targetm.binds_local_p (ref))))
	|| CONSTANT_CLASS_P (ref)))
    return false;

  /* The storage is constant; now every array index used to reach it must
     be known after peeling as well.  */
  for (ref = op; handled_component_p (ref); ref = TREE_OPERAND (ref, 0))
    if (TREE_CODE (ref) == ARRAY_REF
	&& !constant_after_peeling (TREE_OPERAND (ref, 1), stmt, loop))
      return false;

  return true;
}
/* Estimate the size of LOOP and how much of it is likely to disappear
   when the loop is completely unrolled through EXIT.

   Every statement in the loop body is costed with estimate_num_insns
   using eni_size_weights.  The totals are stored in SIZE:
   SIZE->overall counts all statements and SIZE->eliminated_by_peeling
   those expected to fold away after peeling; the last_iteration
   counterparts count only statements in blocks that are not strictly
   dominated by EXIT->src.

   EXIT may be NULL, in which case no block is treated as lying after
   the exit and no statement is treated as the exit condition.  */
static void
tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);

  for (i = 0; i < loop->num_nodes; i++)
    {
      /* Blocks strictly dominated by the exit's source are not executed
	 in the last iteration.  */
      if (exit && body[i] != exit->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
	after_exit = true;
      else
	after_exit = false;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " size: %3i ", num);
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  /* Exit conditional.  EXIT is checked for NULL here just as it is
	     when computing AFTER_EXIT above, so that a missing exit edge
	     is never dereferenced.  */
	  if (exit && body[i] == exit->src && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " Exit condition will be eliminated.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments whose result is constant after peeling.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments to an SSA name whose operands are all constant
	     after peeling; the second operand only matters for binary
	     right-hand sides.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
						  stmt, loop)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals comparing two values that are constant after
	     peeling.  */
	  else if (gimple_code (stmt) == GIMPLE_COND
		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  size->overall += num;
	  if (likely_eliminated)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
}
1) exit condition gets removed (2 insns) /* Estimate number of insns of completely unrolled loop.
2) increment of the control variable gets removed (2 insns) It is (NUNROLL + 1) * size of loop body with taking into account
3) All remaining statements are likely to get simplified the fact that in last copy everything after exit conditional
due to constant propagation. Hard to estimate; just is dead and that some instructions will be eliminated after
as a heuristics we decrease the rest by 1/3. peeling.
NINSNS is the number of insns in the loop before unrolling. Loop body is likely going to simplify further, this is difficult
NUNROLL is the number of times the loop is unrolled. */ to guess, we just decrease the result by 1/3. */
static unsigned HOST_WIDE_INT static unsigned HOST_WIDE_INT
estimated_unrolled_size (unsigned HOST_WIDE_INT ninsns, estimated_unrolled_size (struct loop_size *size,
unsigned HOST_WIDE_INT nunroll) unsigned HOST_WIDE_INT nunroll)
{ {
HOST_WIDE_INT unr_insns = 2 * ((HOST_WIDE_INT) ninsns - 4) / 3; HOST_WIDE_INT unr_insns = ((nunroll)
* (HOST_WIDE_INT) (size->overall
- size->eliminated_by_peeling));
if (!nunroll)
unr_insns = 0;
unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
unr_insns = unr_insns * 2 / 3;
if (unr_insns <= 0) if (unr_insns <= 0)
unr_insns = 1; unr_insns = 1;
unr_insns *= (nunroll + 1);
return unr_insns; return unr_insns;
} }
...@@ -165,6 +333,7 @@ try_unroll_loop_completely (struct loop *loop, ...@@ -165,6 +333,7 @@ try_unroll_loop_completely (struct loop *loop,
{ {
unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
gimple cond; gimple cond;
struct loop_size size;
if (loop->inner) if (loop->inner)
return false; return false;
...@@ -182,9 +351,10 @@ try_unroll_loop_completely (struct loop *loop, ...@@ -182,9 +351,10 @@ try_unroll_loop_completely (struct loop *loop,
if (ul == UL_SINGLE_ITER) if (ul == UL_SINGLE_ITER)
return false; return false;
ninsns = tree_num_loop_insns (loop, &eni_size_weights); tree_estimate_loop_size (loop, exit, &size);
ninsns = size.overall;
unr_insns = estimated_unrolled_size (ninsns, n_unroll); unr_insns = estimated_unrolled_size (&size, n_unroll);
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
{ {
fprintf (dump_file, " Loop size: %d\n", (int) ninsns); fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment