Commit 447f3223 by Richard Biener Committed by Richard Biener

re PR tree-optimization/58626 (possible array wrong code bug)

2013-10-25  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/58626
	* tree-loop-distribution.c (enum rdg_dep_type): Remove
	anti_dd, output_dd and input_dd.
	(struct rdg_edge): Remove level and relation members.
	(RDGE_LEVEL, RDGE_RELATION): Remove.
	(dot_rdg_1): Adjust.
	(create_rdg_edge_for_ddr): Remove.
	(create_rdg_edges_for_scalar): Adjust.
	(create_edge_for_control_dependence): Likewise.
	(create_rdg_edges): Split into ...
	(create_rdg_flow_edges): ... this
	(create_rdg_cd_edges): ... and this.
	(free_rdg): Adjust.
	(build_rdg): Likewise, do not compute data dependences or
	add edges for them.
	(pg_add_dependence_edges): New function.
	(pgcmp): Likewise.
	(distribute_loop): First apply all non-dependence based
	partition mergings.  Then compute dependences between partitions
	and merge and order partitions according to them.

	* gcc.dg/torture/pr58626.c: New testcase.

From-SVN: r204062
parent 5970573c
2013-10-25 Richard Biener <rguenther@suse.de>
PR tree-optimization/58626
* tree-loop-distribution.c (enum rdg_dep_type): Remove
anti_dd, output_dd and input_dd.
(struct rdg_edge): Remove level and relation members.
(RDGE_LEVEL, RDGE_RELATION): Remove.
(dot_rdg_1): Adjust.
(create_rdg_edge_for_ddr): Remove.
(create_rdg_edges_for_scalar): Adjust.
(create_edge_for_control_dependence): Likewise.
(create_rdg_edges): Split into ...
(create_rdg_flow_edges): ... this
(create_rdg_cd_edges): ... and this.
(free_rdg): Adjust.
(build_rdg): Likewise, do not compute data dependences or
add edges for them.
(pg_add_dependence_edges): New function.
(pgcmp): Likewise.
(distribute_loop): First apply all non-dependence based
partition mergings. Then compute dependences between partitions
and merge and order partitions according to them.
2013-10-25 Eric Botcazou <ebotcazou@adacore.com> 2013-10-25 Eric Botcazou <ebotcazou@adacore.com>
PR rtl-optimization/58831 PR rtl-optimization/58831
2013-10-25 Richard Biener <rguenther@suse.de>
PR tree-optimization/58626
* gcc.dg/torture/pr58626.c: New testcase.
2013-10-25 Paolo Carlini <paolo.carlini@oracle.com> 2013-10-25 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/54812 PR c++/54812
......
/* { dg-do run } */
extern void abort (void);
int a[8][6] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
int b;
int main(void)
{
for (b = 0; b <= 1; b++) {
a[1][3] = 0;
int c;
for (c = 0; c <= 1; c++) {
a[c + 1][b] = a[c + 2][b];
}
}
if (a[1][1] != 1)
abort ();
return 0;
}
...@@ -96,15 +96,6 @@ enum rdg_dep_type ...@@ -96,15 +96,6 @@ enum rdg_dep_type
/* Read After Write (RAW). */ /* Read After Write (RAW). */
flow_dd = 'f', flow_dd = 'f',
/* Write After Read (WAR). */
anti_dd = 'a',
/* Write After Write (WAW). */
output_dd = 'o',
/* Read After Read (RAR). */
input_dd = 'i',
/* Control dependence (execute conditional on). */ /* Control dependence (execute conditional on). */
control_dd = 'c' control_dd = 'c'
}; };
...@@ -115,19 +106,9 @@ typedef struct rdg_edge ...@@ -115,19 +106,9 @@ typedef struct rdg_edge
{ {
/* Type of the dependence. */ /* Type of the dependence. */
enum rdg_dep_type type; enum rdg_dep_type type;
/* Levels of the dependence: the depth of the loops that carry the
dependence. */
unsigned level;
/* Dependence relation between data dependences, NULL when one of
the vertices is a scalar. */
ddr_p relation;
} *rdg_edge_p; } *rdg_edge_p;
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
/* Dump vertex I in RDG to FILE. */ /* Dump vertex I in RDG to FILE. */
...@@ -215,23 +196,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg) ...@@ -215,23 +196,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg)
for (e = v->succ; e; e = e->succ_next) for (e = v->succ; e; e = e->succ_next)
switch (RDGE_TYPE (e)) switch (RDGE_TYPE (e))
{ {
case input_dd:
fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
break;
case output_dd:
fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
break;
case flow_dd: case flow_dd:
/* These are the most common dependences: don't print these. */ /* These are the most common dependences: don't print these. */
fprintf (file, "%d -> %d \n", i, e->dest); fprintf (file, "%d -> %d \n", i, e->dest);
break; break;
case anti_dd:
fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
break;
case control_dd: case control_dd:
fprintf (file, "%d -> %d [label=control] \n", i, e->dest); fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
break; break;
...@@ -273,52 +242,6 @@ rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt) ...@@ -273,52 +242,6 @@ rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
return index; return index;
} }
/* Creates an edge in RDG for each distance vector from DDR. The
order that we keep track of in the RDG is the order in which
statements have to be executed. */
static void
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
{
struct graph_edge *e;
int va, vb;
data_reference_p dra = DDR_A (ddr);
data_reference_p drb = DDR_B (ddr);
unsigned level = ddr_dependence_level (ddr);
/* For non scalar dependences, when the dependence is REVERSED,
statement B has to be executed before statement A. */
if (level > 0
&& !DDR_REVERSED_P (ddr))
{
data_reference_p tmp = dra;
dra = drb;
drb = tmp;
}
va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
if (va < 0 || vb < 0)
return;
e = add_edge (rdg, va, vb);
e->data = XNEW (struct rdg_edge);
RDGE_LEVEL (e) = level;
RDGE_RELATION (e) = ddr;
/* Determines the type of the data dependence. */
if (DR_IS_READ (dra) && DR_IS_READ (drb))
RDGE_TYPE (e) = input_dd;
else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
RDGE_TYPE (e) = output_dd;
else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
RDGE_TYPE (e) = flow_dd;
else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
RDGE_TYPE (e) = anti_dd;
}
/* Creates dependence edges in RDG for all the uses of DEF. IDEF is /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
the index of DEF in RDG. */ the index of DEF in RDG. */
...@@ -339,7 +262,6 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) ...@@ -339,7 +262,6 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
e = add_edge (rdg, idef, use); e = add_edge (rdg, idef, use);
e->data = XNEW (struct rdg_edge); e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = flow_dd; RDGE_TYPE (e) = flow_dd;
RDGE_RELATION (e) = NULL;
} }
} }
...@@ -366,7 +288,6 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb, ...@@ -366,7 +288,6 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
e = add_edge (rdg, c, v); e = add_edge (rdg, c, v);
e->data = XNEW (struct rdg_edge); e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = control_dd; RDGE_TYPE (e) = control_dd;
RDGE_RELATION (e) = NULL;
} }
} }
} }
...@@ -374,38 +295,38 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb, ...@@ -374,38 +295,38 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
/* Creates the edges of the reduced dependence graph RDG. */ /* Creates the edges of the reduced dependence graph RDG. */
static void static void
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd) create_rdg_flow_edges (struct graph *rdg)
{ {
int i; int i;
struct data_dependence_relation *ddr;
def_operand_p def_p; def_operand_p def_p;
ssa_op_iter iter; ssa_op_iter iter;
FOR_EACH_VEC_ELT (ddrs, i, ddr)
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
create_rdg_edge_for_ddr (rdg, ddr);
else
free_dependence_relation (ddr);
for (i = 0; i < rdg->n_vertices; i++) for (i = 0; i < rdg->n_vertices; i++)
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
iter, SSA_OP_DEF) iter, SSA_OP_DEF)
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
if (cd) /* Creates the edges of the reduced dependence graph RDG. */
for (i = 0; i < rdg->n_vertices; i++)
{ static void
gimple stmt = RDG_STMT (rdg, i); create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
if (gimple_code (stmt) == GIMPLE_PHI) {
{ int i;
edge_iterator ei;
edge e; for (i = 0; i < rdg->n_vertices; i++)
FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) {
gimple stmt = RDG_STMT (rdg, i);
if (gimple_code (stmt) == GIMPLE_PHI)
{
edge_iterator ei;
edge e;
FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
create_edge_for_control_dependence (rdg, e->src, i, cd); create_edge_for_control_dependence (rdg, e->src, i, cd);
} }
else else
create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
} }
} }
/* Build the vertices of the reduced dependence graph RDG. Return false /* Build the vertices of the reduced dependence graph RDG. Return false
...@@ -494,10 +415,7 @@ free_rdg (struct graph *rdg) ...@@ -494,10 +415,7 @@ free_rdg (struct graph *rdg)
struct graph_edge *e; struct graph_edge *e;
for (e = v->succ; e; e = e->succ_next) for (e = v->succ; e; e = e->succ_next)
{ free (e->data);
free_dependence_relation (RDGE_RELATION (e));
free (e->data);
}
if (v->data) if (v->data)
{ {
...@@ -520,7 +438,6 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd) ...@@ -520,7 +438,6 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
struct graph *rdg; struct graph *rdg;
vec<gimple> stmts; vec<gimple> stmts;
vec<data_reference_p> datarefs; vec<data_reference_p> datarefs;
vec<ddr_p> dependence_relations;
/* Create the RDG vertices from the stmts of the loop nest. */ /* Create the RDG vertices from the stmts of the loop nest. */
stmts.create (10); stmts.create (10);
...@@ -536,19 +453,10 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd) ...@@ -536,19 +453,10 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
} }
stmts.release (); stmts.release ();
/* Create the RDG edges from the data dependences in the loop nest. */ create_rdg_flow_edges (rdg);
dependence_relations.create (100); if (cd)
if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest, create_rdg_cd_edges (rdg, cd);
false)
|| !known_dependences_p (dependence_relations))
{
free_dependence_relations (dependence_relations);
datarefs.release ();
free_rdg (rdg);
return NULL;
}
create_rdg_edges (rdg, dependence_relations, cd);
dependence_relations.release ();
datarefs.release (); datarefs.release ();
return rdg; return rdg;
...@@ -1405,6 +1313,70 @@ partition_contains_all_rw (struct graph *rdg, ...@@ -1405,6 +1313,70 @@ partition_contains_all_rw (struct graph *rdg,
return false; return false;
} }
/* Compute partition dependence created by the data references in DRS1
and DRS2 and modify and return DIR according to that. */
static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
vec<data_reference_p> drs1,
vec<data_reference_p> drs2)
{
data_reference_p dr1, dr2;
/* dependence direction - 0 is no dependence, -1 is back,
1 is forth, 2 is both (we can stop then, merging will occur). */
for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
{
int this_dir = 1;
ddr_p ddr;
/* Re-shuffle data-refs to be in dominator order. */
if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
> rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
{
data_reference_p tem = dr1;
dr1 = dr2;
dr2 = tem;
this_dir = -this_dir;
}
ddr = initialize_data_dependence_relation (dr1, dr2, loops);
compute_affine_dependence (ddr, loops[0]);
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
this_dir = 2;
else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
if (DDR_REVERSED_P (ddr))
{
data_reference_p tem = dr1;
dr1 = dr2;
dr2 = tem;
this_dir = -this_dir;
}
/* Known dependences can still be unordered througout the
iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
if (DDR_NUM_DIST_VECTS (ddr) == 0)
this_dir = 2;
}
else
this_dir = 0;
free_dependence_relation (ddr);
if (dir == 0)
dir = this_dir;
else if (dir != this_dir)
return 2;
}
return dir;
}
/* Compare postorder number of the partition graph vertices V1 and V2. */
static int
pgcmp (const void *v1_, const void *v2_)
{
const vertex *v1 = (const vertex *)v1_;
const vertex *v2 = (const vertex *)v2_;
return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer /* Distributes the code from LOOP in such a way that producer
statements are placed before consumer statements. Tries to separate statements are placed before consumer statements. Tries to separate
...@@ -1421,6 +1393,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, ...@@ -1421,6 +1393,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
partition_t partition; partition_t partition;
bool any_builtin; bool any_builtin;
int i, nbp; int i, nbp;
graph *pg = NULL;
int num_sccs = 1;
*nb_calls = 0; *nb_calls = 0;
loop_nest.create (3); loop_nest.create (3);
...@@ -1455,8 +1429,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, ...@@ -1455,8 +1429,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
any_builtin |= partition_builtin_p (partition); any_builtin |= partition_builtin_p (partition);
} }
/* If we did not detect any builtin but are not asked to apply /* If we are only distributing patterns but did not detect any,
regular loop distribution simply bail out. */ simply bail out. */
if (!flag_tree_loop_distribution if (!flag_tree_loop_distribution
&& !any_builtin) && !any_builtin)
{ {
...@@ -1464,9 +1438,56 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, ...@@ -1464,9 +1438,56 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
goto ldist_done; goto ldist_done;
} }
/* If we are only distributing patterns fuse all partitions that
were not classified as builtins. This also avoids chopping
a loop into pieces, separated by builtin calls. That is, we
only want no or a single loop body remaining. */
partition_t into;
if (!flag_tree_loop_distribution)
{
for (i = 0; partitions.iterate (i, &into); ++i)
if (!partition_builtin_p (into))
break;
for (++i; partitions.iterate (i, &partition); ++i)
if (!partition_builtin_p (partition))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "fusing non-builtin partitions\n");
dump_bitmap (dump_file, into->stmts);
dump_bitmap (dump_file, partition->stmts);
}
partition_merge_into (into, partition);
partitions.unordered_remove (i);
partition_free (partition);
i--;
}
}
/* Due to limitations in the transform phase we have to fuse all
reduction partitions into the last partition so the existing
loop will contain all loop-closed PHI nodes. */
for (i = 0; partitions.iterate (i, &into); ++i)
if (partition_reduction_p (into))
break;
for (i = i + 1; partitions.iterate (i, &partition); ++i)
if (partition_reduction_p (partition))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "fusing partitions\n");
dump_bitmap (dump_file, into->stmts);
dump_bitmap (dump_file, partition->stmts);
fprintf (dump_file, "because they have reductions\n");
}
partition_merge_into (into, partition);
partitions.unordered_remove (i);
partition_free (partition);
i--;
}
/* Apply our simple cost model - fuse partitions with similar /* Apply our simple cost model - fuse partitions with similar
memory accesses. */ memory accesses. */
partition_t into;
for (i = 0; partitions.iterate (i, &into); ++i) for (i = 0; partitions.iterate (i, &into); ++i)
{ {
if (partition_builtin_p (into)) if (partition_builtin_p (into))
...@@ -1486,61 +1507,119 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, ...@@ -1486,61 +1507,119 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
"memory accesses\n"); "memory accesses\n");
} }
partition_merge_into (into, partition); partition_merge_into (into, partition);
partitions.ordered_remove (j); partitions.unordered_remove (j);
partition_free (partition); partition_free (partition);
j--; j--;
} }
} }
} }
/* If we are only distributing patterns fuse all partitions that /* Build the partition dependency graph. */
were not properly classified as builtins. */ if (partitions.length () > 1)
if (!flag_tree_loop_distribution)
{ {
partition_t into; pg = new_graph (partitions.length ());
/* Only fuse adjacent non-builtin partitions, see PR53616. struct pgdata {
??? Use dependence information to improve partition ordering. */ partition_t partition;
i = 0; vec<data_reference_p> writes;
do vec<data_reference_p> reads;
};
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
for (i = 0; partitions.iterate (i, &partition); ++i)
{
vertex *v = &pg->vertices[i];
pgdata *data = new pgdata;
data_reference_p dr;
/* FIXME - leaks. */
v->data = data;
bitmap_iterator bi;
unsigned j;
data->partition = partition;
data->reads = vNULL;
data->writes = vNULL;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
if (DR_IS_READ (dr))
data->reads.safe_push (dr);
else
data->writes.safe_push (dr);
}
partition_t partition1, partition2;
for (i = 0; partitions.iterate (i, &partition1); ++i)
for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
{
/* dependence direction - 0 is no dependence, -1 is back,
1 is forth, 2 is both (we can stop then, merging will occur). */
int dir = 0;
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
PGDATA(i)->writes,
PGDATA(j)->reads);
if (dir != 2)
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
PGDATA(i)->reads,
PGDATA(j)->writes);
if (dir != 2)
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
PGDATA(i)->writes,
PGDATA(j)->writes);
if (dir == 1 || dir == 2)
add_edge (pg, i, j);
if (dir == -1 || dir == 2)
add_edge (pg, j, i);
}
/* Add edges to the reduction partition (if any) to force it last. */
unsigned j;
for (j = 0; partitions.iterate (j, &partition); ++j)
if (partition_reduction_p (partition))
break;
if (j < partitions.length ())
{ {
for (; partitions.iterate (i, &into); ++i) for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
if (!partition_builtin_p (into)) if (i != j)
add_edge (pg, i, j);
}
/* Compute partitions we cannot separate and fuse them. */
num_sccs = graphds_scc (pg, NULL);
for (i = 0; i < num_sccs; ++i)
{
partition_t first;
int j;
for (j = 0; partitions.iterate (j, &first); ++j)
if (pg->vertices[j].component == i)
break; break;
for (++i; partitions.iterate (i, &partition); ++i) for (j = j + 1; partitions.iterate (j, &partition); ++j)
if (!partition_builtin_p (partition)) if (pg->vertices[j].component == i)
{ {
partition_merge_into (into, partition); if (dump_file && (dump_flags & TDF_DETAILS))
partitions.ordered_remove (i); {
fprintf (dump_file, "fusing partitions\n");
dump_bitmap (dump_file, first->stmts);
dump_bitmap (dump_file, partition->stmts);
fprintf (dump_file, "because they are in the same "
"dependence SCC\n");
}
partition_merge_into (first, partition);
partitions[j] = NULL;
partition_free (partition); partition_free (partition);
i--; PGDATA (j)->partition = NULL;
} }
else
break;
} }
while ((unsigned) i < partitions.length ());
}
/* Fuse all reduction partitions into the last. */ /* Now order the remaining nodes in postorder. */
if (partitions.length () > 1) qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
{ partitions.truncate (0);
partition_t into = partitions.last (); for (i = 0; i < pg->n_vertices; ++i)
for (i = partitions.length () - 2; i >= 0; --i)
{ {
partition_t what = partitions[i]; pgdata *data = PGDATA (i);
if (partition_reduction_p (what)) if (data->partition)
{ partitions.safe_push (data->partition);
if (dump_file && (dump_flags & TDF_DETAILS)) data->reads.release ();
{ data->writes.release ();
fprintf (dump_file, "fusing partitions\n"); delete data;
dump_bitmap (dump_file, into->stmts);
dump_bitmap (dump_file, what->stmts);
fprintf (dump_file, "because the latter has reductions\n");
}
partition_merge_into (into, what);
partitions.ordered_remove (i);
partition_free (what);
}
} }
gcc_assert (partitions.length () == (unsigned)num_sccs);
free_graph (pg);
} }
nbp = partitions.length (); nbp = partitions.length ();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment