Commit 174ec470 by Richard Biener Committed by Richard Biener

tree-loop-distribution.c (dot_rdg_1): Make graph prettier.

2013-09-12  Richard Biener  <rguenther@suse.de>

	* tree-loop-distribution.c (dot_rdg_1): Make graph prettier.
	(dot_rdg): Use popen instead of system in optional code.
	(remaining_stmts, upstream_mem_writes): Remove global bitmaps.
	(already_processed_vertex_p): Adjust.
	(has_anti_or_output_dependence, predecessor_has_mem_write,
	mark_nodes_having_upstream_mem_writes, has_upstream_mem_writes,
	rdg_flag_uses): Remove.
	(rdg_flag_vertex): Simplify.
	(rdg_flag_vertex_and_dependent): Rely on a correct RDG and
	remove recursion.
	(build_rdg_partition_for_component): Process the first vertex
	of a component only.
	(ldist_gen): Do not compute remaining_stmts or upstream_mem_writes.

	* gcc.dg/tree-ssa/ldist-4.c: Remove undefined behavior.  Adjust
	expected outcome and comment why that happens.

From-SVN: r202516
parent 38c89763
2013-09-12 Richard Biener <rguenther@suse.de>
* tree-loop-distribution.c (dot_rdg_1): Make graph prettier.
(dot_rdg): Use popen instead of system in optional code.
(remaining_stmts, upstream_mem_writes): Remove global bitmaps.
(already_processed_vertex_p): Adjust.
(has_anti_or_output_dependence, predecessor_has_mem_write,
mark_nodes_having_upstream_mem_writes, has_upstream_mem_writes,
rdg_flag_uses): Remove.
(rdg_flag_vertex): Simplify.
(rdg_flag_vertex_and_dependent): Rely on a correct RDG and
remove recursion.
(build_rdg_partition_for_component): Process the first vertex
of a component only.
(ldist_gen): Do not compute remaining_stmts or upstream_mem_writes.
2013-09-12 Alan Modra <amodra@gmail.com>
* config/rs6000/rs6000.c (toc_relative_expr_p): Use add_cint_operand.
......
2013-09-12 Richard Biener <rguenther@suse.de>
* gcc.dg/tree-ssa/ldist-4.c: Remove undefined behavior. Adjust
expected outcome and comment why that happens.
2013-09-11 Richard Biener <rguenther@suse.de>
PR middle-end/58377
......
......@@ -10,20 +10,18 @@ int loop1 (int k)
a[0] = k;
for (i = 1; i < 100; i ++)
{
for (j = 0; j < 100; j++)
for (j = 1; j < 100; j++)
{
a[j] = k * i;
b[i][j] = a[j-1] + k;
}
}
return b[100-1][0];
return b[100-1][1];
}
/* We used to distribute also innermost loops, but these could produce
too much code in the outer loop, degrading performance of scalar
code. So this test was XFAILed because the cost model of the stand
alone distribution pass has evolved. Now it passes. */
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" { target ilp32 } } } */
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" { target lp64 } } } */
/* The current cost model fuses the two partitions because they have
similar memory accesses. */
/* { dg-final { scan-tree-dump "similar memory accesses" "ldist" } } */
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
......@@ -218,6 +218,9 @@ static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
int i;
pretty_printer buffer;
pp_needs_newline (&buffer) = false;
buffer.buffer->stream = file;
fprintf (file, "digraph RDG {\n");
......@@ -226,6 +229,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg)
struct vertex *v = &(rdg->vertices[i]);
struct graph_edge *e;
fprintf (file, "%d [label=\"[%d] ", i, i);
pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
pp_flush (&buffer);
fprintf (file, "\"]\n");
/* Highlight reads from memory. */
if (RDG_MEM_READS_STMT (rdg, i))
fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
......@@ -268,16 +276,15 @@ dot_rdg_1 (FILE *file, struct graph *rdg)
DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
/* When debugging, enable the following code. This cannot be used
in production compilers because it calls "system". */
#if 0
FILE *file = fopen ("/tmp/rdg.dot", "w");
gcc_assert (file != NULL);
/* When debugging, you may want to enable the following code. */
#if 1
FILE *file = popen("dot -Tx11", "w");
if (!file)
return;
dot_rdg_1 (file, rdg);
fclose (file);
system ("dotty /tmp/rdg.dot &");
fflush (file);
close (fileno (file));
pclose (file);
#else
dot_rdg_1 (stderr, rdg);
#endif
......@@ -645,17 +652,6 @@ partition_has_writes (partition_t partition)
return partition->has_writes;
}
/* If bit I is not set, it means that this node represents an
operation that has already been performed, and that should not be
performed again. This is the subgraph of remaining important
computations that is passed to the DFS algorithm for avoiding to
include several times the same stores in different loops. */
static bitmap remaining_stmts;
/* A node of the RDG is marked in this bitmap when it has as a
predecessor a node that writes to memory. */
static bitmap upstream_mem_writes;
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
the LOOP. */
......@@ -1080,140 +1076,12 @@ rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
return (bitmap_bit_p (processed, v)
|| !bitmap_bit_p (remaining_stmts, v));
}
/* Returns NULL when there is no anti-dependence or output-dependence
among the successors of vertex V, otherwise returns the edge with the
dependency. */
static struct graph_edge *
has_anti_or_output_dependence (struct vertex *v)
{
struct graph_edge *e;
if (v->succ)
for (e = v->succ; e; e = e->succ_next)
if (RDGE_TYPE (e) == anti_dd
|| RDGE_TYPE (e) == output_dd)
return e;
return NULL;
}
/* Returns true when V has an anti-dependence edge among its successors. */
static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
struct graph_edge *e;
if (v->pred)
for (e = v->pred; e; e = e->pred_next)
if (bitmap_bit_p (upstream_mem_writes, e->src)
/* Don't consider flow channels: a write to memory followed
by a read from memory. These channels allow the split of
the RDG in different partitions. */
&& !RDG_MEM_WRITE_STMT (rdg, e->src))
return true;
return false;
}
/* Initializes the upstream_mem_writes bitmap following the
information from RDG. */
static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
int v, x;
bitmap seen = BITMAP_ALLOC (NULL);
for (v = rdg->n_vertices - 1; v >= 0; v--)
if (!bitmap_bit_p (seen, v))
{
unsigned i;
vec<int> nodes;
nodes.create (3);
graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
FOR_EACH_VEC_ELT (nodes, i, x)
{
if (!bitmap_set_bit (seen, x))
continue;
if (RDG_MEM_WRITE_STMT (rdg, x)
|| predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
/* In anti dependences the read should occur before
the write, this is why both the read and the write
should be placed in the same partition. In output
dependences the writes order need to be preserved. */
|| has_anti_or_output_dependence (&(rdg->vertices[x])))
bitmap_set_bit (upstream_mem_writes, x);
}
nodes.release ();
}
}
/* Returns true when vertex u has a memory write node as a predecessor
in RDG. */
static bool
has_upstream_mem_writes (int u)
{
return bitmap_bit_p (upstream_mem_writes, u);
return bitmap_bit_p (processed, v);
}
static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
bitmap);
/* Flag the uses of U stopping following the information from
upstream_mem_writes. */
static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition,
bitmap processed)
{
struct vertex *x = &(rdg->vertices[u]);
gimple stmt = RDGV_STMT (x);
struct graph_edge *anti_dep = has_anti_or_output_dependence (x);
/* Keep in the same partition the destination of an antidependence,
because this is a store to the exact same location. Putting this
in another partition is bad for cache locality. */
if (anti_dep)
{
int v = anti_dep->dest;
if (!already_processed_vertex_p (processed, v))
rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
}
if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
{
tree op0 = gimple_assign_lhs (stmt);
/* Scalar channels don't have enough space for transmitting data
between tasks, unless we add more storage by privatizing. */
if (is_gimple_reg (op0))
{
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
{
int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
if (!already_processed_vertex_p (processed, v))
rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
}
}
}
}
/* Flag V from RDG as part of PARTITION, and also flag its loop number
in LOOPS. */
......@@ -1229,10 +1097,7 @@ rdg_flag_vertex (struct graph *rdg, int v, partition_t partition)
bitmap_set_bit (partition->loops, loop->num);
if (rdg_cannot_recompute_vertex_p (rdg, v))
{
partition->has_writes = true;
bitmap_clear_bit (remaining_stmts, v);
}
partition->has_writes = true;
}
/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
......@@ -1247,14 +1112,11 @@ rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
nodes.create (3);
int x;
bitmap_set_bit (processed, v);
rdg_flag_uses (rdg, v, partition, processed);
graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
rdg_flag_vertex (rdg, v, partition);
graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
FOR_EACH_VEC_ELT (nodes, i, x)
if (!already_processed_vertex_p (processed, x))
rdg_flag_vertex_and_dependent (rdg, x, partition, processed);
if (bitmap_set_bit (processed, x))
rdg_flag_vertex (rdg, x, partition);
nodes.release ();
}
......@@ -1322,13 +1184,14 @@ rdg_flag_loop_exits (struct graph *rdg, partition_t partition,
static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
int i, v;
partition_t partition = partition_alloc (NULL, NULL);
bitmap processed = BITMAP_ALLOC (NULL);
FOR_EACH_VEC_ELT (c->vertices, i, v)
if (!already_processed_vertex_p (processed, v))
rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
/* Flag the first vertex of the component and its dependent nodes.
Other members of the component are included in its dependencies.
??? What do we need components for again? To early merge initial
vertices that are in a SCC of the RDG? */
rdg_flag_vertex_and_dependent (rdg, c->vertices[0], partition, processed);
rdg_flag_loop_exits (rdg, partition, processed);
......@@ -1777,13 +1640,8 @@ ldist_gen (struct loop *loop, struct graph *rdg,
bitmap processed = BITMAP_ALLOC (NULL);
bool any_builtin;
remaining_stmts = BITMAP_ALLOC (NULL);
upstream_mem_writes = BITMAP_ALLOC (NULL);
for (i = 0; i < rdg->n_vertices; i++)
{
bitmap_set_bit (remaining_stmts, i);
/* Save in OTHER_STORES all the memory writes that are not in
STARTING_VERTICES. */
if (RDG_MEM_WRITE_STMT (rdg, i))
......@@ -1804,7 +1662,6 @@ ldist_gen (struct loop *loop, struct graph *rdg,
}
}
mark_nodes_having_upstream_mem_writes (rdg);
rdg_build_components (rdg, starting_vertices, &components);
rdg_build_partitions (rdg, components, &other_stores, &partitions,
processed);
......@@ -1929,9 +1786,6 @@ ldist_gen (struct loop *loop, struct graph *rdg,
ldist_done:
BITMAP_FREE (remaining_stmts);
BITMAP_FREE (upstream_mem_writes);
FOR_EACH_VEC_ELT (partitions, i, partition)
partition_free (partition);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment